Skip to content

Commit 3187d6d

Browse files
committed
Introduce parse_ident()
SQL-layer function to split qualified identifier into array parts. Author: Pavel Stehule with minor editorization by me and Jim Nasby
1 parent 992b5ba commit 3187d6d

File tree

10 files changed

+375
-2
lines changed

10 files changed

+375
-2
lines changed

doc/src/sgml/func.sgml

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1818,6 +1818,32 @@
18181818
<entry><literal>900150983cd24fb0 d6963f7d28e17f72</literal></entry>
18191819
</row>
18201820

1821+
<row>
1822+
<entry>
1823+
<indexterm>
1824+
<primary>parse_ident</primary>
1825+
</indexterm>
1826+
<literal><function>parse_ident(<parameter>str</parameter> <type>text</type>,
1827+
[ <parameter>strictmode</parameter> <type>boolean</type> DEFAULT true ] )</function></literal>
1828+
</entry>
1829+
<entry><type>text[]</type></entry>
1830+
<entry>Split <parameter>qualified identifier</parameter> into array
1831+
<parameter>parts</parameter>. When <parameter>strictmode</parameter> is
1832+
false, extra characters after the identifier are ignored. This is useful
1833+
for parsing identifiers for objects like functions and arrays that may
1834+
have trailing characters. By default, extra characters after the last
1835+
identifier are considered an error, but if the second parameter is false,
1836+
then characters after the last identifier are ignored. Note that this
1837+
function does not truncate quoted identifiers. If you care about that
1838+
you should cast the result of this function to name[]. Non-printable
1839+
characters (like 0 to 31) are always displayed as hexadecimal codes,
1840+
which can differ from PostgreSQL's internal SQL identifier
1841+
processing, where the original escaped value is displayed.
1842+
</entry>
1843+
<entry><literal>parse_ident('"SomeSchema".someTable')</literal></entry>
1844+
<entry><literal>{SomeSchema,sometable}</literal></entry>
1845+
</row>
1846+
18211847
<row>
18221848
<entry>
18231849
<indexterm>

src/backend/catalog/system_views.sql

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -990,3 +990,10 @@ RETURNS jsonb
990990
LANGUAGE INTERNAL
991991
STRICT IMMUTABLE
992992
AS 'jsonb_set';
993+
994+
CREATE OR REPLACE FUNCTION
995+
parse_ident(str text, strict boolean DEFAULT true)
996+
RETURNS text[]
997+
LANGUAGE INTERNAL
998+
STRICT IMMUTABLE
999+
AS 'parse_ident';

src/backend/parser/scansup.c

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,15 @@ scanstr(const char *s)
129129
*/
130130
char *
downcase_truncate_identifier(const char *ident, int len, bool warn)
{
	/*
	 * All of the real work lives in downcase_identifier(); this entry point
	 * simply requests its truncating behavior.
	 */
	const bool	do_truncate = true;

	return downcase_identifier(ident, len, warn, do_truncate);
}
135+
136+
/*
137+
* a workhorse for downcase_truncate_identifier
138+
*/
139+
char *
140+
downcase_identifier(const char *ident, int len, bool warn, bool truncate)
132141
{
133142
char *result;
134143
int i;
@@ -158,12 +167,13 @@ downcase_truncate_identifier(const char *ident, int len, bool warn)
158167
}
159168
result[i] = '\0';
160169

161-
if (i >= NAMEDATALEN)
170+
if (i >= NAMEDATALEN && truncate)
162171
truncate_identifier(result, i, warn);
163172

164173
return result;
165174
}
166175

176+
167177
/*
168178
* truncate_identifier() --- truncate an identifier to NAMEDATALEN-1 bytes.
169179
*

src/backend/utils/adt/misc.c

Lines changed: 224 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
#include "commands/dbcommands.h"
2828
#include "funcapi.h"
2929
#include "miscadmin.h"
30+
#include "parser/scansup.h"
3031
#include "parser/keywords.h"
3132
#include "postmaster/syslogger.h"
3233
#include "rewrite/rewriteHandler.h"
@@ -719,3 +720,226 @@ pg_column_is_updatable(PG_FUNCTION_ARGS)
719720

720721
PG_RETURN_BOOL((events & REQ_EVENTS) == REQ_EVENTS);
721722
}
723+
724+
725+
/*
 * These simple character classifiers are meant to be compatible with the
 * SQL lexer's identifier rules; they are used only by parse_ident().
 */

/*
 * is_ident_start - can byte c begin an unquoted identifier?
 *
 * Accepts '_', ASCII letters, and any high-bit-set byte (0200..0377).
 */
static bool
is_ident_start(unsigned char c)
{
	/* Underscore ... */
	if (c == '_')
		return true;
	/* ... or an ASCII letter ... */
	if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))
		return true;
	/* ... or any high-bit-set byte (0200..0377) */
	if (c >= 0200)
		return true;

	return false;
}

/*
 * is_ident_cont - can byte c appear after the first byte of an unquoted
 * identifier?
 *
 * Accepts digits and the dollar sign in addition to everything accepted by
 * is_ident_start().  The dollar sign is needed to stay compatible with the
 * SQL identifier syntax, which permits '$' in non-leading positions.
 */
static bool
is_ident_cont(unsigned char c)
{
	/* Can be a digit or a dollar sign ... */
	if ((c >= '0' && c <= '9') || c == '$')
		return true;

	/* ... or an identifier start character */
	return is_ident_start(c);
}
751+
752+
/*
753+
* Sanitize SQL string for using in error message.
754+
*/
755+
static char *
756+
sanitize_text(text *t)
757+
{
758+
int len = VARSIZE_ANY_EXHDR(t);
759+
const char *p = VARDATA_ANY(t);
760+
StringInfo dstr;
761+
762+
dstr = makeStringInfo();
763+
764+
appendStringInfoChar(dstr, '"');
765+
766+
while (len--)
767+
{
768+
switch (*p)
769+
{
770+
case '\b':
771+
appendStringInfoString(dstr, "\\b");
772+
break;
773+
case '\f':
774+
appendStringInfoString(dstr, "\\f");
775+
break;
776+
case '\n':
777+
appendStringInfoString(dstr, "\\n");
778+
break;
779+
case '\r':
780+
appendStringInfoString(dstr, "\\r");
781+
break;
782+
case '\t':
783+
appendStringInfoString(dstr, "\\t");
784+
break;
785+
case '\'':
786+
appendStringInfoString(dstr, "''");
787+
break;
788+
case '\\':
789+
appendStringInfoString(dstr, "\\\\");
790+
break;
791+
default:
792+
if ((unsigned char) *p < ' ')
793+
appendStringInfo(dstr, "\\u%04x", (int) *p);
794+
else
795+
appendStringInfoCharMacro(dstr, *p);
796+
break;
797+
}
798+
p++;
799+
}
800+
801+
appendStringInfoChar(dstr, '"');
802+
803+
return dstr->data;
804+
}
805+
806+
/*
807+
* parse_ident - parse SQL composed identifier to separate identifiers.
808+
* When strict mode is active (second parameter), then any chars after
809+
* last identifiers are disallowed.
810+
*/
811+
Datum
812+
parse_ident(PG_FUNCTION_ARGS)
813+
{
814+
text *qualname;
815+
char *qualname_str;
816+
bool strict;
817+
char *nextp;
818+
bool after_dot = false;
819+
ArrayBuildState *astate = NULL;
820+
821+
qualname = PG_GETARG_TEXT_PP(0);
822+
qualname_str = text_to_cstring(qualname);
823+
strict = PG_GETARG_BOOL(1);
824+
825+
nextp = qualname_str;
826+
827+
/* skip leading whitespace */
828+
while (isspace((unsigned char) *nextp))
829+
nextp++;
830+
831+
for (;;)
832+
{
833+
char *curname;
834+
char *endp;
835+
bool missing_ident;
836+
837+
missing_ident = true;
838+
839+
if (*nextp == '\"')
840+
{
841+
curname = nextp + 1;
842+
for (;;)
843+
{
844+
endp = strchr(nextp + 1, '\"');
845+
if (endp == NULL)
846+
ereport(ERROR,
847+
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
848+
errmsg("unclosed double quotes"),
849+
errdetail("string %s is not valid identifier",
850+
sanitize_text(qualname))));
851+
if (endp[1] != '\"')
852+
break;
853+
memmove(endp, endp + 1, strlen(endp));
854+
nextp = endp;
855+
}
856+
nextp = endp + 1;
857+
*endp = '\0';
858+
859+
/* Show complete input string in this case. */
860+
if (endp - curname == 0)
861+
ereport(ERROR,
862+
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
863+
errmsg("identifier should not be empty: %s",
864+
sanitize_text(qualname))));
865+
866+
astate = accumArrayResult(astate, CStringGetTextDatum(curname),
867+
false, TEXTOID, CurrentMemoryContext);
868+
missing_ident = false;
869+
}
870+
else
871+
{
872+
if (is_ident_start((unsigned char) *nextp))
873+
{
874+
char *downname;
875+
int len;
876+
text *part;
877+
878+
curname = nextp++;
879+
while (is_ident_cont((unsigned char) *nextp))
880+
nextp++;
881+
882+
len = nextp - curname;
883+
884+
/*
885+
* Unlike name, we don't implicitly truncate identifiers. This
886+
* is useful for allowing the user to check for specific parts
887+
* of the identifier being too long. It's easy enough for the
888+
* user to get the truncated names by casting our output to
889+
* name[].
890+
*/
891+
downname = downcase_identifier(curname, len, false, false);
892+
part = cstring_to_text_with_len(downname, len);
893+
astate = accumArrayResult(astate, PointerGetDatum(part), false,
894+
TEXTOID, CurrentMemoryContext);
895+
missing_ident = false;
896+
}
897+
}
898+
899+
if (missing_ident)
900+
{
901+
/* Different error messages based on where we failed. */
902+
if (*nextp == '.')
903+
ereport(ERROR,
904+
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
905+
errmsg("missing valid identifier before \".\" symbol: %s",
906+
sanitize_text(qualname))));
907+
else if (after_dot)
908+
ereport(ERROR,
909+
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
910+
errmsg("missing valid identifier after \".\" symbol: %s",
911+
sanitize_text(qualname))));
912+
else
913+
ereport(ERROR,
914+
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
915+
errmsg("missing valid identifier: %s",
916+
sanitize_text(qualname))));
917+
}
918+
919+
while (isspace((unsigned char) *nextp))
920+
nextp++;
921+
922+
if (*nextp == '.')
923+
{
924+
after_dot = true;
925+
nextp++;
926+
while (isspace((unsigned char) *nextp))
927+
nextp++;
928+
}
929+
else if (*nextp == '\0')
930+
{
931+
break;
932+
}
933+
else
934+
{
935+
if (strict)
936+
ereport(ERROR,
937+
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
938+
errmsg("identifier contains disallowed characters: %s",
939+
sanitize_text(qualname))));
940+
break;
941+
}
942+
}
943+
944+
PG_RETURN_DATUM(makeArrayResult(astate, CurrentMemoryContext));
945+
}

src/include/catalog/catversion.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,6 @@
5353
*/
5454

5555
/* yyyymmddN */
56-
#define CATALOG_VERSION_NO 201603151
56+
#define CATALOG_VERSION_NO 201603181
5757

5858
#endif

src/include/catalog/pg_proc.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3463,6 +3463,9 @@ DESCR("I/O");
34633463
DATA(insert OID = 4086 ( to_regnamespace PGNSP PGUID 12 1 0 0 0 f f f f t f s s 1 0 4089 "25" _null_ _null_ _null_ _null_ _null_ to_regnamespace _null_ _null_ _null_ ));
34643464
DESCR("convert namespace name to regnamespace");
34653465

3466+
DATA(insert OID = 1268 ( parse_ident PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 1009 "25 16" _null_ _null_ "{str,strict}" _null_ _null_ parse_ident _null_ _null_ _null_ ));
3467+
DESCR("parse qualified identifier to array of identifiers");
3468+
34663469
DATA(insert OID = 2246 ( fmgr_internal_validator PGNSP PGUID 12 1 0 0 0 f f f f t f s s 1 0 2278 "26" _null_ _null_ _null_ _null_ _null_ fmgr_internal_validator _null_ _null_ _null_ ));
34673470
DESCR("(internal)");
34683471
DATA(insert OID = 2247 ( fmgr_c_validator PGNSP PGUID 12 1 0 0 0 f f f f t f s s 1 0 2278 "26" _null_ _null_ _null_ _null_ _null_ fmgr_c_validator _null_ _null_ _null_ ));

src/include/parser/scansup.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,9 @@ extern char *scanstr(const char *s);
2020
extern char *downcase_truncate_identifier(const char *ident, int len,
2121
bool warn);
2222

23+
extern char *downcase_identifier(const char *ident, int len,
24+
bool warn, bool truncate);
25+
2326
extern void truncate_identifier(char *ident, int len, bool warn);
2427

2528
extern bool scanner_isspace(char ch);

src/include/utils/builtins.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -510,6 +510,7 @@ extern Datum pg_typeof(PG_FUNCTION_ARGS);
510510
extern Datum pg_collation_for(PG_FUNCTION_ARGS);
511511
extern Datum pg_relation_is_updatable(PG_FUNCTION_ARGS);
512512
extern Datum pg_column_is_updatable(PG_FUNCTION_ARGS);
513+
extern Datum parse_ident(PG_FUNCTION_ARGS);
513514

514515
/* oid.c */
515516
extern Datum oidin(PG_FUNCTION_ARGS);

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy