Skip to content

Commit e651bcf

Browse files
committed
Add xmlpath() to evaluate XPath expressions, with namespaces support.
Nikolay Samokhvalov
1 parent 4f896da commit e651bcf

File tree

6 files changed

+309
-4
lines changed

6 files changed

+309
-4
lines changed

src/backend/utils/adt/xml.c

Lines changed: 249 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
88
* Portions Copyright (c) 1994, Regents of the University of California
99
*
10-
* $PostgreSQL: pgsql/src/backend/utils/adt/xml.c,v 1.35 2007/03/15 23:12:06 tgl Exp $
10+
* $PostgreSQL: pgsql/src/backend/utils/adt/xml.c,v 1.36 2007/03/22 20:14:58 momjian Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -47,6 +47,8 @@
4747
#include <libxml/uri.h>
4848
#include <libxml/xmlerror.h>
4949
#include <libxml/xmlwriter.h>
50+
#include <libxml/xpath.h>
51+
#include <libxml/xpathInternals.h>
5052
#endif /* USE_LIBXML */
5153

5254
#include "catalog/namespace.h"
@@ -67,6 +69,7 @@
6769
#include "utils/datetime.h"
6870
#include "utils/lsyscache.h"
6971
#include "utils/memutils.h"
72+
#include "access/tupmacs.h"
7073
#include "utils/xml.h"
7174

7275

@@ -88,6 +91,7 @@ static xmlChar *xml_text2xmlChar(text *in);
8891
static int parse_xml_decl(const xmlChar *str, size_t *lenp, xmlChar **version, xmlChar **encoding, int *standalone);
8992
static bool print_xml_decl(StringInfo buf, const xmlChar *version, pg_enc encoding, int standalone);
9093
static xmlDocPtr xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace, xmlChar *encoding);
94+
static text *xml_xmlnodetotext(xmlNodePtr cur);
9195

9296
#endif /* USE_LIBXML */
9397

@@ -1463,7 +1467,6 @@ map_xml_name_to_sql_identifier(char *name)
14631467
return buf.data;
14641468
}
14651469

1466-
14671470
/*
14681471
* Map SQL value to XML value; see SQL/XML:2003 section 9.16.
14691472
*/
@@ -2403,3 +2406,247 @@ SPI_sql_row_to_xmlelement(int rownum, StringInfo result, char *tablename, bool n
24032406
else
24042407
appendStringInfoString(result, "</row>\n\n");
24052408
}
2409+
2410+
2411+
/*
2412+
* XPath related functions
2413+
*/
2414+
2415+
#ifdef USE_LIBXML
2416+
/*
2417+
* Convert XML node to text (return value only, it's not dumping)
2418+
*/
2419+
text *
2420+
xml_xmlnodetotext(xmlNodePtr cur)
2421+
{
2422+
xmlChar *str;
2423+
text *result;
2424+
size_t len;
2425+
2426+
str = xmlXPathCastNodeToString(cur);
2427+
len = strlen((char *) str);
2428+
result = (text *) palloc(len + VARHDRSZ);
2429+
SET_VARSIZE(result, len + VARHDRSZ);
2430+
memcpy(VARDATA(result), str, len);
2431+
2432+
return result;
2433+
}
2434+
#endif
2435+
2436+
/*
2437+
* Evaluate XPath expression and return array of XML values.
2438+
* As we have no support of XQuery sequences yet, this functions seems
2439+
* to be the most useful one (array of XML functions plays a role of
2440+
* some kind of substritution for XQuery sequences).
2441+
2442+
* Workaround here: we parse XML data in different way to allow XPath for
2443+
* fragments (see "XPath for fragment" TODO comment inside).
2444+
*/
2445+
Datum
2446+
xmlpath(PG_FUNCTION_ARGS)
2447+
{
2448+
#ifdef USE_LIBXML
2449+
ArrayBuildState *astate = NULL;
2450+
xmlParserCtxtPtr ctxt = NULL;
2451+
xmlDocPtr doc = NULL;
2452+
xmlXPathContextPtr xpathctx = NULL;
2453+
xmlXPathCompExprPtr xpathcomp = NULL;
2454+
xmlXPathObjectPtr xpathobj = NULL;
2455+
int32 len, xpath_len;
2456+
xmlChar *string, *xpath_expr;
2457+
bool res_is_null = FALSE;
2458+
int i;
2459+
xmltype *data;
2460+
text *xpath_expr_text;
2461+
ArrayType *namespaces;
2462+
int *dims, ndims, ns_count = 0, bitmask = 1;
2463+
char *ptr;
2464+
bits8 *bitmap;
2465+
char **ns_names = NULL, **ns_uris = NULL;
2466+
int16 typlen;
2467+
bool typbyval;
2468+
char typalign;
2469+
2470+
/* the function is not strict, we must check first two args */
2471+
if (PG_ARGISNULL(0) || PG_ARGISNULL(1))
2472+
PG_RETURN_NULL();
2473+
2474+
xpath_expr_text = PG_GETARG_TEXT_P(0);
2475+
data = PG_GETARG_XML_P(1);
2476+
2477+
/* Namespace mappings passed as text[].
2478+
* Assume that 2-dimensional array has been passed,
2479+
* the 1st subarray is array of names, the 2nd -- array of URIs,
2480+
* example: ARRAY[ARRAY['myns', 'myns2'], ARRAY['http://example.com', 'http://example2.com']].
2481+
*/
2482+
if (!PG_ARGISNULL(2))
2483+
{
2484+
namespaces = PG_GETARG_ARRAYTYPE_P(2);
2485+
ndims = ARR_NDIM(namespaces);
2486+
dims = ARR_DIMS(namespaces);
2487+
2488+
/* Sanity check */
2489+
if (ndims != 2)
2490+
ereport(ERROR, (errmsg("invalid array passed for namespace mappings"),
2491+
errdetail("Only 2-dimensional array may be used for namespace mappings.")));
2492+
2493+
Assert(ARR_ELEMTYPE(namespaces) == TEXTOID);
2494+
2495+
ns_count = ArrayGetNItems(ndims, dims) / 2;
2496+
get_typlenbyvalalign(ARR_ELEMTYPE(namespaces),
2497+
&typlen, &typbyval, &typalign);
2498+
ns_names = (char **) palloc(ns_count * sizeof(char *));
2499+
ns_uris = (char **) palloc(ns_count * sizeof(char *));
2500+
ptr = ARR_DATA_PTR(namespaces);
2501+
bitmap = ARR_NULLBITMAP(namespaces);
2502+
bitmask = 1;
2503+
2504+
for (i = 0; i < ns_count * 2; i++)
2505+
{
2506+
if (bitmap && (*bitmap & bitmask) == 0)
2507+
ereport(ERROR, (errmsg("neither namespace nor URI may be NULL"))); /* TODO: better message */
2508+
else
2509+
{
2510+
if (i < ns_count)
2511+
ns_names[i] = DatumGetCString(DirectFunctionCall1(textout,
2512+
PointerGetDatum(ptr)));
2513+
else
2514+
ns_uris[i - ns_count] = DatumGetCString(DirectFunctionCall1(textout,
2515+
PointerGetDatum(ptr)));
2516+
ptr = att_addlength(ptr, typlen, PointerGetDatum(ptr));
2517+
ptr = (char *) att_align(ptr, typalign);
2518+
}
2519+
2520+
/* advance bitmap pointer if any */
2521+
if (bitmap)
2522+
{
2523+
bitmask <<= 1;
2524+
if (bitmask == 0x100)
2525+
{
2526+
bitmap++;
2527+
bitmask = 1;
2528+
}
2529+
}
2530+
}
2531+
}
2532+
2533+
len = VARSIZE(data) - VARHDRSZ;
2534+
xpath_len = VARSIZE(xpath_expr_text) - VARHDRSZ;
2535+
if (xpath_len == 0)
2536+
ereport(ERROR, (errmsg("empty XPath expression")));
2537+
2538+
if (xmlStrncmp((xmlChar *) VARDATA(data), (xmlChar *) "<?xml", 5) == 0)
2539+
{
2540+
string = palloc(len + 1);
2541+
memcpy(string, VARDATA(data), len);
2542+
string[len] = '\0';
2543+
xpath_expr = palloc(xpath_len + 1);
2544+
memcpy(xpath_expr, VARDATA(xpath_expr_text), xpath_len);
2545+
xpath_expr[xpath_len] = '\0';
2546+
}
2547+
else
2548+
{
2549+
/* use "<x>...</x>" as dummy root element to enable XPath for fragments */
2550+
/* TODO: (XPath for fragment) find better solution to work with XML fragment! */
2551+
string = xmlStrncatNew((xmlChar *) "<x>", (xmlChar *) VARDATA(data), len);
2552+
string = xmlStrncat(string, (xmlChar *) "</x>", 5);
2553+
len += 7;
2554+
xpath_expr = xmlStrncatNew((xmlChar *) "/x", (xmlChar *) VARDATA(xpath_expr_text), xpath_len);
2555+
len += 2;
2556+
}
2557+
2558+
xml_init();
2559+
2560+
PG_TRY();
2561+
{
2562+
/* redundant XML parsing (two parsings for the same value in the same session are possible) */
2563+
ctxt = xmlNewParserCtxt();
2564+
if (ctxt == NULL)
2565+
xml_ereport(ERROR, ERRCODE_INTERNAL_ERROR,
2566+
"could not allocate parser context");
2567+
doc = xmlCtxtReadMemory(ctxt, (char *) string, len, NULL, NULL, 0);
2568+
if (doc == NULL)
2569+
xml_ereport(ERROR, ERRCODE_INVALID_XML_DOCUMENT,
2570+
"could not parse XML data");
2571+
xpathctx = xmlXPathNewContext(doc);
2572+
if (xpathctx == NULL)
2573+
xml_ereport(ERROR, ERRCODE_INTERNAL_ERROR,
2574+
"could not allocate XPath context");
2575+
xpathctx->node = xmlDocGetRootElement(doc);
2576+
if (xpathctx->node == NULL)
2577+
xml_ereport(ERROR, ERRCODE_INTERNAL_ERROR,
2578+
"could not find root XML element");
2579+
2580+
/* register namespaces, if any */
2581+
if ((ns_count > 0) && ns_names && ns_uris)
2582+
for (i = 0; i < ns_count; i++)
2583+
if (0 != xmlXPathRegisterNs(xpathctx, (xmlChar *) ns_names[i], (xmlChar *) ns_uris[i]))
2584+
ereport(ERROR,
2585+
(errmsg("could not register XML namespace with prefix=\"%s\" and href=\"%s\"", ns_names[i], ns_uris[i])));
2586+
2587+
xpathcomp = xmlXPathCompile(xpath_expr);
2588+
if (xpathcomp == NULL)
2589+
xml_ereport(ERROR, ERRCODE_INTERNAL_ERROR,
2590+
"invalid XPath expression"); /* TODO: show proper XPath error details */
2591+
2592+
xpathobj = xmlXPathCompiledEval(xpathcomp, xpathctx);
2593+
xmlXPathFreeCompExpr(xpathcomp);
2594+
if (xpathobj == NULL)
2595+
ereport(ERROR, (errmsg("could not create XPath object")));
2596+
2597+
if (xpathobj->nodesetval == NULL)
2598+
res_is_null = TRUE;
2599+
2600+
if (!res_is_null && xpathobj->nodesetval->nodeNr == 0)
2601+
/* TODO maybe empty array should be here, not NULL? (if so -- fix segfault) */
2602+
/*PG_RETURN_ARRAYTYPE_P(makeArrayResult(astate, CurrentMemoryContext));*/
2603+
res_is_null = TRUE;
2604+
2605+
if (!res_is_null)
2606+
for (i = 0; i < xpathobj->nodesetval->nodeNr; i++)
2607+
{
2608+
Datum elem;
2609+
bool elemisnull = false;
2610+
elem = PointerGetDatum(xml_xmlnodetotext(xpathobj->nodesetval->nodeTab[i]));
2611+
astate = accumArrayResult(astate, elem,
2612+
elemisnull, XMLOID,
2613+
CurrentMemoryContext);
2614+
}
2615+
2616+
xmlXPathFreeObject(xpathobj);
2617+
xmlXPathFreeContext(xpathctx);
2618+
xmlFreeParserCtxt(ctxt);
2619+
xmlFreeDoc(doc);
2620+
xmlCleanupParser();
2621+
}
2622+
PG_CATCH();
2623+
{
2624+
if (xpathcomp)
2625+
xmlXPathFreeCompExpr(xpathcomp);
2626+
if (xpathobj)
2627+
xmlXPathFreeObject(xpathobj);
2628+
if (xpathctx)
2629+
xmlXPathFreeContext(xpathctx);
2630+
if (doc)
2631+
xmlFreeDoc(doc);
2632+
if (ctxt)
2633+
xmlFreeParserCtxt(ctxt);
2634+
xmlCleanupParser();
2635+
2636+
PG_RE_THROW();
2637+
}
2638+
PG_END_TRY();
2639+
2640+
if (res_is_null)
2641+
{
2642+
PG_RETURN_NULL();
2643+
}
2644+
else
2645+
{
2646+
PG_RETURN_ARRAYTYPE_P(makeArrayResult(astate, CurrentMemoryContext));
2647+
}
2648+
#else
2649+
NO_XML_SUPPORT();
2650+
return 0;
2651+
#endif
2652+
}

src/include/catalog/pg_proc.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
88
* Portions Copyright (c) 1994, Regents of the University of California
99
*
10-
* $PostgreSQL: pgsql/src/include/catalog/pg_proc.h,v 1.449 2007/03/20 05:45:00 neilc Exp $
10+
* $PostgreSQL: pgsql/src/include/catalog/pg_proc.h,v 1.450 2007/03/22 20:14:58 momjian Exp $
1111
*
1212
* NOTES
1313
* The script catalog/genbki.sh reads this file and generates .bki
@@ -4095,6 +4095,10 @@ DESCR("map table contents and structure to XML and XML Schema");
40954095
DATA(insert OID = 2930 ( query_to_xml_and_xmlschema PGNSP PGUID 12 100 0 f f t f s 4 142 "25 16 16 25" _null_ _null_ "{query,nulls,tableforest,targetns}" query_to_xml_and_xmlschema - _null_ ));
40964096
DESCR("map query result and structure to XML and XML Schema");
40974097

4098+
DATA(insert OID = 2931 ( xmlpath PGNSP PGUID 12 1 0 f f f f i 3 143 "25 142 1009" _null_ _null_ _null_ xmlpath - _null_ ));
4099+
DESCR("evaluate XPath expression, with namespaces support");
4100+
DATA(insert OID = 2932 ( xmlpath PGNSP PGUID 14 1 0 f f f f i 2 143 "25 142" _null_ _null_ _null_ "select pg_catalog.xmlpath($1, $2, NULL)" - _null_ ));
4101+
DESCR("evaluate XPath expression");
40984102

40994103
/* uuid */
41004104
DATA(insert OID = 2952 ( uuid_in PGNSP PGUID 12 1 0 f f t f i 1 2950 "2275" _null_ _null_ _null_ uuid_in - _null_ ));

src/include/utils/xml.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
88
* Portions Copyright (c) 1994, Regents of the University of California
99
*
10-
* $PostgreSQL: pgsql/src/include/utils/xml.h,v 1.16 2007/02/16 07:46:55 petere Exp $
10+
* $PostgreSQL: pgsql/src/include/utils/xml.h,v 1.17 2007/03/22 20:14:58 momjian Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -36,6 +36,7 @@ extern Datum xmlconcat2(PG_FUNCTION_ARGS);
3636
extern Datum texttoxml(PG_FUNCTION_ARGS);
3737
extern Datum xmltotext(PG_FUNCTION_ARGS);
3838
extern Datum xmlvalidate(PG_FUNCTION_ARGS);
39+
extern Datum xmlpath(PG_FUNCTION_ARGS);
3940

4041
extern Datum table_to_xml(PG_FUNCTION_ARGS);
4142
extern Datum query_to_xml(PG_FUNCTION_ARGS);

src/test/regress/expected/xml.out

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -401,3 +401,33 @@ SELECT table_name, view_definition FROM information_schema.views
401401
xmlview9 | SELECT XMLSERIALIZE(CONTENT 'good'::"xml" AS text) AS "xmlserialize";
402402
(9 rows)
403403

404+
-- Text XPath expressions evaluation
405+
SELECT xmlpath('/value', data) FROM xmltest;
406+
xmlpath
407+
---------
408+
{one}
409+
{two}
410+
(2 rows)
411+
412+
SELECT xmlpath(NULL, NULL) IS NULL FROM xmltest;
413+
?column?
414+
----------
415+
t
416+
t
417+
(2 rows)
418+
419+
SELECT xmlpath('', '<!-- error -->');
420+
ERROR: empty XPath expression
421+
CONTEXT: SQL function "xmlpath" statement 1
422+
SELECT xmlpath('//text()', '<local:data xmlns:local="http://127.0.0.1"><local:piece id="1">number one</local:piece><local:piece id="2" /></local:data>');
423+
xmlpath
424+
----------------
425+
{"number one"}
426+
(1 row)
427+
428+
SELECT xmlpath('//loc:piece/@id', '<local:data xmlns:local="http://127.0.0.1"><local:piece id="1">number one</local:piece><local:piece id="2" /></local:data>', ARRAY[ARRAY['loc'], ARRAY['http://127.0.0.1']]);
429+
xmlpath
430+
---------
431+
{1,2}
432+
(1 row)
433+

src/test/regress/expected/xml_1.out

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -197,3 +197,18 @@ SELECT table_name, view_definition FROM information_schema.views
197197
xmlview5 | SELECT XMLPARSE(CONTENT '<abc>x</abc>'::text STRIP WHITESPACE) AS "xmlparse";
198198
(2 rows)
199199

200+
-- Text XPath expressions evaluation
201+
SELECT xmlpath('/value', data) FROM xmltest;
202+
xmlpath
203+
---------
204+
(0 rows)
205+
206+
SELECT xmlpath(NULL, NULL) IS NULL FROM xmltest;
207+
ERROR: no XML support in this installation
208+
CONTEXT: SQL function "xmlpath" statement 1
209+
SELECT xmlpath('', '<!-- error -->');
210+
ERROR: no XML support in this installation
211+
SELECT xmlpath('//text()', '<local:data xmlns:local="http://127.0.0.1"><local:piece id="1">number one</local:piece><local:piece id="2" /></local:data>');
212+
ERROR: no XML support in this installation
213+
SELECT xmlpath('//loc:piece/@id', '<local:data xmlns:local="http://127.0.0.1"><local:piece id="1">number one</local:piece><local:piece id="2" /></local:data>', ARRAY[ARRAY['loc'], ARRAY['http://127.0.0.1']]);
214+
ERROR: no XML support in this installation

src/test/regress/sql/xml.sql

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,3 +144,11 @@ CREATE VIEW xmlview9 AS SELECT xmlserialize(content 'good' as text);
144144

145145
SELECT table_name, view_definition FROM information_schema.views
146146
WHERE table_name LIKE 'xmlview%' ORDER BY 1;
147+
148+
-- Text XPath expressions evaluation
149+
150+
SELECT xmlpath('/value', data) FROM xmltest;
151+
SELECT xmlpath(NULL, NULL) IS NULL FROM xmltest;
152+
SELECT xmlpath('', '<!-- error -->');
153+
SELECT xmlpath('//text()', '<local:data xmlns:local="http://127.0.0.1"><local:piece id="1">number one</local:piece><local:piece id="2" /></local:data>');
154+
SELECT xmlpath('//loc:piece/@id', '<local:data xmlns:local="http://127.0.0.1"><local:piece id="1">number one</local:piece><local:piece id="2" /></local:data>', ARRAY[ARRAY['loc'], ARRAY['http://127.0.0.1']]);

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy