Skip to content

Commit 0ffbd34

Browse files
tglsfdc, ewie
and committed
Avoid regression in the size of XML input that we will accept.
This mostly reverts commit 6082b3d, "Use xmlParseInNodeContext not xmlParseBalancedChunkMemory".

It turns out that xmlParseInNodeContext will reject text chunks exceeding 10MB, while (in most libxml2 versions) xmlParseBalancedChunkMemory will not. The bleeding-edge libxml2 bug that we needed to work around a year ago is presumably no longer a factor, and the argument that xmlParseBalancedChunkMemory is semi-deprecated is not enough to justify a functionality regression. Hence, go back to doing it the old way.

Reported-by: Michael Paquier <michael@paquier.xyz>
Author: Michael Paquier <michael@paquier.xyz>
Co-authored-by: Erik Wienhold <ewie@ewie.name>
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://postgr.es/m/aIGknLuc8b8ega2X@paquier.xyz
Backpatch-through: 13
1 parent b248a3b commit 0ffbd34

File tree

1 file changed

+28
-29
lines changed
  • src/backend/utils/adt

1 file changed

+28
-29
lines changed

src/backend/utils/adt/xml.c

Lines changed: 28 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1528,6 +1528,7 @@ xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace,
15281528
PgXmlErrorContext *xmlerrcxt;
15291529
volatile xmlParserCtxtPtr ctxt = NULL;
15301530
volatile xmlDocPtr doc = NULL;
1531+
volatile int save_keep_blanks = -1;
15311532

15321533
len = VARSIZE_ANY_EXHDR(data); /* will be useful later */
15331534
string = xml_text2xmlChar(data);
@@ -1544,7 +1545,6 @@ xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace,
15441545
PG_TRY();
15451546
{
15461547
bool parse_as_document = false;
1547-
int options;
15481548
int res_code;
15491549
size_t count = 0;
15501550
xmlChar *version = NULL;
@@ -1570,25 +1570,28 @@ xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace,
15701570
parse_as_document = true;
15711571
}
15721572

1573-
/*
1574-
* Select parse options.
1575-
*
1576-
* Note that here we try to apply DTD defaults (XML_PARSE_DTDATTR)
1577-
* according to SQL/XML:2008 GR 10.16.7.d: 'Default values defined by
1578-
* internal DTD are applied'. As for external DTDs, we try to support
1579-
* them too (see SQL/XML:2008 GR 10.16.7.e), but that doesn't really
1580-
* happen because xmlPgEntityLoader prevents it.
1581-
*/
1582-
options = XML_PARSE_NOENT | XML_PARSE_DTDATTR
1583-
| (preserve_whitespace ? 0 : XML_PARSE_NOBLANKS);
1584-
15851573
if (parse_as_document)
15861574
{
1575+
int options;
1576+
1577+
/* set up parser context used by xmlCtxtReadDoc */
15871578
ctxt = xmlNewParserCtxt();
15881579
if (ctxt == NULL || xmlerrcxt->err_occurred)
15891580
xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
15901581
"could not allocate parser context");
15911582

1583+
/*
1584+
* Select parse options.
1585+
*
1586+
* Note that here we try to apply DTD defaults (XML_PARSE_DTDATTR)
1587+
* according to SQL/XML:2008 GR 10.16.7.d: 'Default values defined
1588+
* by internal DTD are applied'. As for external DTDs, we try to
1589+
* support them too (see SQL/XML:2008 GR 10.16.7.e), but that
1590+
* doesn't really happen because xmlPgEntityLoader prevents it.
1591+
*/
1592+
options = XML_PARSE_NOENT | XML_PARSE_DTDATTR
1593+
| (preserve_whitespace ? 0 : XML_PARSE_NOBLANKS);
1594+
15921595
doc = xmlCtxtReadDoc(ctxt, utf8string,
15931596
NULL, /* no URL */
15941597
"UTF-8",
@@ -1607,43 +1610,36 @@ xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace,
16071610
}
16081611
else
16091612
{
1610-
xmlNodePtr root;
1611-
1612-
/* set up document with empty root node to be the context node */
1613+
/* set up document that xmlParseBalancedChunkMemory will add to */
16131614
doc = xmlNewDoc(version);
16141615
Assert(doc->encoding == NULL);
16151616
doc->encoding = xmlStrdup((const xmlChar *) "UTF-8");
16161617
doc->standalone = standalone;
16171618

1618-
root = xmlNewNode(NULL, (const xmlChar *) "content-root");
1619-
if (root == NULL || xmlerrcxt->err_occurred)
1620-
xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
1621-
"could not allocate xml node");
1622-
/* This attaches root to doc, so we need not free it separately. */
1623-
xmlDocSetRootElement(doc, root);
1619+
/* set parse options --- have to do this the ugly way */
1620+
save_keep_blanks = xmlKeepBlanksDefault(preserve_whitespace ? 1 : 0);
16241621

16251622
/* allow empty content */
16261623
if (*(utf8string + count))
16271624
{
16281625
xmlNodePtr node_list = NULL;
1629-
xmlParserErrors res;
16301626

1631-
res = xmlParseInNodeContext(root,
1632-
(char *) utf8string + count,
1633-
strlen((char *) utf8string + count),
1634-
options,
1635-
&node_list);
1627+
res_code = xmlParseBalancedChunkMemory(doc, NULL, NULL, 0,
1628+
utf8string + count,
1629+
&node_list);
16361630

16371631
xmlFreeNodeList(node_list);
16381632

1639-
if (res != XML_ERR_OK || xmlerrcxt->err_occurred)
1633+
if (res_code != 0 || xmlerrcxt->err_occurred)
16401634
xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_CONTENT,
16411635
"invalid XML content");
16421636
}
16431637
}
16441638
}
16451639
PG_CATCH();
16461640
{
1641+
if (save_keep_blanks != -1)
1642+
xmlKeepBlanksDefault(save_keep_blanks);
16471643
if (doc != NULL)
16481644
xmlFreeDoc(doc);
16491645
if (ctxt != NULL)
@@ -1655,6 +1651,9 @@ xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace,
16551651
}
16561652
PG_END_TRY();
16571653

1654+
if (save_keep_blanks != -1)
1655+
xmlKeepBlanksDefault(save_keep_blanks);
1656+
16581657
if (ctxt != NULL)
16591658
xmlFreeParserCtxt(ctxt);
16601659

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy