Skip to content

Commit 589d6e6

Browse files
tglsfdc and ewie committed
Avoid regression in the size of XML input that we will accept.
This mostly reverts commit 6082b3d, "Use xmlParseInNodeContext not xmlParseBalancedChunkMemory".

It turns out that xmlParseInNodeContext will reject text chunks exceeding 10MB, while (in most libxml2 versions) xmlParseBalancedChunkMemory will not. The bleeding-edge libxml2 bug that we needed to work around a year ago is presumably no longer a factor, and the argument that xmlParseBalancedChunkMemory is semi-deprecated is not enough to justify a functionality regression. Hence, go back to doing it the old way.

Reported-by: Michael Paquier <michael@paquier.xyz>
Author: Michael Paquier <michael@paquier.xyz>
Co-authored-by: Erik Wienhold <ewie@ewie.name>
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://postgr.es/m/aIGknLuc8b8ega2X@paquier.xyz
Backpatch-through: 13
1 parent f32a471 commit 589d6e6

File tree

1 file changed

+28
-29
lines changed
  • src/backend/utils/adt

1 file changed

+28
-29
lines changed

src/backend/utils/adt/xml.c

Lines changed: 28 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1529,6 +1529,7 @@ xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace,
15291529
PgXmlErrorContext *xmlerrcxt;
15301530
volatile xmlParserCtxtPtr ctxt = NULL;
15311531
volatile xmlDocPtr doc = NULL;
1532+
volatile int save_keep_blanks = -1;
15321533

15331534
len = VARSIZE_ANY_EXHDR(data); /* will be useful later */
15341535
string = xml_text2xmlChar(data);
@@ -1545,7 +1546,6 @@ xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace,
15451546
PG_TRY();
15461547
{
15471548
bool parse_as_document = false;
1548-
int options;
15491549
int res_code;
15501550
size_t count = 0;
15511551
xmlChar *version = NULL;
@@ -1571,25 +1571,28 @@ xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace,
15711571
parse_as_document = true;
15721572
}
15731573

1574-
/*
1575-
* Select parse options.
1576-
*
1577-
* Note that here we try to apply DTD defaults (XML_PARSE_DTDATTR)
1578-
* according to SQL/XML:2008 GR 10.16.7.d: 'Default values defined by
1579-
* internal DTD are applied'. As for external DTDs, we try to support
1580-
* them too (see SQL/XML:2008 GR 10.16.7.e), but that doesn't really
1581-
* happen because xmlPgEntityLoader prevents it.
1582-
*/
1583-
options = XML_PARSE_NOENT | XML_PARSE_DTDATTR
1584-
| (preserve_whitespace ? 0 : XML_PARSE_NOBLANKS);
1585-
15861574
if (parse_as_document)
15871575
{
1576+
int options;
1577+
1578+
/* set up parser context used by xmlCtxtReadDoc */
15881579
ctxt = xmlNewParserCtxt();
15891580
if (ctxt == NULL || xmlerrcxt->err_occurred)
15901581
xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
15911582
"could not allocate parser context");
15921583

1584+
/*
1585+
* Select parse options.
1586+
*
1587+
* Note that here we try to apply DTD defaults (XML_PARSE_DTDATTR)
1588+
* according to SQL/XML:2008 GR 10.16.7.d: 'Default values defined
1589+
* by internal DTD are applied'. As for external DTDs, we try to
1590+
* support them too (see SQL/XML:2008 GR 10.16.7.e), but that
1591+
* doesn't really happen because xmlPgEntityLoader prevents it.
1592+
*/
1593+
options = XML_PARSE_NOENT | XML_PARSE_DTDATTR
1594+
| (preserve_whitespace ? 0 : XML_PARSE_NOBLANKS);
1595+
15931596
doc = xmlCtxtReadDoc(ctxt, utf8string,
15941597
NULL, /* no URL */
15951598
"UTF-8",
@@ -1608,43 +1611,36 @@ xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace,
16081611
}
16091612
else
16101613
{
1611-
xmlNodePtr root;
1612-
1613-
/* set up document with empty root node to be the context node */
1614+
/* set up document that xmlParseBalancedChunkMemory will add to */
16141615
doc = xmlNewDoc(version);
16151616
Assert(doc->encoding == NULL);
16161617
doc->encoding = xmlStrdup((const xmlChar *) "UTF-8");
16171618
doc->standalone = standalone;
16181619

1619-
root = xmlNewNode(NULL, (const xmlChar *) "content-root");
1620-
if (root == NULL || xmlerrcxt->err_occurred)
1621-
xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
1622-
"could not allocate xml node");
1623-
/* This attaches root to doc, so we need not free it separately. */
1624-
xmlDocSetRootElement(doc, root);
1620+
/* set parse options --- have to do this the ugly way */
1621+
save_keep_blanks = xmlKeepBlanksDefault(preserve_whitespace ? 1 : 0);
16251622

16261623
/* allow empty content */
16271624
if (*(utf8string + count))
16281625
{
16291626
xmlNodePtr node_list = NULL;
1630-
xmlParserErrors res;
16311627

1632-
res = xmlParseInNodeContext(root,
1633-
(char *) utf8string + count,
1634-
strlen((char *) utf8string + count),
1635-
options,
1636-
&node_list);
1628+
res_code = xmlParseBalancedChunkMemory(doc, NULL, NULL, 0,
1629+
utf8string + count,
1630+
&node_list);
16371631

16381632
xmlFreeNodeList(node_list);
16391633

1640-
if (res != XML_ERR_OK || xmlerrcxt->err_occurred)
1634+
if (res_code != 0 || xmlerrcxt->err_occurred)
16411635
xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_CONTENT,
16421636
"invalid XML content");
16431637
}
16441638
}
16451639
}
16461640
PG_CATCH();
16471641
{
1642+
if (save_keep_blanks != -1)
1643+
xmlKeepBlanksDefault(save_keep_blanks);
16481644
if (doc != NULL)
16491645
xmlFreeDoc(doc);
16501646
if (ctxt != NULL)
@@ -1656,6 +1652,9 @@ xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace,
16561652
}
16571653
PG_END_TRY();
16581654

1655+
if (save_keep_blanks != -1)
1656+
xmlKeepBlanksDefault(save_keep_blanks);
1657+
16591658
if (ctxt != NULL)
16601659
xmlFreeParserCtxt(ctxt);
16611660

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy