Skip to content

Commit fd4ad33

Browse files
tglsfdc and ewie
committed
Avoid regression in the size of XML input that we will accept.
This mostly reverts commit 6082b3d, "Use xmlParseInNodeContext not xmlParseBalancedChunkMemory".

It turns out that xmlParseInNodeContext will reject text chunks exceeding 10MB, while (in most libxml2 versions) xmlParseBalancedChunkMemory will not. The bleeding-edge libxml2 bug that we needed to work around a year ago is presumably no longer a factor, and the argument that xmlParseBalancedChunkMemory is semi-deprecated is not enough to justify a functionality regression. Hence, go back to doing it the old way.

Reported-by: Michael Paquier <michael@paquier.xyz>
Author: Michael Paquier <michael@paquier.xyz>
Co-authored-by: Erik Wienhold <ewie@ewie.name>
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://postgr.es/m/aIGknLuc8b8ega2X@paquier.xyz
Backpatch-through: 13
1 parent 13559de commit fd4ad33

File tree

1 file changed

+30
-38
lines changed
  • src/backend/utils/adt

1 file changed

+30
-38
lines changed

src/backend/utils/adt/xml.c

Lines changed: 30 additions & 38 deletions
Original file line number | Diff line number | Diff line change
@@ -1730,7 +1730,7 @@ xml_doctype_in_content(const xmlChar *str)
17301730
* xmloption_arg, but a DOCTYPE node in the input can force DOCUMENT mode).
17311731
*
17321732
* If parsed_nodes isn't NULL and we parse in CONTENT mode, the list
1733-
* of parsed nodes from the xmlParseInNodeContext call will be returned
1733+
* of parsed nodes from the xmlParseBalancedChunkMemory call will be returned
17341734
* to *parsed_nodes. (It is caller's responsibility to free that.)
17351735
*
17361736
* Errors normally result in ereport(ERROR), but if escontext is an
@@ -1756,6 +1756,7 @@ xml_parse(text *data, XmlOptionType xmloption_arg,
17561756
PgXmlErrorContext *xmlerrcxt;
17571757
volatile xmlParserCtxtPtr ctxt = NULL;
17581758
volatile xmlDocPtr doc = NULL;
1759+
volatile int save_keep_blanks = -1;
17591760

17601761
/*
17611762
* This step looks annoyingly redundant, but we must do it to have a
@@ -1783,7 +1784,6 @@ xml_parse(text *data, XmlOptionType xmloption_arg,
17831784
PG_TRY();
17841785
{
17851786
bool parse_as_document = false;
1786-
int options;
17871787
int res_code;
17881788
size_t count = 0;
17891789
xmlChar *version = NULL;
@@ -1814,18 +1814,6 @@ xml_parse(text *data, XmlOptionType xmloption_arg,
18141814
parse_as_document = true;
18151815
}
18161816

1817-
/*
1818-
* Select parse options.
1819-
*
1820-
* Note that here we try to apply DTD defaults (XML_PARSE_DTDATTR)
1821-
* according to SQL/XML:2008 GR 10.16.7.d: 'Default values defined by
1822-
* internal DTD are applied'. As for external DTDs, we try to support
1823-
* them too (see SQL/XML:2008 GR 10.16.7.e), but that doesn't really
1824-
* happen because xmlPgEntityLoader prevents it.
1825-
*/
1826-
options = XML_PARSE_NOENT | XML_PARSE_DTDATTR
1827-
| (preserve_whitespace ? 0 : XML_PARSE_NOBLANKS);
1828-
18291817
/* initialize output parameters */
18301818
if (parsed_xmloptiontype != NULL)
18311819
*parsed_xmloptiontype = parse_as_document ? XMLOPTION_DOCUMENT :
@@ -1835,11 +1823,26 @@ xml_parse(text *data, XmlOptionType xmloption_arg,
18351823

18361824
if (parse_as_document)
18371825
{
1826+
int options;
1827+
1828+
/* set up parser context used by xmlCtxtReadDoc */
18381829
ctxt = xmlNewParserCtxt();
18391830
if (ctxt == NULL || xmlerrcxt->err_occurred)
18401831
xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
18411832
"could not allocate parser context");
18421833

1834+
/*
1835+
* Select parse options.
1836+
*
1837+
* Note that here we try to apply DTD defaults (XML_PARSE_DTDATTR)
1838+
* according to SQL/XML:2008 GR 10.16.7.d: 'Default values defined
1839+
* by internal DTD are applied'. As for external DTDs, we try to
1840+
* support them too (see SQL/XML:2008 GR 10.16.7.e), but that
1841+
* doesn't really happen because xmlPgEntityLoader prevents it.
1842+
*/
1843+
options = XML_PARSE_NOENT | XML_PARSE_DTDATTR
1844+
| (preserve_whitespace ? 0 : XML_PARSE_NOBLANKS);
1845+
18431846
doc = xmlCtxtReadDoc(ctxt, utf8string,
18441847
NULL, /* no URL */
18451848
"UTF-8",
@@ -1861,10 +1864,7 @@ xml_parse(text *data, XmlOptionType xmloption_arg,
18611864
}
18621865
else
18631866
{
1864-
xmlNodePtr root;
1865-
xmlNodePtr oldroot PG_USED_FOR_ASSERTS_ONLY;
1866-
1867-
/* set up document with empty root node to be the context node */
1867+
/* set up document that xmlParseBalancedChunkMemory will add to */
18681868
doc = xmlNewDoc(version);
18691869
if (doc == NULL || xmlerrcxt->err_occurred)
18701870
xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
@@ -1877,36 +1877,23 @@ xml_parse(text *data, XmlOptionType xmloption_arg,
18771877
"could not allocate XML document");
18781878
doc->standalone = standalone;
18791879

1880-
root = xmlNewNode(NULL, (const xmlChar *) "content-root");
1881-
if (root == NULL || xmlerrcxt->err_occurred)
1882-
xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
1883-
"could not allocate xml node");
1884-
1885-
/*
1886-
* This attaches root to doc, so we need not free it separately;
1887-
* and there can't yet be any old root to free.
1888-
*/
1889-
oldroot = xmlDocSetRootElement(doc, root);
1890-
Assert(oldroot == NULL);
1880+
/* set parse options --- have to do this the ugly way */
1881+
save_keep_blanks = xmlKeepBlanksDefault(preserve_whitespace ? 1 : 0);
18911882

18921883
/* allow empty content */
18931884
if (*(utf8string + count))
18941885
{
18951886
xmlNodePtr node_list = NULL;
1896-
xmlParserErrors res;
1897-
1898-
res = xmlParseInNodeContext(root,
1899-
(char *) utf8string + count,
1900-
strlen((char *) utf8string + count),
1901-
options,
1902-
&node_list);
19031887

1904-
if (res != XML_ERR_OK || xmlerrcxt->err_occurred)
1888+
res_code = xmlParseBalancedChunkMemory(doc, NULL, NULL, 0,
1889+
utf8string + count,
1890+
&node_list);
1891+
if (res_code != 0 || xmlerrcxt->err_occurred)
19051892
{
1906-
xmlFreeNodeList(node_list);
19071893
xml_errsave(escontext, xmlerrcxt,
19081894
ERRCODE_INVALID_XML_CONTENT,
19091895
"invalid XML content");
1896+
xmlFreeNodeList(node_list);
19101897
goto fail;
19111898
}
19121899

@@ -1922,6 +1909,8 @@ xml_parse(text *data, XmlOptionType xmloption_arg,
19221909
}
19231910
PG_CATCH();
19241911
{
1912+
if (save_keep_blanks != -1)
1913+
xmlKeepBlanksDefault(save_keep_blanks);
19251914
if (doc != NULL)
19261915
xmlFreeDoc(doc);
19271916
if (ctxt != NULL)
@@ -1933,6 +1922,9 @@ xml_parse(text *data, XmlOptionType xmloption_arg,
19331922
}
19341923
PG_END_TRY();
19351924

1925+
if (save_keep_blanks != -1)
1926+
xmlKeepBlanksDefault(save_keep_blanks);
1927+
19361928
if (ctxt != NULL)
19371929
xmlFreeParserCtxt(ctxt);
19381930

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy