diff --git a/javascript/ql/lib/change-notes/2025-07-15-xml-bomb-sinks.md b/javascript/ql/lib/change-notes/2025-07-15-xml-bomb-sinks.md new file mode 100644 index 000000000000..b10509c0e068 --- /dev/null +++ b/javascript/ql/lib/change-notes/2025-07-15-xml-bomb-sinks.md @@ -0,0 +1,4 @@ +--- +category: minorAnalysis +--- +* Removed `libxmljs` as an XML bomb sink. The underlying libxml2 library now includes [entity reference loop detection](https://github.com/GNOME/libxml2/blob/0c948334a8f5c66d50e9f8992e62998017dc4fc6/NEWS#L905-L908) that prevents XML bomb attacks. diff --git a/javascript/ql/lib/semmle/javascript/frameworks/XmlParsers.qll b/javascript/ql/lib/semmle/javascript/frameworks/XmlParsers.qll index a451182aa21a..c0a783c17644 100644 --- a/javascript/ql/lib/semmle/javascript/frameworks/XmlParsers.qll +++ b/javascript/ql/lib/semmle/javascript/frameworks/XmlParsers.qll @@ -49,9 +49,7 @@ module XML { override JS::Expr getSourceArgument() { result = this.getArgument(0) } override predicate resolvesEntities(EntityKind kind) { - // internal entities are always resolved - kind = InternalEntity() - or + not kind = InternalEntity() and // other entities are only resolved if the configuration option `noent` is set to `true` exists(JS::Expr noent | this.hasOptionArgument(1, "noent", noent) and @@ -126,8 +124,9 @@ module XML { override JS::Expr getSourceArgument() { result = this.getArgument(0) } override predicate resolvesEntities(EntityKind kind) { - // entities are resolved by default - any() + // SAX parsers in libxmljs also inherit libxml2's protection against XML bombs + kind = ExternalEntity(_) or + kind = ParameterEntity(true) } override DataFlow::Node getAResult() { @@ -149,8 +148,9 @@ module XML { override JS::Expr getSourceArgument() { result = this.getArgument(0) } override predicate resolvesEntities(EntityKind kind) { - // entities are resolved by default - any() + // SAX push parsers in libxmljs also inherit libxml2's protection against XML bombs + kind = ExternalEntity(_) or + kind = ParameterEntity(true) } override DataFlow::Node getAResult() { diff --git a/javascript/ql/test/query-tests/Security/CWE-776/XmlBomb.expected b/javascript/ql/test/query-tests/Security/CWE-776/XmlBomb.expected index 2b4d41804915..6a5c2adfb7a1 100644 --- a/javascript/ql/test/query-tests/Security/CWE-776/XmlBomb.expected +++ b/javascript/ql/test/query-tests/Security/CWE-776/XmlBomb.expected @@ -5,10 +5,6 @@ | domparser.js:11:57:11:59 | src | domparser.js:2:13:2:36 | documen ... .search | domparser.js:11:57:11:59 | src | XML parsing depends on a $@ without guarding against uncontrolled entity expansion. | domparser.js:2:13:2:36 | documen ... .search | user-provided value | | expat.js:6:16:6:36 | req.par ... e-xml") | expat.js:6:16:6:36 | req.par ... e-xml") | expat.js:6:16:6:36 | req.par ... e-xml") | XML parsing depends on a $@ without guarding against uncontrolled entity expansion. | expat.js:6:16:6:36 | req.par ... e-xml") | user-provided value | | jquery.js:4:14:4:16 | src | jquery.js:2:13:2:36 | documen ... .search | jquery.js:4:14:4:16 | src | XML parsing depends on a $@ without guarding against uncontrolled entity expansion. | jquery.js:2:13:2:36 | documen ... .search | user-provided value | -| libxml.js:5:21:5:41 | req.par ... e-xml") | libxml.js:5:21:5:41 | req.par ... e-xml") | libxml.js:5:21:5:41 | req.par ... e-xml") | XML parsing depends on a $@ without guarding against uncontrolled entity expansion. | libxml.js:5:21:5:41 | req.par ... e-xml") | user-provided value | -| libxml.noent.js:5:21:5:41 | req.par ... e-xml") | libxml.noent.js:5:21:5:41 | req.par ... e-xml") | libxml.noent.js:5:21:5:41 | req.par ... e-xml") | XML parsing depends on a $@ without guarding against uncontrolled entity expansion. | libxml.noent.js:5:21:5:41 | req.par ... e-xml") | user-provided value | -| libxml.sax.js:6:22:6:42 | req.par ... e-xml") | libxml.sax.js:6:22:6:42 | req.par ... e-xml") | libxml.sax.js:6:22:6:42 | req.par ... e-xml") | XML parsing depends on a $@ without guarding against uncontrolled entity expansion. | libxml.sax.js:6:22:6:42 | req.par ... e-xml") | user-provided value | -| libxml.saxpush.js:6:15:6:35 | req.par ... e-xml") | libxml.saxpush.js:6:15:6:35 | req.par ... e-xml") | libxml.saxpush.js:6:15:6:35 | req.par ... e-xml") | XML parsing depends on a $@ without guarding against uncontrolled entity expansion. | libxml.saxpush.js:6:15:6:35 | req.par ... e-xml") | user-provided value | edges | closure.js:2:7:2:36 | src | closure.js:3:24:3:26 | src | provenance | | | closure.js:2:13:2:36 | documen ... .search | closure.js:2:7:2:36 | src | provenance | | @@ -31,8 +27,4 @@ nodes | jquery.js:2:7:2:36 | src | semmle.label | src | | jquery.js:2:13:2:36 | documen ... .search | semmle.label | documen ... .search | | jquery.js:4:14:4:16 | src | semmle.label | src | -| libxml.js:5:21:5:41 | req.par ... e-xml") | semmle.label | req.par ... e-xml") | -| libxml.noent.js:5:21:5:41 | req.par ... e-xml") | semmle.label | req.par ... e-xml") | -| libxml.sax.js:6:22:6:42 | req.par ... e-xml") | semmle.label | req.par ... e-xml") | -| libxml.saxpush.js:6:15:6:35 | req.par ... e-xml") | semmle.label | req.par ... e-xml") | subpaths diff --git a/javascript/ql/test/query-tests/Security/CWE-776/libxml.js b/javascript/ql/test/query-tests/Security/CWE-776/libxml.js index 6af2da17ef5d..3f16e457dc38 100644 --- a/javascript/ql/test/query-tests/Security/CWE-776/libxml.js +++ b/javascript/ql/test/query-tests/Security/CWE-776/libxml.js @@ -2,5 +2,5 @@ const express = require('express'); const libxmljs = require('libxmljs'); express().get('/some/path', function(req) { - libxmljs.parseXml(req.param("some-xml")); // $ Alert - libxml expands internal general entities by default + libxmljs.parseXml(req.param("some-xml")); }); diff --git a/javascript/ql/test/query-tests/Security/CWE-776/libxml.noent.js b/javascript/ql/test/query-tests/Security/CWE-776/libxml.noent.js index da133cc782e0..de633c04688a 100644 --- a/javascript/ql/test/query-tests/Security/CWE-776/libxml.noent.js +++ b/javascript/ql/test/query-tests/Security/CWE-776/libxml.noent.js @@ -2,5 +2,5 @@ const express = require('express'); const libxmljs = require('libxmljs'); express().get('/some/path', function(req) { - libxmljs.parseXml(req.param("some-xml"), { noent: true }); // $ Alert - unguarded entity expansion + libxmljs.parseXml(req.param("some-xml"), { noent: true }); }); diff --git a/javascript/ql/test/query-tests/Security/CWE-776/libxml.sax.js b/javascript/ql/test/query-tests/Security/CWE-776/libxml.sax.js index 6049a8297a98..dc7ec2ddec00 100644 --- a/javascript/ql/test/query-tests/Security/CWE-776/libxml.sax.js +++ b/javascript/ql/test/query-tests/Security/CWE-776/libxml.sax.js @@ -3,5 +3,5 @@ const libxmljs = require('libxmljs'); express().get('/some/path', function(req) { const parser = new libxmljs.SaxParser(); - parser.parseString(req.param("some-xml")); // $ Alert - the SAX parser expands external entities by default + parser.parseString(req.param("some-xml")); }); diff --git a/javascript/ql/test/query-tests/Security/CWE-776/libxml.saxpush.js b/javascript/ql/test/query-tests/Security/CWE-776/libxml.saxpush.js index 2fc4afc8ce47..15e63bf5d532 100644 --- a/javascript/ql/test/query-tests/Security/CWE-776/libxml.saxpush.js +++ b/javascript/ql/test/query-tests/Security/CWE-776/libxml.saxpush.js @@ -3,5 +3,5 @@ const libxmljs = require('libxmljs'); express().get('/some/path', function(req) { const parser = new libxmljs.SaxPushParser(); - parser.push(req.param("some-xml")); // $ Alert - the SAX parser expands external entities by default + parser.push(req.param("some-xml")); }); diff --git a/python/ql/lib/change-notes/2025-07-15-xml-bomb-sinks-python.md b/python/ql/lib/change-notes/2025-07-15-xml-bomb-sinks-python.md new file mode 100644 index 000000000000..11ff0181a017 --- /dev/null +++ b/python/ql/lib/change-notes/2025-07-15-xml-bomb-sinks-python.md @@ -0,0 +1,4 @@ +--- +category: minorAnalysis +--- +* Removed `lxml` as an XML bomb sink. The underlying libxml2 library now includes [entity reference loop detection](https://github.com/lxml/lxml/blob/f33ac2c2f5f9c4c4c1fc47f363be96db308f2fa6/doc/FAQ.txt#L1077) that prevents XML bomb attacks. diff --git a/python/ql/lib/semmle/python/frameworks/Lxml.qll b/python/ql/lib/semmle/python/frameworks/Lxml.qll index c503d7d5cfbc..e043093ed49e 100644 --- a/python/ql/lib/semmle/python/frameworks/Lxml.qll +++ b/python/ql/lib/semmle/python/frameworks/Lxml.qll @@ -129,11 +129,6 @@ module Lxml { any(True t) ) or - kind.isXmlBomb() and - this.getKeywordParameter("huge_tree").getAValueReachingSink().asExpr() = any(True t) and - not this.getKeywordParameter("resolve_entities").getAValueReachingSink().asExpr() = - any(False t) - or kind.isDtdRetrieval() and this.getKeywordParameter("load_dtd").getAValueReachingSink().asExpr() = any(True t) and this.getKeywordParameter("no_network").getAValueReachingSink().asExpr() = any(False t) @@ -305,9 +300,8 @@ module Lxml { // note that there is no `resolve_entities` argument, so it's not possible to turn off XXE :O kind.isXxe() or - kind.isXmlBomb() and - this.getKeywordParameter("huge_tree").getAValueReachingSink().asExpr() = any(True t) - or + // libxml2 has built-in protection against XML bombs via entity reference loop detection, + // so lxml is not vulnerable to XML bomb attacks. kind.isDtdRetrieval() and this.getKeywordParameter("load_dtd").getAValueReachingSink().asExpr() = any(True t) and this.getKeywordParameter("no_network").getAValueReachingSink().asExpr() = any(False t) diff --git a/python/ql/test/library-tests/frameworks/lxml/parsing.py b/python/ql/test/library-tests/frameworks/lxml/parsing.py index 63cdc79b4c1d..bff508f24ab0 100644 --- a/python/ql/test/library-tests/frameworks/lxml/parsing.py +++ b/python/ql/test/library-tests/frameworks/lxml/parsing.py @@ -50,7 +50,7 @@ # Billion laughs vuln (also XXE) parser = lxml.etree.XMLParser(huge_tree=True) -lxml.etree.fromstring(x, parser=parser) # $ decodeFormat=XML decodeInput=x xmlVuln='XML bomb' xmlVuln='XXE' decodeOutput=lxml.etree.fromstring(..) +lxml.etree.fromstring(x, parser=parser) # $ decodeFormat=XML decodeInput=x xmlVuln='XXE' decodeOutput=lxml.etree.fromstring(..) # Safe for both Billion laughs and XXE parser = lxml.etree.XMLParser(resolve_entities=False, huge_tree=True) @@ -63,5 +63,5 @@ # iterparse configurations ... this doesn't use a parser argument but takes MOST (!) of # the normal XMLParser arguments. Specifically, it doesn't allow disabling XXE :O -lxml.etree.iterparse(xml_file, huge_tree=True) # $ decodeFormat=XML decodeInput=xml_file xmlVuln='XML bomb' xmlVuln='XXE' decodeOutput=lxml.etree.iterparse(..) getAPathArgument=xml_file +lxml.etree.iterparse(xml_file, huge_tree=True) # $ decodeFormat=XML decodeInput=xml_file xmlVuln='XXE' decodeOutput=lxml.etree.iterparse(..) getAPathArgument=xml_file lxml.etree.iterparse(xml_file, load_dtd=True, no_network=False) # $ decodeFormat=XML decodeInput=xml_file xmlVuln='DTD retrieval' xmlVuln='XXE' decodeOutput=lxml.etree.iterparse(..) getAPathArgument=xml_file diff --git a/python/ql/test/query-tests/Security/CWE-776-XmlBomb/XmlBomb.expected b/python/ql/test/query-tests/Security/CWE-776-XmlBomb/XmlBomb.expected index 8a7d7bc75e3d..e217064d1dfc 100644 --- a/python/ql/test/query-tests/Security/CWE-776-XmlBomb/XmlBomb.expected +++ b/python/ql/test/query-tests/Security/CWE-776-XmlBomb/XmlBomb.expected @@ -1,14 +1,4 @@ edges -| test.py:1:26:1:32 | ControlFlowNode for ImportMember | test.py:1:26:1:32 | ControlFlowNode for request | provenance | | -| test.py:1:26:1:32 | ControlFlowNode for request | test.py:19:19:19:25 | ControlFlowNode for request | provenance | | -| test.py:19:5:19:15 | ControlFlowNode for xml_content | test.py:30:34:30:44 | ControlFlowNode for xml_content | provenance | | -| test.py:19:19:19:25 | ControlFlowNode for request | test.py:19:5:19:15 | ControlFlowNode for xml_content | provenance | AdditionalTaintStep | nodes -| test.py:1:26:1:32 | ControlFlowNode for ImportMember | semmle.label | ControlFlowNode for ImportMember | -| test.py:1:26:1:32 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| test.py:19:5:19:15 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | -| test.py:19:19:19:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| test.py:30:34:30:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | subpaths #select -| test.py:30:34:30:44 | ControlFlowNode for xml_content | test.py:1:26:1:32 | ControlFlowNode for ImportMember | test.py:30:34:30:44 | ControlFlowNode for xml_content | XML parsing depends on a $@ without guarding against uncontrolled entity expansion. | test.py:1:26:1:32 | ControlFlowNode for ImportMember | user-provided value |
Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.
Alternative Proxies: