From 0e615586440ca11393a9f81ae9c5c5e7dbbf6372 Mon Sep 17 00:00:00 2001 From: jorgectf Date: Sat, 19 Jun 2021 18:39:58 +0200 Subject: [PATCH 01/79] Empty commit --- python/ql/src/semmle/python/frameworks/XML.qll | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 python/ql/src/semmle/python/frameworks/XML.qll diff --git a/python/ql/src/semmle/python/frameworks/XML.qll b/python/ql/src/semmle/python/frameworks/XML.qll new file mode 100644 index 000000000000..e69de29bb2d1 From 78deec84fc8ebd17e4aed6d108455e9aa3c58fce Mon Sep 17 00:00:00 2001 From: jorgectf Date: Tue, 22 Jun 2021 16:41:08 +0200 Subject: [PATCH 02/79] Upload main structure and initial tests --- .../src/experimental/Security/CWE-611/XXE.ql | 20 ++++++ .../Security/CWE-611/unit_tests/XXE.qlref | 1 + .../Security/CWE-611/unit_tests/general.py | 63 ++++++++++++++++++ .../CWE-611/unit_tests/xml_sax_make_parser.py | 66 +++++++++++++++++++ .../experimental/semmle/python/Concepts.qll | 43 ++++++++++++ .../experimental/semmle/python/Frameworks.qll | 1 + .../semmle/python/security/XXE.qll | 26 ++++++++ .../ql/src/semmle/python/frameworks/XML.qll | 0 8 files changed, 220 insertions(+) create mode 100644 python/ql/src/experimental/Security/CWE-611/XXE.ql create mode 100644 python/ql/src/experimental/Security/CWE-611/unit_tests/XXE.qlref create mode 100644 python/ql/src/experimental/Security/CWE-611/unit_tests/general.py create mode 100644 python/ql/src/experimental/Security/CWE-611/unit_tests/xml_sax_make_parser.py create mode 100644 python/ql/src/experimental/semmle/python/security/XXE.qll delete mode 100644 python/ql/src/semmle/python/frameworks/XML.qll diff --git a/python/ql/src/experimental/Security/CWE-611/XXE.ql b/python/ql/src/experimental/Security/CWE-611/XXE.ql new file mode 100644 index 000000000000..61ff4f8310bc --- /dev/null +++ b/python/ql/src/experimental/Security/CWE-611/XXE.ql @@ -0,0 +1,20 @@ +/** + * @name XML External Entity abuse + * @description User input should not be parsed 
by XML parsers without security options enabled. + * @kind path-problem + * @problem.severity error + * @id py/xxe + * @tags security + * external/cwe/cwe-611 + */ + +// determine precision above +import python +import experimental.semmle.python.security.XXE +import DataFlow::PathGraph + +from XXEFlowConfig config, DataFlow::PathNode source, DataFlow::PathNode sink +where config.hasFlowPath(source, sink) +select sink.getNode(), source, sink, + "$@ XML input is constructed from a $@ and isn't secured against XML External Entities abuse", + sink.getNode(), "This", source.getNode(), "user-provided value" diff --git a/python/ql/src/experimental/Security/CWE-611/unit_tests/XXE.qlref b/python/ql/src/experimental/Security/CWE-611/unit_tests/XXE.qlref new file mode 100644 index 000000000000..ada2b1e52024 --- /dev/null +++ b/python/ql/src/experimental/Security/CWE-611/unit_tests/XXE.qlref @@ -0,0 +1 @@ +experimental/Security/CWE-611/XXE.ql diff --git a/python/ql/src/experimental/Security/CWE-611/unit_tests/general.py b/python/ql/src/experimental/Security/CWE-611/unit_tests/general.py new file mode 100644 index 000000000000..96c3d46f7438 --- /dev/null +++ b/python/ql/src/experimental/Security/CWE-611/unit_tests/general.py @@ -0,0 +1,63 @@ +from flask import request, Flask +from io import StringIO +import xml.etree, xml.etree.ElementTree +import lxml.etree +import xml.dom.minidom, xml.dom.pulldom +import xmltodict + +''' +XML Parsers: + xml.etree.ElementTree.XMLParser() - no options, vuln by default + lxml.etree.XMLParser() - no_network=True huge_tree=False resolve_entities=True + lxml.etree.get_default_parser() - no options, default above options + xml.sax.make_parser() - parser.setFeature(xml.sax.handler.feature_external_ges, True) + +XML Parsing: + string: + xml.etree.ElementTree.fromstring(list) + xml.etree.ElementTree.XML + lxml.etree.fromstring(list) + lxml.etree.XML + xmltodict.parse + + file StringIO(), BytesIO(b): + xml.etree.ElementTree.parse + lxml.etree.parse + 
xml.dom.(mini|pull)dom.parse(String) +''' + +@app.route("/XMLParser-Empty&xml.etree.ElementTree.fromstring") +def test1(): + xml_content = request.args['xml_content'] # ]>&xxe; + + parser = lxml.etree.XMLParser() + return xml.etree.ElementTree.fromstring(xml_content, parser=parser).text # 'root...' + +@app.route("/XMLParser-Empty&xml.etree.ElementTree.parse")#! +def test1(): + xml_content = request.args['xml_content'] # ]>&xxe; + + parser = lxml.etree.XMLParser() + return xml.etree.ElementTree.parse(StringIO(xml_content), parser=parser).getroot().text # 'jorgectf' + +@app.route("/XMLParser-Empty&lxml.etree.fromstring") +def test1(): + xml_content = request.args['xml_content'] # ]>&xxe; + + parser = lxml.etree.XMLParser() + return lxml.etree.fromstring(xml_content, parser=parser).text # 'jorgectf' + +@app.route("/XMLParser-Empty&xml.etree.parse")#! +def test1(): + xml_content = request.args['xml_content'] # ]>&xxe; + + parser = lxml.etree.XMLParser() + return lxml.etree.parse(StringIO(xml_content), parser=parser).getroot().text # 'jorgectf' + +@app.route("/xmltodict-disable_entities_False") +def test2(): + xml_content = request.args['xml_content'] # ]>&xxe; + + return xmltodict.parse(xml_content, disable_entities=False) + + diff --git a/python/ql/src/experimental/Security/CWE-611/unit_tests/xml_sax_make_parser.py b/python/ql/src/experimental/Security/CWE-611/unit_tests/xml_sax_make_parser.py new file mode 100644 index 000000000000..39c3063a2fe4 --- /dev/null +++ b/python/ql/src/experimental/Security/CWE-611/unit_tests/xml_sax_make_parser.py @@ -0,0 +1,66 @@ +from io import StringIO +import xml.sax + +# https://docs.python.org/3/library/xml.sax.handler.html#xml.sax.handler.feature_external_ges + +class MainHandler(xml.sax.ContentHandler): + def __init__(self): + self._result = [] + + def characters(self, data): + self._result.append(data) + + def parse(self, f): + xml.sax.parse(f, self) + return self._result + +# GOOD +@app.route("/MainHandler") +def test1(): + 
xml_content = request.args['xml_content'] # ]>&xxe; + + return MainHandler().parse(StringIO(xml_content)) + +@app.route("/xml.sax.make_parser()+MainHandler") +def test1(): + xml_content = request.args['xml_content'] # ]>&xxe; + + BadHandler = MainHandler() + parser = xml.sax.make_parser() + parser.setContentHandler(BadHandler) + parser.parse(StringIO(xml_content)) + return BadHandler._result + +@app.route("/xml.sax.make_parser()+MainHandler-xml.sax.handler.feature_external_ges_False") +def test1(): + xml_content = request.args['xml_content'] # ]>&xxe; + + BadHandler = MainHandler() + parser = xml.sax.make_parser() + parser.setContentHandler(BadHandler) + parser.setFeature(xml.sax.handler.feature_external_ges, False) + parser.parse(StringIO(xml_content)) + return BadHandler._result + +# BAD +@app.route("/xml.sax.make_parser()+MainHandler-xml.sax.handler.feature_external_ges_True") +def test1(): + xml_content = request.args['xml_content'] # ]>&xxe; + + GoodHandler = MainHandler() + parser = xml.sax.make_parser() + parser.setContentHandler(GoodHandler) + parser.setFeature(xml.sax.handler.feature_external_ges, True) + parser.parse(StringIO(xml_content)) + return GoodHandler._result + +@app.route("/xml.sax.make_parser()+xml.dom.minidom.parse-xml.sax.handler.feature_external_ges_True") +def test1(): + xml_content = request.args['xml_content'] # ]>&xxe; + + parser = xml.sax.make_parser() + parser.setFeature(xml.sax.handler.feature_external_ges, True) + return xml.dom.minidom.parse(StringIO(xml_content), parser=parser).documentElement.childNodes + + + diff --git a/python/ql/src/experimental/semmle/python/Concepts.qll b/python/ql/src/experimental/semmle/python/Concepts.qll index 904b7967ee87..6937c6481b5d 100644 --- a/python/ql/src/experimental/semmle/python/Concepts.qll +++ b/python/ql/src/experimental/semmle/python/Concepts.qll @@ -13,3 +13,46 @@ private import semmle.python.dataflow.new.DataFlow private import semmle.python.dataflow.new.RemoteFlowSources private import 
semmle.python.dataflow.new.TaintTracking private import experimental.semmle.python.Frameworks + +/** Provides classes for modeling XML parsing APIs. */ +module XMLParsing { + /** + * A data-flow node that collects functions parsing XML. + * + * Extend this class to model new APIs. If you want to refine existing API models, + * extend `XMLParsing` instead. + */ + abstract class Range extends DataFlow::Node { + /** + * Gets the argument containing the content to parse. + */ + abstract DataFlow::Node getAnInput(); + + /** + * Holds if the parser may be parsing the input dangerously. + */ + abstract predicate mayBeDangerous(); + } +} + +/** + * A data-flow node that collects functions setting HTTP Headers' content. + * + * Extend this class to model new APIs. If you want to refine existing API models, + * extend `XMLParsing` instead. + */ +class XMLParsing extends DataFlow::Node { + XMLParsing::Range range; + + XMLParsing() { this = range } + + /** + * Gets the argument containing the content to parse. + */ + DataFlow::Node getAnInput() { result = range.getAnInput() } + + /** + * Holds if the parser may be parsing the input dangerously. 
+ */ + predicate mayBeDangerous() { range.mayBeDangerous() } +} diff --git a/python/ql/src/experimental/semmle/python/Frameworks.qll b/python/ql/src/experimental/semmle/python/Frameworks.qll index ca1dd04e57d6..fa83e685fbcc 100644 --- a/python/ql/src/experimental/semmle/python/Frameworks.qll +++ b/python/ql/src/experimental/semmle/python/Frameworks.qll @@ -3,3 +3,4 @@ */ private import experimental.semmle.python.frameworks.Stdlib +private import experimental.semmle.python.frameworks.XML diff --git a/python/ql/src/experimental/semmle/python/security/XXE.qll b/python/ql/src/experimental/semmle/python/security/XXE.qll new file mode 100644 index 000000000000..2b5bab29b8e4 --- /dev/null +++ b/python/ql/src/experimental/semmle/python/security/XXE.qll @@ -0,0 +1,26 @@ +import python +import experimental.semmle.python.Concepts +import semmle.python.dataflow.new.DataFlow +import semmle.python.dataflow.new.TaintTracking +import semmle.python.dataflow.new.RemoteFlowSources +import semmle.python.dataflow.new.BarrierGuards + +/** + * A taint-tracking configuration for detecting XML External entities abuse. + * + * This configuration uses `RemoteFlowSource` as a source because there's no + * risk at parsing not user-supplied input without security options enabled. 
+ */ +class XXEFlowConfig extends TaintTracking::Configuration { + XXEFlowConfig() { this = "XXEFlowConfig" } + + override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource } + + override predicate isSink(DataFlow::Node sink) { + exists(XMLParsing xmlParsing | xmlParsing.mayBeDangerous() and sink = xmlParsing.getAnInput()) + } + + override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) { + guard instanceof StringConstCompare + } +} diff --git a/python/ql/src/semmle/python/frameworks/XML.qll b/python/ql/src/semmle/python/frameworks/XML.qll deleted file mode 100644 index e69de29bb2d1..000000000000 From b9fa57f518a0eb81ec4be5a4fdb2b56d5c4017be Mon Sep 17 00:00:00 2001 From: jorgectf Date: Wed, 30 Jun 2021 00:58:58 +0200 Subject: [PATCH 03/79] Move tests to `test/` --- .../experimental/query-tests/Security/CWE-611}/XXE.qlref | 0 .../experimental/query-tests/Security/CWE-611}/general.py | 7 +++++++ .../query-tests/Security/CWE-611}/xml_sax_make_parser.py | 0 3 files changed, 7 insertions(+) rename python/ql/{src/experimental/Security/CWE-611/unit_tests => test/experimental/query-tests/Security/CWE-611}/XXE.qlref (100%) rename python/ql/{src/experimental/Security/CWE-611/unit_tests => test/experimental/query-tests/Security/CWE-611}/general.py (96%) rename python/ql/{src/experimental/Security/CWE-611/unit_tests => test/experimental/query-tests/Security/CWE-611}/xml_sax_make_parser.py (100%) diff --git a/python/ql/src/experimental/Security/CWE-611/unit_tests/XXE.qlref b/python/ql/test/experimental/query-tests/Security/CWE-611/XXE.qlref similarity index 100% rename from python/ql/src/experimental/Security/CWE-611/unit_tests/XXE.qlref rename to python/ql/test/experimental/query-tests/Security/CWE-611/XXE.qlref diff --git a/python/ql/src/experimental/Security/CWE-611/unit_tests/general.py b/python/ql/test/experimental/query-tests/Security/CWE-611/general.py similarity index 96% rename from 
python/ql/src/experimental/Security/CWE-611/unit_tests/general.py rename to python/ql/test/experimental/query-tests/Security/CWE-611/general.py index 96c3d46f7438..f548f950fba3 100644 --- a/python/ql/src/experimental/Security/CWE-611/unit_tests/general.py +++ b/python/ql/test/experimental/query-tests/Security/CWE-611/general.py @@ -6,6 +6,13 @@ import xmltodict ''' +TO-DO + +Extend tests +Model xmltodict and xml.dom +Write StringIO/BytesIO additional tain steps + + XML Parsers: xml.etree.ElementTree.XMLParser() - no options, vuln by default lxml.etree.XMLParser() - no_network=True huge_tree=False resolve_entities=True diff --git a/python/ql/src/experimental/Security/CWE-611/unit_tests/xml_sax_make_parser.py b/python/ql/test/experimental/query-tests/Security/CWE-611/xml_sax_make_parser.py similarity index 100% rename from python/ql/src/experimental/Security/CWE-611/unit_tests/xml_sax_make_parser.py rename to python/ql/test/experimental/query-tests/Security/CWE-611/xml_sax_make_parser.py From c3b3bde35ded8d80caf0df5ce7c200374e4b7a13 Mon Sep 17 00:00:00 2001 From: jorgectf Date: Wed, 30 Jun 2021 00:59:17 +0200 Subject: [PATCH 04/79] Add `XMLParser` concept --- .../experimental/semmle/python/Concepts.qll | 45 ++++++++++++++++++- 1 file changed, 44 insertions(+), 1 deletion(-) diff --git a/python/ql/src/experimental/semmle/python/Concepts.qll b/python/ql/src/experimental/semmle/python/Concepts.qll index 6937c6481b5d..dc85e023af28 100644 --- a/python/ql/src/experimental/semmle/python/Concepts.qll +++ b/python/ql/src/experimental/semmle/python/Concepts.qll @@ -36,7 +36,7 @@ module XMLParsing { } /** - * A data-flow node that collects functions setting HTTP Headers' content. + * A data-flow node that collects functions parsing XML. * * Extend this class to model new APIs. If you want to refine existing API models, * extend `XMLParsing` instead. 
@@ -56,3 +56,46 @@ class XMLParsing extends DataFlow::Node { */ predicate mayBeDangerous() { range.mayBeDangerous() } } + +/** Provides classes for modeling XML parsers. */ +module XMLParser { + /** + * A data-flow node that collects XML parsers. + * + * Extend this class to model new APIs. If you want to refine existing API models, + * extend `XMLParser` instead. + */ + abstract class Range extends DataFlow::Node { + /** + * Gets the argument containing the content to parse. + */ + abstract DataFlow::Node getAnInput(); + + /** + * Holds if the parser may be dangerously configured. + */ + abstract predicate mayBeDangerous(); + } +} + +/** + * A data-flow node that collects XML parsers. + * + * Extend this class to model new APIs. If you want to refine existing API models, + * extend `XMLParser` instead. + */ +class XMLParser extends DataFlow::Node { + XMLParser::Range range; + + XMLParser() { this = range } + + /** + * Gets the argument containing the content to parse. + */ + DataFlow::Node getAnInput() { result = range.getAnInput() } + + /** + * Holds if the parser may be dangerously configured. + */ + predicate mayBeDangerous() { range.mayBeDangerous() } +} From d475d52c76aee9eaa82e0f34b470eed8e5212d76 Mon Sep 17 00:00:00 2001 From: jorgectf Date: Wed, 30 Jun 2021 00:59:40 +0200 Subject: [PATCH 05/79] Add partial modeling --- .../semmle/python/frameworks/XML.qll | 90 +++++++++++++++++++ 1 file changed, 90 insertions(+) create mode 100644 python/ql/src/experimental/semmle/python/frameworks/XML.qll diff --git a/python/ql/src/experimental/semmle/python/frameworks/XML.qll b/python/ql/src/experimental/semmle/python/frameworks/XML.qll new file mode 100644 index 000000000000..1007fe352792 --- /dev/null +++ b/python/ql/src/experimental/semmle/python/frameworks/XML.qll @@ -0,0 +1,90 @@ +/** + * Provides class and predicates to track external data that + * may represent malicious XML objects. 
+ */ + +private import python +private import semmle.python.dataflow.new.DataFlow +private import experimental.semmle.python.Concepts +private import semmle.python.ApiGraphs + +private module XML { + private API::Node xml() { result = API::moduleImport("xml") } + + private API::Node xmlEtree() { result = xml().getMember("etree").getMember("ElementTree") } + + private class XMLEtreeParser extends DataFlow::CallCfgNode, XMLParser::Range { + XMLEtreeParser() { this = xmlEtree().getMember("XMLParser").getACall() } + + override DataFlow::Node getAnInput() { none() } + + override predicate mayBeDangerous() { any() } + } + + private class XMLEtreeParsing extends DataFlow::CallCfgNode, XMLParsing::Range { + XMLEtreeParsing() { + this = xmlEtree().getMember(["fromstring", "fromstringlist", "XML", "parse"]).getACall() + } + + override DataFlow::Node getAnInput() { result = this.getArg(0) } + + override predicate mayBeDangerous() { + exists(XMLParser xmlParser | + xmlParser.mayBeDangerous() and this.getArgByName("parser").getALocalSource() = xmlParser + ) + } + } + + private API::Node xmlSax() { result = xml().getMember("sax") } + + private class XMLSaxParser extends DataFlow::CallCfgNode, XMLParser::Range { + DataFlow::CallCfgNode attrCall; + + XMLSaxParser() { + this = xmlSax().getMember("make_parser").getACall() and + attrCall.getFunction().(DataFlow::AttrRead).getObject().getALocalSource() = this + } + + override DataFlow::Node getAnInput() { + attrCall.getFunction().(DataFlow::AttrRead).getAttributeName() = "parse" and + result = attrCall.getArg(0) + } + + override predicate mayBeDangerous() { + attrCall.getFunction().(DataFlow::AttrRead).getAttributeName() = "setFeature" and + attrCall.getArg(0) = xmlSax().getMember("handler").getMember("feature_external_ges").getAUse() and + DataFlow::localFlow(DataFlow::exprNode(any(True trueName)), attrCall.getArg(1)) + } + } + + private API::Node lxml() { result = API::moduleImport("lxml") } + + private API::Node lxmlEtree() { 
result = lxml().getMember("etree") } + + private class LXMLParser extends DataFlow::CallCfgNode, XMLParser::Range { + LXMLParser() { this = lxmlEtree().getMember(["XMLParser", "get_default_parser"]).getACall() } + + override DataFlow::Node getAnInput() { none() } + + override predicate mayBeDangerous() { + not exists(this.getArgByName(_)) or + DataFlow::localFlow(DataFlow::exprNode(any(False falseName)), this.getArgByName("no_network")) or + DataFlow::localFlow(DataFlow::exprNode(any(True trueName)), + this.getArgByName(["huge_tree", "resolve_entities"])) + } + } + + private class LXMLParsing extends DataFlow::CallCfgNode, XMLParsing::Range { + LXMLParsing() { + this = lxmlEtree().getMember(["fromstring", "fromstringlist", "XML"]).getACall() + } + + override DataFlow::Node getAnInput() { result = this.getArg(0) } + + override predicate mayBeDangerous() { + exists(XMLParser xmlParser | + xmlParser.mayBeDangerous() and this.getArgByName("parser").getALocalSource() = xmlParser + ) + } + } +} From 11f4c1cc8e74ff9403b75a732b7153fe8d8b6457 Mon Sep 17 00:00:00 2001 From: jorgectf Date: Thu, 22 Jul 2021 19:04:35 +0200 Subject: [PATCH 06/79] Format tests --- .../query-tests/Security/CWE-611/general.py | 53 +++++++----- .../Security/CWE-611/xml_sax_make_parser.py | 82 +++++++++++-------- 2 files changed, 79 insertions(+), 56 deletions(-) diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/general.py b/python/ql/test/experimental/query-tests/Security/CWE-611/general.py index f548f950fba3..b7e933e6f335 100644 --- a/python/ql/test/experimental/query-tests/Security/CWE-611/general.py +++ b/python/ql/test/experimental/query-tests/Security/CWE-611/general.py @@ -1,8 +1,10 @@ from flask import request, Flask from io import StringIO -import xml.etree, xml.etree.ElementTree +import xml.etree +import xml.etree.ElementTree import lxml.etree -import xml.dom.minidom, xml.dom.pulldom +import xml.dom.minidom +import xml.dom.pulldom import xmltodict ''' @@ -33,38 +35,49 @@ 
xml.dom.(mini|pull)dom.parse(String) ''' + @app.route("/XMLParser-Empty&xml.etree.ElementTree.fromstring") def test1(): - xml_content = request.args['xml_content'] # ]>&xxe; + # ]>&xxe; + xml_content = request.args['xml_content'] + + parser = lxml.etree.XMLParser() + # 'root...' + return xml.etree.ElementTree.fromstring(xml_content, parser=parser).text - parser = lxml.etree.XMLParser() - return xml.etree.ElementTree.fromstring(xml_content, parser=parser).text # 'root...' -@app.route("/XMLParser-Empty&xml.etree.ElementTree.parse")#! +@app.route("/XMLParser-Empty&xml.etree.ElementTree.parse") # ! def test1(): - xml_content = request.args['xml_content'] # ]>&xxe; + # ]>&xxe; + xml_content = request.args['xml_content'] + + parser = lxml.etree.XMLParser() + # 'jorgectf' + return xml.etree.ElementTree.parse(StringIO(xml_content), parser=parser).getroot().text - parser = lxml.etree.XMLParser() - return xml.etree.ElementTree.parse(StringIO(xml_content), parser=parser).getroot().text # 'jorgectf' @app.route("/XMLParser-Empty&lxml.etree.fromstring") def test1(): - xml_content = request.args['xml_content'] # ]>&xxe; + # ]>&xxe; + xml_content = request.args['xml_content'] + + parser = lxml.etree.XMLParser() + return lxml.etree.fromstring(xml_content, parser=parser).text # 'jorgectf' - parser = lxml.etree.XMLParser() - return lxml.etree.fromstring(xml_content, parser=parser).text # 'jorgectf' -@app.route("/XMLParser-Empty&xml.etree.parse")#! +@app.route("/XMLParser-Empty&xml.etree.parse") # ! 
def test1(): - xml_content = request.args['xml_content'] # ]>&xxe; + # ]>&xxe; + xml_content = request.args['xml_content'] + + parser = lxml.etree.XMLParser() + # 'jorgectf' + return lxml.etree.parse(StringIO(xml_content), parser=parser).getroot().text - parser = lxml.etree.XMLParser() - return lxml.etree.parse(StringIO(xml_content), parser=parser).getroot().text # 'jorgectf' @app.route("/xmltodict-disable_entities_False") def test2(): - xml_content = request.args['xml_content'] # ]>&xxe; - - return xmltodict.parse(xml_content, disable_entities=False) - + # ]>&xxe; + xml_content = request.args['xml_content'] + return xmltodict.parse(xml_content, disable_entities=False) diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/xml_sax_make_parser.py b/python/ql/test/experimental/query-tests/Security/CWE-611/xml_sax_make_parser.py index 39c3063a2fe4..151930ddd7dd 100644 --- a/python/ql/test/experimental/query-tests/Security/CWE-611/xml_sax_make_parser.py +++ b/python/ql/test/experimental/query-tests/Security/CWE-611/xml_sax_make_parser.py @@ -3,64 +3,74 @@ # https://docs.python.org/3/library/xml.sax.handler.html#xml.sax.handler.feature_external_ges + class MainHandler(xml.sax.ContentHandler): - def __init__(self): - self._result = [] + def __init__(self): + self._result = [] - def characters(self, data): - self._result.append(data) + def characters(self, data): + self._result.append(data) - def parse(self, f): - xml.sax.parse(f, self) - return self._result + def parse(self, f): + xml.sax.parse(f, self) + return self._result # GOOD + + @app.route("/MainHandler") def test1(): - xml_content = request.args['xml_content'] # ]>&xxe; + # ]>&xxe; + xml_content = request.args['xml_content'] + + return MainHandler().parse(StringIO(xml_content)) - return MainHandler().parse(StringIO(xml_content)) @app.route("/xml.sax.make_parser()+MainHandler") def test1(): - xml_content = request.args['xml_content'] # ]>&xxe; + # ]>&xxe; + xml_content = request.args['xml_content'] 
+ + BadHandler = MainHandler() + parser = xml.sax.make_parser() + parser.setContentHandler(BadHandler) + parser.parse(StringIO(xml_content)) + return BadHandler._result - BadHandler = MainHandler() - parser = xml.sax.make_parser() - parser.setContentHandler(BadHandler) - parser.parse(StringIO(xml_content)) - return BadHandler._result @app.route("/xml.sax.make_parser()+MainHandler-xml.sax.handler.feature_external_ges_False") def test1(): - xml_content = request.args['xml_content'] # ]>&xxe; + # ]>&xxe; + xml_content = request.args['xml_content'] - BadHandler = MainHandler() - parser = xml.sax.make_parser() - parser.setContentHandler(BadHandler) - parser.setFeature(xml.sax.handler.feature_external_ges, False) - parser.parse(StringIO(xml_content)) - return BadHandler._result + BadHandler = MainHandler() + parser = xml.sax.make_parser() + parser.setContentHandler(BadHandler) + parser.setFeature(xml.sax.handler.feature_external_ges, False) + parser.parse(StringIO(xml_content)) + return BadHandler._result # BAD -@app.route("/xml.sax.make_parser()+MainHandler-xml.sax.handler.feature_external_ges_True") -def test1(): - xml_content = request.args['xml_content'] # ]>&xxe; - GoodHandler = MainHandler() - parser = xml.sax.make_parser() - parser.setContentHandler(GoodHandler) - parser.setFeature(xml.sax.handler.feature_external_ges, True) - parser.parse(StringIO(xml_content)) - return GoodHandler._result -@app.route("/xml.sax.make_parser()+xml.dom.minidom.parse-xml.sax.handler.feature_external_ges_True") +@app.route("/xml.sax.make_parser()+MainHandler-xml.sax.handler.feature_external_ges_True") def test1(): - xml_content = request.args['xml_content'] # ]>&xxe; + # ]>&xxe; + xml_content = request.args['xml_content'] - parser = xml.sax.make_parser() - parser.setFeature(xml.sax.handler.feature_external_ges, True) - return xml.dom.minidom.parse(StringIO(xml_content), parser=parser).documentElement.childNodes + GoodHandler = MainHandler() + parser = xml.sax.make_parser() + 
parser.setContentHandler(GoodHandler) + parser.setFeature(xml.sax.handler.feature_external_ges, True) + parser.parse(StringIO(xml_content)) + return GoodHandler._result +@app.route("/xml.sax.make_parser()+xml.dom.minidom.parse-xml.sax.handler.feature_external_ges_True") +def test1(): + # ]>&xxe; + xml_content = request.args['xml_content'] + parser = xml.sax.make_parser() + parser.setFeature(xml.sax.handler.feature_external_ges, True) + return xml.dom.minidom.parse(StringIO(xml_content), parser=parser).documentElement.childNodes From b5e10b6c426e5516a304d23e7dc8fe48754080a7 Mon Sep 17 00:00:00 2001 From: jorgectf Date: Thu, 22 Jul 2021 19:15:30 +0200 Subject: [PATCH 07/79] Write `(String|Bytes)IO` additional taint step --- .../ql/src/experimental/semmle/python/security/XXE.qll | 9 +++++++++ .../experimental/query-tests/Security/CWE-611/general.py | 4 ++-- .../query-tests/Security/CWE-611/xml_sax_make_parser.py | 4 ++++ 3 files changed, 15 insertions(+), 2 deletions(-) diff --git a/python/ql/src/experimental/semmle/python/security/XXE.qll b/python/ql/src/experimental/semmle/python/security/XXE.qll index 2b5bab29b8e4..2d4a1aca7f16 100644 --- a/python/ql/src/experimental/semmle/python/security/XXE.qll +++ b/python/ql/src/experimental/semmle/python/security/XXE.qll @@ -4,6 +4,7 @@ import semmle.python.dataflow.new.DataFlow import semmle.python.dataflow.new.TaintTracking import semmle.python.dataflow.new.RemoteFlowSources import semmle.python.dataflow.new.BarrierGuards +import semmle.python.ApiGraphs /** * A taint-tracking configuration for detecting XML External entities abuse. 
@@ -23,4 +24,12 @@ class XXEFlowConfig extends TaintTracking::Configuration { override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) { guard instanceof StringConstCompare } + + override predicate isAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeIn) { + exists(DataFlow::CallCfgNode ioCalls | + ioCalls = API::moduleImport("io").getMember(["StringIO", "BytesIO"]).getACall() and + nodeFrom = ioCalls and + nodeIn = ioCalls.getArg(0) + ) + } } diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/general.py b/python/ql/test/experimental/query-tests/Security/CWE-611/general.py index b7e933e6f335..8e38780ab5e7 100644 --- a/python/ql/test/experimental/query-tests/Security/CWE-611/general.py +++ b/python/ql/test/experimental/query-tests/Security/CWE-611/general.py @@ -12,8 +12,6 @@ Extend tests Model xmltodict and xml.dom -Write StringIO/BytesIO additional tain steps - XML Parsers: xml.etree.ElementTree.XMLParser() - no options, vuln by default @@ -35,6 +33,8 @@ xml.dom.(mini|pull)dom.parse(String) ''' +app = Flask(__name__) + @app.route("/XMLParser-Empty&xml.etree.ElementTree.fromstring") def test1(): diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/xml_sax_make_parser.py b/python/ql/test/experimental/query-tests/Security/CWE-611/xml_sax_make_parser.py index 151930ddd7dd..04a01eda4489 100644 --- a/python/ql/test/experimental/query-tests/Security/CWE-611/xml_sax_make_parser.py +++ b/python/ql/test/experimental/query-tests/Security/CWE-611/xml_sax_make_parser.py @@ -1,6 +1,10 @@ +from flask import request, Flask from io import StringIO import xml.sax + +app = Flask(__name__) + # https://docs.python.org/3/library/xml.sax.handler.html#xml.sax.handler.feature_external_ges From 068150b1ab28d3b9dd393f2a85a2cc4985e068a0 Mon Sep 17 00:00:00 2001 From: jorgectf Date: Thu, 22 Jul 2021 19:34:23 +0200 Subject: [PATCH 08/79] Finish modeling --- .../semmle/python/frameworks/XML.qll | 29 ++++++++++++++++++- 
.../query-tests/Security/CWE-611/general.py | 29 ++----------------- 2 files changed, 30 insertions(+), 28 deletions(-) diff --git a/python/ql/src/experimental/semmle/python/frameworks/XML.qll b/python/ql/src/experimental/semmle/python/frameworks/XML.qll index 1007fe352792..a4f949da1997 100644 --- a/python/ql/src/experimental/semmle/python/frameworks/XML.qll +++ b/python/ql/src/experimental/semmle/python/frameworks/XML.qll @@ -76,7 +76,7 @@ private module XML { private class LXMLParsing extends DataFlow::CallCfgNode, XMLParsing::Range { LXMLParsing() { - this = lxmlEtree().getMember(["fromstring", "fromstringlist", "XML"]).getACall() + this = lxmlEtree().getMember(["fromstring", "fromstringlist", "XML", "parse"]).getACall() } override DataFlow::Node getAnInput() { result = this.getArg(0) } @@ -87,4 +87,31 @@ private module XML { ) } } + + private API::Node xmltodict() { result = API::moduleImport("xmltodict") } + + private class XMLtoDictParsing extends DataFlow::CallCfgNode, XMLParsing::Range { + XMLtoDictParsing() { this = xmltodict().getMember("parse").getACall() } + + override DataFlow::Node getAnInput() { result = this.getArg(0) } + + override predicate mayBeDangerous() { + DataFlow::localFlow(DataFlow::exprNode(any(False falseName)), + this.getArgByName("disable_entities")) + } + } + + private API::Node xmlDom() { result = xml().getMember("dom").getMember(["mini", "pull"] + "dom") } + + private class XMLDomParsing extends DataFlow::CallCfgNode, XMLParsing::Range { + XMLDomParsing() { this = xmlDom().getMember("parse").getACall() } + + override DataFlow::Node getAnInput() { result = this.getArg(0) } + + override predicate mayBeDangerous() { + exists(XMLParser xmlParser | + xmlParser.mayBeDangerous() and this.getArgByName("parser").getALocalSource() = xmlParser + ) + } + } } diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/general.py b/python/ql/test/experimental/query-tests/Security/CWE-611/general.py index 8e38780ab5e7..0b07397a8c40 
100644 --- a/python/ql/test/experimental/query-tests/Security/CWE-611/general.py +++ b/python/ql/test/experimental/query-tests/Security/CWE-611/general.py @@ -7,31 +7,6 @@ import xml.dom.pulldom import xmltodict -''' -TO-DO - -Extend tests -Model xmltodict and xml.dom - -XML Parsers: - xml.etree.ElementTree.XMLParser() - no options, vuln by default - lxml.etree.XMLParser() - no_network=True huge_tree=False resolve_entities=True - lxml.etree.get_default_parser() - no options, default above options - xml.sax.make_parser() - parser.setFeature(xml.sax.handler.feature_external_ges, True) - -XML Parsing: - string: - xml.etree.ElementTree.fromstring(list) - xml.etree.ElementTree.XML - lxml.etree.fromstring(list) - lxml.etree.XML - xmltodict.parse - - file StringIO(), BytesIO(b): - xml.etree.ElementTree.parse - lxml.etree.parse - xml.dom.(mini|pull)dom.parse(String) -''' app = Flask(__name__) @@ -46,7 +21,7 @@ def test1(): return xml.etree.ElementTree.fromstring(xml_content, parser=parser).text -@app.route("/XMLParser-Empty&xml.etree.ElementTree.parse") # ! +@app.route("/XMLParser-Empty&xml.etree.ElementTree.parse") def test1(): # ]>&xxe; xml_content = request.args['xml_content'] @@ -65,7 +40,7 @@ def test1(): return lxml.etree.fromstring(xml_content, parser=parser).text # 'jorgectf' -@app.route("/XMLParser-Empty&xml.etree.parse") # ! 
+@app.route("/XMLParser-Empty&xml.etree.parse") def test1(): # ]>&xxe; xml_content = request.args['xml_content'] From 0d2646fd3dade349586b362fb610830867249322 Mon Sep 17 00:00:00 2001 From: jorgectf Date: Sat, 24 Jul 2021 01:23:51 +0200 Subject: [PATCH 09/79] Polish documentation --- .../src/experimental/Security/CWE-611/XXE.py | 13 +++++ .../src/experimental/Security/CWE-611/XXE.ql | 2 + .../experimental/Security/CWE-611/XXE.qlref | 47 +++++++++++++++++++ 3 files changed, 62 insertions(+) create mode 100644 python/ql/src/experimental/Security/CWE-611/XXE.py create mode 100644 python/ql/src/experimental/Security/CWE-611/XXE.qlref diff --git a/python/ql/src/experimental/Security/CWE-611/XXE.py b/python/ql/src/experimental/Security/CWE-611/XXE.py new file mode 100644 index 000000000000..7b42c8990983 --- /dev/null +++ b/python/ql/src/experimental/Security/CWE-611/XXE.py @@ -0,0 +1,13 @@ +from flask import request, Flask +import lxml.etree +import xml.etree.ElementTree + + +@app.route("/example") +def example(): + xml_content = request.args['xml_content'] + + parser = lxml.etree.XMLParser() + parsed_xml = xml.etree.ElementTree.fromstring(xml_content, parser=parser) + + return parsed_xml.text diff --git a/python/ql/src/experimental/Security/CWE-611/XXE.ql b/python/ql/src/experimental/Security/CWE-611/XXE.ql index 61ff4f8310bc..78866def1f58 100644 --- a/python/ql/src/experimental/Security/CWE-611/XXE.ql +++ b/python/ql/src/experimental/Security/CWE-611/XXE.ql @@ -6,6 +6,8 @@ * @id py/xxe * @tags security * external/cwe/cwe-611 + * external/cwe/cwe-776 + * external/cwe/cwe-827 */ // determine precision above diff --git a/python/ql/src/experimental/Security/CWE-611/XXE.qlref b/python/ql/src/experimental/Security/CWE-611/XXE.qlref new file mode 100644 index 000000000000..d54b1182ccaf --- /dev/null +++ b/python/ql/src/experimental/Security/CWE-611/XXE.qlref @@ -0,0 +1,47 @@ + + + + +

+Parsing untrusted XML files with a weakly configured XML parser may lead to an XML External Entity (XXE) attack. +This type of attack uses external entity references to access arbitrary files on a system, carry out denial of +service attacks, or perform server-side request forgery. Even when the result of parsing is not returned to the user, out-of-band +data retrieval techniques may allow attackers to steal sensitive data. Denial of service attacks can also be carried out +in this situation. +

+

+There are many XML parsers for Python, and most of them are vulnerable to XXE because their default settings enable +parsing of external entities. This query currently identifies vulnerable XML parsing from the following parsers: +xml.etree.ElementTree.XMLParser, lxml.etree.XMLParser, lxml.etree.get_default_parser, +xml.sax.make_parser. +

+
+ + +

+The best way to prevent XXE attacks is to disable the parsing of any Document Type Declarations (DTDs) in untrusted data. +If this is not possible, you should disable the parsing of external general entities and external parameter entities. +This improves security, but the code will still be at risk of denial of service and server-side request forgery attacks. +

+
+ + +

+The following example calls xml.etree.ElementTree.fromstring using a parser (lxml.etree.XMLParser) +that is not safely configured on untrusted data, and is therefore inherently unsafe. +

+ +
+ + +
  • Python XML Parsing.
  • +
  • OWASP vulnerability description: XML External Entity (XXE) Processing.
  • +
  • OWASP guidance on parsing XML files: XXE Prevention Cheat Sheet.
  • +
  • Paper by Timothy Morgan: XML Schema, DTD, and Entity Attacks
  • +
  • Out-of-band data retrieval: Timur Yunusov & Alexey Osipov, Black Hat EU 2013: XML Out-Of-Band Data Retrieval.
  • +
  • Denial of service attack (Billion laughs): Billion Laughs.
  • +
    + +
    \ No newline at end of file From 61e873d725e41b942d1121a64786d30d5f228ca0 Mon Sep 17 00:00:00 2001 From: jorgectf Date: Sat, 24 Jul 2021 02:09:23 +0200 Subject: [PATCH 10/79] Polish tests --- .../query-tests/Security/CWE-611/general.py | 59 ++++++++++++------- .../Security/CWE-611/xml_sax_make_parser.py | 9 +-- 2 files changed, 39 insertions(+), 29 deletions(-) diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/general.py b/python/ql/test/experimental/query-tests/Security/CWE-611/general.py index 0b07397a8c40..2833e595b76b 100644 --- a/python/ql/test/experimental/query-tests/Security/CWE-611/general.py +++ b/python/ql/test/experimental/query-tests/Security/CWE-611/general.py @@ -10,49 +10,64 @@ app = Flask(__name__) +# xml_content = ']>&xxe;' -@app.route("/XMLParser-Empty&xml.etree.ElementTree.fromstring") -def test1(): - # ]>&xxe; + +@app.route("/lxml.etree.fromstring") +def lxml_fromstring(): xml_content = request.args['xml_content'] - parser = lxml.etree.XMLParser() - # 'root...' 
- return xml.etree.ElementTree.fromstring(xml_content, parser=parser).text + return lxml.etree.fromstring(xml_content).text + + +@app.route("/lxml.etree.XML") +def lxml_XML(): + xml_content = request.args['xml_content'] + + return lxml.etree.XML(xml_content).text + + +@app.route("/lxml.etree.parse") +def lxml_parse(): + xml_content = request.args['xml_content'] + + return lxml.etree.parse(StringIO(xml_content)).text -@app.route("/XMLParser-Empty&xml.etree.ElementTree.parse") +@app.route("/xmltodict.parse") +def xmltodict_parse(): + xml_content = request.args['xml_content'] + + return xmltodict.parse(xml_content, disable_entities=False) + + +@app.route("/lxml.etree.XMLParser+lxml.etree.fromstring") def test1(): - # ]>&xxe; xml_content = request.args['xml_content'] parser = lxml.etree.XMLParser() - # 'jorgectf' - return xml.etree.ElementTree.parse(StringIO(xml_content), parser=parser).getroot().text + return lxml.etree.fromstring(xml_content, parser=parser).text -@app.route("/XMLParser-Empty&lxml.etree.fromstring") +@app.route("/lxml.etree.get_default_parser+lxml.etree.fromstring") def test1(): - # ]>&xxe; xml_content = request.args['xml_content'] - parser = lxml.etree.XMLParser() - return lxml.etree.fromstring(xml_content, parser=parser).text # 'jorgectf' + parser = lxml.etree.get_default_parser() + return lxml.etree.fromstring(xml_content, parser=parser).text -@app.route("/XMLParser-Empty&xml.etree.parse") +@app.route("/lxml.etree.XMLParser+xml.etree.ElementTree.fromstring") def test1(): - # ]>&xxe; xml_content = request.args['xml_content'] parser = lxml.etree.XMLParser() - # 'jorgectf' - return lxml.etree.parse(StringIO(xml_content), parser=parser).getroot().text + return xml.etree.ElementTree.fromstring(xml_content, parser=parser).text -@app.route("/xmltodict-disable_entities_False") -def test2(): - # ]>&xxe; +@app.route("/lxml.etree.XMLParser+xml.etree.ElementTree.parse") +def test1(): xml_content = request.args['xml_content'] - return 
xmltodict.parse(xml_content, disable_entities=False) + parser = lxml.etree.XMLParser() + return xml.etree.ElementTree.parse(StringIO(xml_content), parser=parser).getroot().text diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/xml_sax_make_parser.py b/python/ql/test/experimental/query-tests/Security/CWE-611/xml_sax_make_parser.py index 04a01eda4489..16cee1a4d2c3 100644 --- a/python/ql/test/experimental/query-tests/Security/CWE-611/xml_sax_make_parser.py +++ b/python/ql/test/experimental/query-tests/Security/CWE-611/xml_sax_make_parser.py @@ -2,11 +2,10 @@ from io import StringIO import xml.sax +# xml_content = ']>&xxe;' app = Flask(__name__) -# https://docs.python.org/3/library/xml.sax.handler.html#xml.sax.handler.feature_external_ges - class MainHandler(xml.sax.ContentHandler): def __init__(self): @@ -24,7 +23,6 @@ def parse(self, f): @app.route("/MainHandler") def test1(): - # ]>&xxe; xml_content = request.args['xml_content'] return MainHandler().parse(StringIO(xml_content)) @@ -32,7 +30,6 @@ def test1(): @app.route("/xml.sax.make_parser()+MainHandler") def test1(): - # ]>&xxe; xml_content = request.args['xml_content'] BadHandler = MainHandler() @@ -44,12 +41,12 @@ def test1(): @app.route("/xml.sax.make_parser()+MainHandler-xml.sax.handler.feature_external_ges_False") def test1(): - # ]>&xxe; xml_content = request.args['xml_content'] BadHandler = MainHandler() parser = xml.sax.make_parser() parser.setContentHandler(BadHandler) + # https://docs.python.org/3/library/xml.sax.handler.html#xml.sax.handler.feature_external_ges parser.setFeature(xml.sax.handler.feature_external_ges, False) parser.parse(StringIO(xml_content)) return BadHandler._result @@ -59,7 +56,6 @@ def test1(): @app.route("/xml.sax.make_parser()+MainHandler-xml.sax.handler.feature_external_ges_True") def test1(): - # ]>&xxe; xml_content = request.args['xml_content'] GoodHandler = MainHandler() @@ -72,7 +68,6 @@ def test1(): 
@app.route("/xml.sax.make_parser()+xml.dom.minidom.parse-xml.sax.handler.feature_external_ges_True") def test1(): - # ]>&xxe; xml_content = request.args['xml_content'] parser = xml.sax.make_parser() From b83b31cc7a9aa8e299f02381437a7ab17cc9d4cf Mon Sep 17 00:00:00 2001 From: jorgectf Date: Sat, 24 Jul 2021 02:33:57 +0200 Subject: [PATCH 11/79] Write qldocs --- .../semmle/python/frameworks/XML.qll | 77 +++++++++++++++++++ 1 file changed, 77 insertions(+) diff --git a/python/ql/src/experimental/semmle/python/frameworks/XML.qll b/python/ql/src/experimental/semmle/python/frameworks/XML.qll index a4f949da1997..f44e42c23806 100644 --- a/python/ql/src/experimental/semmle/python/frameworks/XML.qll +++ b/python/ql/src/experimental/semmle/python/frameworks/XML.qll @@ -9,10 +9,13 @@ private import experimental.semmle.python.Concepts private import semmle.python.ApiGraphs private module XML { + /** Gets a reference to the `xml` module. */ private API::Node xml() { result = API::moduleImport("xml") } + /** Gets a reference to `xml.etree.ElementTree`. */ private API::Node xmlEtree() { result = xml().getMember("etree").getMember("ElementTree") } + /** Gets a call to `xml.etree.ElementTree.XMLParser`. */ private class XMLEtreeParser extends DataFlow::CallCfgNode, XMLParser::Range { XMLEtreeParser() { this = xmlEtree().getMember("XMLParser").getACall() } @@ -21,6 +24,20 @@ private module XML { override predicate mayBeDangerous() { any() } } + /** + * Gets a call to `xml.etree.ElementTree.fromstring`, `xml.etree.ElementTree.fromstringlist`, + * `xml.etree.ElementTree.XML` or `xml.etree.ElementTree.parse`. + * + * Given the following example: + * + * ```py + * parser = lxml.etree.XMLParser() + * parsed_xml = xml.etree.ElementTree.fromstring(xml_content, parser=parser).text + * ``` + * + * `this` would be `xml.etree.ElementTree.fromstring(xml_content, parser=parser)` + * and `xml_content` would be the result of `getAnInput()`. 
+ */ private class XMLEtreeParsing extends DataFlow::CallCfgNode, XMLParsing::Range { XMLEtreeParsing() { this = xmlEtree().getMember(["fromstring", "fromstringlist", "XML", "parse"]).getACall() @@ -35,8 +52,27 @@ private module XML { } } + /** Gets a reference to `xml.sax`. */ private API::Node xmlSax() { result = xml().getMember("sax") } + /** + * Gets a call to `xml.sax.make_parser` and following calls. + * + * Given the following example: + * + * ```py + * BadHandler = MainHandler() + * parser = xml.sax.make_parser() + * parser.setContentHandler(BadHandler) + * parser.setFeature(xml.sax.handler.feature_external_ges, False) + * parser.parse(StringIO(xml_content)) + * parsed_xml = BadHandler._result + * ``` + * + * `this` would be `xml.sax.make_parser()`, `getAnInput()` would return `StringIO(xml_content)` + * and `mayBeDangerous()` would not hold since `xml.sax.handler.feature_external_ges` is set to + * `False`; it holds only when that feature is set to `True`. + */ private class XMLSaxParser extends DataFlow::CallCfgNode, XMLParser::Range { DataFlow::CallCfgNode attrCall; @@ -57,10 +93,17 @@ private module XML { } } + /** Gets a reference to `lxml`. */ private API::Node lxml() { result = API::moduleImport("lxml") } + /** Gets a reference to `lxml.etree`. */ private API::Node lxmlEtree() { result = lxml().getMember("etree") } + /** + * Gets a call to `lxml.etree.XMLParser` or `lxml.etree.get_default_parser` and `mayBeDangerous()` + * identifies whether the argument `no_network` is set to `False` or the arguments `huge_tree` + * or `resolve_entities` are set to `True`. + */ private class LXMLParser extends DataFlow::CallCfgNode, XMLParser::Range { LXMLParser() { this = lxmlEtree().getMember(["XMLParser", "get_default_parser"]).getACall() } @@ -74,6 +117,20 @@ private module XML { } } + /** + * Gets a call to `lxml.etree.fromstring`, `lxml.etree.fromstringlist`, + * `lxml.etree.XML` or `lxml.etree.parse`. 
+ * + * Given the following example: + * + * ```py + * parser = lxml.etree.XMLParser() + * parsed_xml = lxml.etree.fromstring(xml_content, parser=parser).text + * ``` + * + * `this` would be `lxml.etree.fromstring(xml_content, parser=parser)` + * and `xml_content` would be the result of `getAnInput()`. + */ private class LXMLParsing extends DataFlow::CallCfgNode, XMLParsing::Range { LXMLParsing() { this = lxmlEtree().getMember(["fromstring", "fromstringlist", "XML", "parse"]).getACall() @@ -88,8 +145,13 @@ private module XML { } } + /** Gets a reference to the `xmltodict` module. */ private API::Node xmltodict() { result = API::moduleImport("xmltodict") } + /** + * Gets a call to `xmltodict.parse` and `mayBeDangerous()` identifies + * whether the argument `disable_entities` is set to `False`. + */ private class XMLtoDictParsing extends DataFlow::CallCfgNode, XMLParsing::Range { XMLtoDictParsing() { this = xmltodict().getMember("parse").getACall() } @@ -101,8 +163,23 @@ private module XML { } } + /** Gets a reference to `xml.dom.minidom` or `xml.dom.pulldom`. */ private API::Node xmlDom() { result = xml().getMember("dom").getMember(["mini", "pull"] + "dom") } + /** + * Gets a call to `xml.dom.minidom.parse` or `xml.dom.pulldom.parse`. + * + * Given the following example: + * + * ```py + * parser = xml.sax.make_parser() + * parser.setFeature(xml.sax.handler.feature_external_ges, True) + * parsed_xml = xml.dom.minidom.parse(StringIO(xml_content), parser=parser).documentElement.childNodes + * ``` + * + * `this` would be `xml.dom.minidom.parse(StringIO(xml_content), parser=parser)` + * and `StringIO(xml_content)` would be the result of `getAnInput()`. 
+ */ private class XMLDomParsing extends DataFlow::CallCfgNode, XMLParsing::Range { XMLDomParsing() { this = xmlDom().getMember("parse").getACall() } From 1dd77f167a9554663b16fac66b4d3c1a3bbcf2fd Mon Sep 17 00:00:00 2001 From: jorgectf Date: Sun, 25 Jul 2021 01:51:52 +0200 Subject: [PATCH 12/79] Fix undetected tests --- .../src/experimental/semmle/python/frameworks/XML.qll | 2 ++ .../ql/src/experimental/semmle/python/security/XXE.qll | 8 +++++--- .../query-tests/Security/CWE-611/general.py | 10 +++++----- .../Security/CWE-611/xml_sax_make_parser.py | 10 +++++----- 4 files changed, 17 insertions(+), 13 deletions(-) diff --git a/python/ql/src/experimental/semmle/python/frameworks/XML.qll b/python/ql/src/experimental/semmle/python/frameworks/XML.qll index f44e42c23806..6c1228494d01 100644 --- a/python/ql/src/experimental/semmle/python/frameworks/XML.qll +++ b/python/ql/src/experimental/semmle/python/frameworks/XML.qll @@ -142,6 +142,8 @@ private module XML { exists(XMLParser xmlParser | xmlParser.mayBeDangerous() and this.getArgByName("parser").getALocalSource() = xmlParser ) + or + not exists(this.getArgByName("parser")) } } diff --git a/python/ql/src/experimental/semmle/python/security/XXE.qll b/python/ql/src/experimental/semmle/python/security/XXE.qll index 2d4a1aca7f16..7998d4081db8 100644 --- a/python/ql/src/experimental/semmle/python/security/XXE.qll +++ b/python/ql/src/experimental/semmle/python/security/XXE.qll @@ -19,17 +19,19 @@ class XXEFlowConfig extends TaintTracking::Configuration { override predicate isSink(DataFlow::Node sink) { exists(XMLParsing xmlParsing | xmlParsing.mayBeDangerous() and sink = xmlParsing.getAnInput()) + or + exists(XMLParser xmlParser | sink = xmlParser.getAnInput() and xmlParser.mayBeDangerous()) } override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) { guard instanceof StringConstCompare } - override predicate isAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeIn) { + override predicate 
isAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { exists(DataFlow::CallCfgNode ioCalls | ioCalls = API::moduleImport("io").getMember(["StringIO", "BytesIO"]).getACall() and - nodeFrom = ioCalls and - nodeIn = ioCalls.getArg(0) + nodeFrom = ioCalls.getArg(0) and + nodeTo = ioCalls ) } } diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/general.py b/python/ql/test/experimental/query-tests/Security/CWE-611/general.py index 2833e595b76b..c9f8cc984bda 100644 --- a/python/ql/test/experimental/query-tests/Security/CWE-611/general.py +++ b/python/ql/test/experimental/query-tests/Security/CWE-611/general.py @@ -1,5 +1,5 @@ from flask import request, Flask -from io import StringIO +from io import StringIO, BytesIO import xml.etree import xml.etree.ElementTree import lxml.etree @@ -42,7 +42,7 @@ def xmltodict_parse(): @app.route("/lxml.etree.XMLParser+lxml.etree.fromstring") -def test1(): +def lxml_XMLParser_fromstring(): xml_content = request.args['xml_content'] parser = lxml.etree.XMLParser() @@ -50,7 +50,7 @@ def test1(): @app.route("/lxml.etree.get_default_parser+lxml.etree.fromstring") -def test1(): +def lxml_defaultParser_fromstring(): xml_content = request.args['xml_content'] parser = lxml.etree.get_default_parser() @@ -58,7 +58,7 @@ def test1(): @app.route("/lxml.etree.XMLParser+xml.etree.ElementTree.fromstring") -def test1(): +def lxml_XMLParser_xml_fromstring(): xml_content = request.args['xml_content'] parser = lxml.etree.XMLParser() @@ -66,7 +66,7 @@ def test1(): @app.route("/lxml.etree.XMLParser+xml.etree.ElementTree.parse") -def test1(): +def lxml_XMLParser_xml_parse(): xml_content = request.args['xml_content'] parser = lxml.etree.XMLParser() diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/xml_sax_make_parser.py b/python/ql/test/experimental/query-tests/Security/CWE-611/xml_sax_make_parser.py index 16cee1a4d2c3..11bb1715a0bf 100644 --- 
a/python/ql/test/experimental/query-tests/Security/CWE-611/xml_sax_make_parser.py +++ b/python/ql/test/experimental/query-tests/Security/CWE-611/xml_sax_make_parser.py @@ -22,14 +22,14 @@ def parse(self, f): @app.route("/MainHandler") -def test1(): +def mainHandler(): xml_content = request.args['xml_content'] return MainHandler().parse(StringIO(xml_content)) @app.route("/xml.sax.make_parser()+MainHandler") -def test1(): +def xml_makeparser_MainHandler(): xml_content = request.args['xml_content'] BadHandler = MainHandler() @@ -40,7 +40,7 @@ def test1(): @app.route("/xml.sax.make_parser()+MainHandler-xml.sax.handler.feature_external_ges_False") -def test1(): +def xml_makeparser_MainHandler_entitiesFalse(): xml_content = request.args['xml_content'] BadHandler = MainHandler() @@ -55,7 +55,7 @@ def test1(): @app.route("/xml.sax.make_parser()+MainHandler-xml.sax.handler.feature_external_ges_True") -def test1(): +def xml_makeparser_MainHandler_entitiesTrue(): xml_content = request.args['xml_content'] GoodHandler = MainHandler() @@ -67,7 +67,7 @@ def test1(): @app.route("/xml.sax.make_parser()+xml.dom.minidom.parse-xml.sax.handler.feature_external_ges_True") -def test1(): +def xml_makeparser_minidom_entitiesTrue(): xml_content = request.args['xml_content'] parser = xml.sax.make_parser() From 93c8529fc9214125547529ca4a6fbe7dd27f7340 Mon Sep 17 00:00:00 2001 From: jorgectf Date: Sun, 25 Jul 2021 01:53:21 +0200 Subject: [PATCH 13/79] Add `.expected` --- .../query-tests/Security/CWE-611/XXE.expected | 83 +++++++++++++++++++ 1 file changed, 83 insertions(+) create mode 100644 python/ql/test/experimental/query-tests/Security/CWE-611/XXE.expected diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/XXE.expected b/python/ql/test/experimental/query-tests/Security/CWE-611/XXE.expected new file mode 100644 index 000000000000..f177d78d9403 --- /dev/null +++ b/python/ql/test/experimental/query-tests/Security/CWE-611/XXE.expected @@ -0,0 +1,83 @@ +edges +| 
general.py:18:19:18:25 | ControlFlowNode for request | general.py:18:19:18:30 | ControlFlowNode for Attribute | +| general.py:18:19:18:30 | ControlFlowNode for Attribute | general.py:18:19:18:45 | ControlFlowNode for Subscript | +| general.py:18:19:18:45 | ControlFlowNode for Subscript | general.py:20:34:20:44 | ControlFlowNode for xml_content | +| general.py:25:19:25:25 | ControlFlowNode for request | general.py:25:19:25:30 | ControlFlowNode for Attribute | +| general.py:25:19:25:30 | ControlFlowNode for Attribute | general.py:25:19:25:45 | ControlFlowNode for Subscript | +| general.py:25:19:25:45 | ControlFlowNode for Subscript | general.py:27:27:27:37 | ControlFlowNode for xml_content | +| general.py:32:19:32:25 | ControlFlowNode for request | general.py:32:19:32:30 | ControlFlowNode for Attribute | +| general.py:32:19:32:30 | ControlFlowNode for Attribute | general.py:32:19:32:45 | ControlFlowNode for Subscript | +| general.py:32:19:32:45 | ControlFlowNode for Subscript | general.py:34:29:34:49 | ControlFlowNode for StringIO() | +| general.py:39:19:39:25 | ControlFlowNode for request | general.py:39:19:39:30 | ControlFlowNode for Attribute | +| general.py:39:19:39:30 | ControlFlowNode for Attribute | general.py:39:19:39:45 | ControlFlowNode for Subscript | +| general.py:39:19:39:45 | ControlFlowNode for Subscript | general.py:41:28:41:38 | ControlFlowNode for xml_content | +| general.py:46:19:46:25 | ControlFlowNode for request | general.py:46:19:46:30 | ControlFlowNode for Attribute | +| general.py:46:19:46:30 | ControlFlowNode for Attribute | general.py:46:19:46:45 | ControlFlowNode for Subscript | +| general.py:46:19:46:45 | ControlFlowNode for Subscript | general.py:49:34:49:44 | ControlFlowNode for xml_content | +| general.py:54:19:54:25 | ControlFlowNode for request | general.py:54:19:54:30 | ControlFlowNode for Attribute | +| general.py:54:19:54:30 | ControlFlowNode for Attribute | general.py:54:19:54:45 | ControlFlowNode for Subscript | +| 
general.py:54:19:54:45 | ControlFlowNode for Subscript | general.py:57:34:57:44 | ControlFlowNode for xml_content | +| general.py:62:19:62:25 | ControlFlowNode for request | general.py:62:19:62:30 | ControlFlowNode for Attribute | +| general.py:62:19:62:30 | ControlFlowNode for Attribute | general.py:62:19:62:45 | ControlFlowNode for Subscript | +| general.py:62:19:62:45 | ControlFlowNode for Subscript | general.py:65:45:65:55 | ControlFlowNode for xml_content | +| general.py:70:19:70:25 | ControlFlowNode for request | general.py:70:19:70:30 | ControlFlowNode for Attribute | +| general.py:70:19:70:30 | ControlFlowNode for Attribute | general.py:70:19:70:45 | ControlFlowNode for Subscript | +| general.py:70:19:70:45 | ControlFlowNode for Subscript | general.py:73:40:73:60 | ControlFlowNode for StringIO() | +| xml_sax_make_parser.py:59:19:59:25 | ControlFlowNode for request | xml_sax_make_parser.py:59:19:59:30 | ControlFlowNode for Attribute | +| xml_sax_make_parser.py:59:19:59:30 | ControlFlowNode for Attribute | xml_sax_make_parser.py:59:19:59:45 | ControlFlowNode for Subscript | +| xml_sax_make_parser.py:59:19:59:45 | ControlFlowNode for Subscript | xml_sax_make_parser.py:65:18:65:38 | ControlFlowNode for StringIO() | +| xml_sax_make_parser.py:71:19:71:25 | ControlFlowNode for request | xml_sax_make_parser.py:71:19:71:30 | ControlFlowNode for Attribute | +| xml_sax_make_parser.py:71:19:71:30 | ControlFlowNode for Attribute | xml_sax_make_parser.py:71:19:71:45 | ControlFlowNode for Subscript | +| xml_sax_make_parser.py:71:19:71:45 | ControlFlowNode for Subscript | xml_sax_make_parser.py:75:34:75:54 | ControlFlowNode for StringIO() | +nodes +| general.py:18:19:18:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | +| general.py:18:19:18:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | +| general.py:18:19:18:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | +| 
general.py:20:34:20:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | +| general.py:25:19:25:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | +| general.py:25:19:25:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | +| general.py:25:19:25:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | +| general.py:27:27:27:37 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | +| general.py:32:19:32:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | +| general.py:32:19:32:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | +| general.py:32:19:32:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | +| general.py:34:29:34:49 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() | +| general.py:39:19:39:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | +| general.py:39:19:39:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | +| general.py:39:19:39:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | +| general.py:41:28:41:38 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | +| general.py:46:19:46:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | +| general.py:46:19:46:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | +| general.py:46:19:46:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | +| general.py:49:34:49:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | +| general.py:54:19:54:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | +| general.py:54:19:54:30 | ControlFlowNode for Attribute | semmle.label | 
ControlFlowNode for Attribute | +| general.py:54:19:54:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | +| general.py:57:34:57:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | +| general.py:62:19:62:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | +| general.py:62:19:62:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | +| general.py:62:19:62:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | +| general.py:65:45:65:55 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | +| general.py:70:19:70:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | +| general.py:70:19:70:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | +| general.py:70:19:70:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | +| general.py:73:40:73:60 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() | +| xml_sax_make_parser.py:59:19:59:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | +| xml_sax_make_parser.py:59:19:59:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | +| xml_sax_make_parser.py:59:19:59:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | +| xml_sax_make_parser.py:65:18:65:38 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() | +| xml_sax_make_parser.py:71:19:71:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | +| xml_sax_make_parser.py:71:19:71:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | +| xml_sax_make_parser.py:71:19:71:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | +| xml_sax_make_parser.py:75:34:75:54 | ControlFlowNode for 
StringIO() | semmle.label | ControlFlowNode for StringIO() | +#select +| general.py:20:34:20:44 | ControlFlowNode for xml_content | general.py:18:19:18:25 | ControlFlowNode for request | general.py:20:34:20:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and isn't secured against XML External Entities abuse | general.py:20:34:20:44 | ControlFlowNode for xml_content | This | general.py:18:19:18:25 | ControlFlowNode for request | user-provided value | +| general.py:27:27:27:37 | ControlFlowNode for xml_content | general.py:25:19:25:25 | ControlFlowNode for request | general.py:27:27:27:37 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and isn't secured against XML External Entities abuse | general.py:27:27:27:37 | ControlFlowNode for xml_content | This | general.py:25:19:25:25 | ControlFlowNode for request | user-provided value | +| general.py:34:29:34:49 | ControlFlowNode for StringIO() | general.py:32:19:32:25 | ControlFlowNode for request | general.py:34:29:34:49 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and isn't secured against XML External Entities abuse | general.py:34:29:34:49 | ControlFlowNode for StringIO() | This | general.py:32:19:32:25 | ControlFlowNode for request | user-provided value | +| general.py:41:28:41:38 | ControlFlowNode for xml_content | general.py:39:19:39:25 | ControlFlowNode for request | general.py:41:28:41:38 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and isn't secured against XML External Entities abuse | general.py:41:28:41:38 | ControlFlowNode for xml_content | This | general.py:39:19:39:25 | ControlFlowNode for request | user-provided value | +| general.py:49:34:49:44 | ControlFlowNode for xml_content | general.py:46:19:46:25 | ControlFlowNode for request | general.py:49:34:49:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and isn't secured against XML External Entities abuse | 
general.py:49:34:49:44 | ControlFlowNode for xml_content | This | general.py:46:19:46:25 | ControlFlowNode for request | user-provided value | +| general.py:57:34:57:44 | ControlFlowNode for xml_content | general.py:54:19:54:25 | ControlFlowNode for request | general.py:57:34:57:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and isn't secured against XML External Entities abuse | general.py:57:34:57:44 | ControlFlowNode for xml_content | This | general.py:54:19:54:25 | ControlFlowNode for request | user-provided value | +| general.py:65:45:65:55 | ControlFlowNode for xml_content | general.py:62:19:62:25 | ControlFlowNode for request | general.py:65:45:65:55 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and isn't secured against XML External Entities abuse | general.py:65:45:65:55 | ControlFlowNode for xml_content | This | general.py:62:19:62:25 | ControlFlowNode for request | user-provided value | +| general.py:73:40:73:60 | ControlFlowNode for StringIO() | general.py:70:19:70:25 | ControlFlowNode for request | general.py:73:40:73:60 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and isn't secured against XML External Entities abuse | general.py:73:40:73:60 | ControlFlowNode for StringIO() | This | general.py:70:19:70:25 | ControlFlowNode for request | user-provided value | +| xml_sax_make_parser.py:65:18:65:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:59:19:59:25 | ControlFlowNode for request | xml_sax_make_parser.py:65:18:65:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and isn't secured against XML External Entities abuse | xml_sax_make_parser.py:65:18:65:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:59:19:59:25 | ControlFlowNode for request | user-provided value | +| xml_sax_make_parser.py:75:34:75:54 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:71:19:71:25 | ControlFlowNode for request | 
xml_sax_make_parser.py:75:34:75:54 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and isn't secured against XML External Entities abuse | xml_sax_make_parser.py:75:34:75:54 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:71:19:71:25 | ControlFlowNode for request | user-provided value | From 48bca5beb8a63b24e824c19fccaf5c9ddbc4d977 Mon Sep 17 00:00:00 2001 From: jorgectf Date: Wed, 25 Aug 2021 17:09:47 +0200 Subject: [PATCH 14/79] Fix references' link anchor --- python/ql/src/experimental/Security/CWE-611/XXE.qlref | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/ql/src/experimental/Security/CWE-611/XXE.qlref b/python/ql/src/experimental/Security/CWE-611/XXE.qlref index d54b1182ccaf..6e7b39fffb02 100644 --- a/python/ql/src/experimental/Security/CWE-611/XXE.qlref +++ b/python/ql/src/experimental/Security/CWE-611/XXE.qlref @@ -38,7 +38,7 @@ that is not safely configured on untrusted data, and is therefore inherently uns
  • Python XML Parsing.
  • OWASP vulnerability description: XML External Entity (XXE) Processing.
  • -
  • OWASP guidance on parsing xml files: XXE Prevention Cheat Sheet.
  • +
  • OWASP guidance on parsing xml files: XXE Prevention Cheat Sheet.
  • Paper by Timothy Morgan: XML Schema, DTD, and Entity Attacks
  • Out-of-band data retrieval: Timur Yunusov & Alexey Osipov, Black Hat EU 2013: XML Out-Of-Band Data Retrieval.
  • Denial of service attack (Billion laughs): Billion Laughs.
  • From 21da603d8188a6cd751e4a17357d572520db4c05 Mon Sep 17 00:00:00 2001 From: jorgectf Date: Tue, 7 Sep 2021 20:13:39 +0200 Subject: [PATCH 15/79] Update `.qlref` --- .../ql/src/experimental/Security/CWE-611/XXE.qlref | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/python/ql/src/experimental/Security/CWE-611/XXE.qlref b/python/ql/src/experimental/Security/CWE-611/XXE.qlref index 6e7b39fffb02..928cdb469233 100644 --- a/python/ql/src/experimental/Security/CWE-611/XXE.qlref +++ b/python/ql/src/experimental/Security/CWE-611/XXE.qlref @@ -12,18 +12,16 @@ data retrieval techniques may allow attackers to steal sensitive data. Denial of in this situation.

    -There are many XML parsers for Python, and most of them are vulnerable to XXE because their default settings enable -parsing of external entities. This query currently identifies vulnerable XML parsing from the following parsers: -xml.etree.ElementTree.XMLParser, lxml.etree.XMLParser, lxml.etree.get_default_parser, -xml.sax.make_parser. +Refer to the following links to check the details regarding how and which libraries are vulnerable: +

  • Python 3.
  • +
  • Python 2.
  • -The best way to prevent XXE attacks is to disable the parsing of any Document Type Declarations (DTDs) in untrusted data. -If this is not possible you should disable the parsing of external general entities and external parameter entities. -This improves security but the code will still be at risk of denial of service and server side request forgery attacks. +Use defusedxml, a Python package aimed +to prevent any potentially malicious operation.

    From 61a81b60e8ff56e2af69271edf0bb662322627bb Mon Sep 17 00:00:00 2001 From: jorgectf Date: Thu, 9 Sep 2021 19:06:58 +0200 Subject: [PATCH 16/79] Extend `.qlref` --- python/ql/src/experimental/Security/CWE-611/XXE.qlref | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/python/ql/src/experimental/Security/CWE-611/XXE.qlref b/python/ql/src/experimental/Security/CWE-611/XXE.qlref index 928cdb469233..d8b0c6a7ed19 100644 --- a/python/ql/src/experimental/Security/CWE-611/XXE.qlref +++ b/python/ql/src/experimental/Security/CWE-611/XXE.qlref @@ -16,6 +16,11 @@ Refer to the following links to check the details regarding how and which librar
  • Python 3.
  • Python 2.
  • +

    +This query currently identifies vulnerable XML parsing from the following parsers: +xml.etree.ElementTree.XMLParser, lxml.etree.XMLParser, lxml.etree.get_default_parser, +xml.sax.make_parser. +

    From 9c286a1b5024bd75304f60448416c04c2ebac720 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Tue, 28 Sep 2021 16:57:46 +0200 Subject: [PATCH 17/79] Python: fix name of `.qhelp` file --- .../ql/src/experimental/Security/CWE-611/{XXE.qlref => XXE.qhelp} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename python/ql/src/experimental/Security/CWE-611/{XXE.qlref => XXE.qhelp} (100%) diff --git a/python/ql/src/experimental/Security/CWE-611/XXE.qlref b/python/ql/src/experimental/Security/CWE-611/XXE.qhelp similarity index 100% rename from python/ql/src/experimental/Security/CWE-611/XXE.qlref rename to python/ql/src/experimental/Security/CWE-611/XXE.qhelp From e472814ddd53b2f636ab06366352b54e76237bbc Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Tue, 28 Sep 2021 17:02:39 +0200 Subject: [PATCH 18/79] Python: Fix XXE qhelp --- python/ql/src/experimental/Security/CWE-611/XXE.qhelp | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/python/ql/src/experimental/Security/CWE-611/XXE.qhelp b/python/ql/src/experimental/Security/CWE-611/XXE.qhelp index d8b0c6a7ed19..bc00aa2f756d 100644 --- a/python/ql/src/experimental/Security/CWE-611/XXE.qhelp +++ b/python/ql/src/experimental/Security/CWE-611/XXE.qhelp @@ -13,11 +13,15 @@ in this situation.

    Refer to the following links to check the details regarding how and which libraries are vulnerable: +

    + + +

    -This query currently identifies vulnerable XML parsing from the following parsers: +This query currently identifies vulnerable XML parsing from the following parsers: xml.etree.ElementTree.XMLParser, lxml.etree.XMLParser, lxml.etree.get_default_parser, xml.sax.make_parser.

    @@ -47,4 +51,4 @@ that is not safely configured on untrusted data, and is therefore inherently uns
  • Denial of service attack (Billion laughs): Billion Laughs.
  • - \ No newline at end of file + From 8df3dab1210e11c587b7f7e28eb20f5b68c16973 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Tue, 28 Sep 2021 17:04:20 +0200 Subject: [PATCH 19/79] Python: Adjust `.expected` with subpaths --- .../test/experimental/query-tests/Security/CWE-611/XXE.expected | 1 + 1 file changed, 1 insertion(+) diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/XXE.expected b/python/ql/test/experimental/query-tests/Security/CWE-611/XXE.expected index f177d78d9403..5471b1329e79 100644 --- a/python/ql/test/experimental/query-tests/Security/CWE-611/XXE.expected +++ b/python/ql/test/experimental/query-tests/Security/CWE-611/XXE.expected @@ -70,6 +70,7 @@ nodes | xml_sax_make_parser.py:71:19:71:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | | xml_sax_make_parser.py:71:19:71:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | | xml_sax_make_parser.py:75:34:75:54 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() | +subpaths #select | general.py:20:34:20:44 | ControlFlowNode for xml_content | general.py:18:19:18:25 | ControlFlowNode for request | general.py:20:34:20:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and isn't secured against XML External Entities abuse | general.py:20:34:20:44 | ControlFlowNode for xml_content | This | general.py:18:19:18:25 | ControlFlowNode for request | user-provided value | | general.py:27:27:27:37 | ControlFlowNode for xml_content | general.py:25:19:25:25 | ControlFlowNode for request | general.py:27:27:27:37 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and isn't secured against XML External Entities abuse | general.py:27:27:27:37 | ControlFlowNode for xml_content | This | general.py:25:19:25:25 | ControlFlowNode for request | user-provided value | From 15dfc6d1da9d8af2ed9a6f519744fab11a68367e Mon Sep 17 00:00:00 2001 From: jorgectf 
Date: Sat, 16 Oct 2021 09:50:58 +0200 Subject: [PATCH 20/79] Fix `xml_sax_parser.py` good/bad naming --- .../Security/CWE-611/xml_sax_make_parser.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/xml_sax_make_parser.py b/python/ql/test/experimental/query-tests/Security/CWE-611/xml_sax_make_parser.py index 11bb1715a0bf..9a7bc0050f7e 100644 --- a/python/ql/test/experimental/query-tests/Security/CWE-611/xml_sax_make_parser.py +++ b/python/ql/test/experimental/query-tests/Security/CWE-611/xml_sax_make_parser.py @@ -32,24 +32,24 @@ def mainHandler(): def xml_makeparser_MainHandler(): xml_content = request.args['xml_content'] - BadHandler = MainHandler() + GoodHandler = MainHandler() parser = xml.sax.make_parser() - parser.setContentHandler(BadHandler) + parser.setContentHandler(GoodHandler) parser.parse(StringIO(xml_content)) - return BadHandler._result + return GoodHandler._result @app.route("/xml.sax.make_parser()+MainHandler-xml.sax.handler.feature_external_ges_False") def xml_makeparser_MainHandler_entitiesFalse(): xml_content = request.args['xml_content'] - BadHandler = MainHandler() + GoodHandler = MainHandler() parser = xml.sax.make_parser() - parser.setContentHandler(BadHandler) + parser.setContentHandler(GoodHandler) # https://docs.python.org/3/library/xml.sax.handler.html#xml.sax.handler.feature_external_ges parser.setFeature(xml.sax.handler.feature_external_ges, False) parser.parse(StringIO(xml_content)) - return BadHandler._result + return GoodHandler._result # BAD @@ -58,12 +58,12 @@ def xml_makeparser_MainHandler_entitiesFalse(): def xml_makeparser_MainHandler_entitiesTrue(): xml_content = request.args['xml_content'] - GoodHandler = MainHandler() + BadHandler = MainHandler() parser = xml.sax.make_parser() - parser.setContentHandler(GoodHandler) + parser.setContentHandler(BadHandler) parser.setFeature(xml.sax.handler.feature_external_ges, True) 
parser.parse(StringIO(xml_content)) - return GoodHandler._result + return BadHandler._result @app.route("/xml.sax.make_parser()+xml.dom.minidom.parse-xml.sax.handler.feature_external_ges_True") From 5b66a15de35fbaf17b757f20b86ad4f6acbb9cb9 Mon Sep 17 00:00:00 2001 From: jorgectf Date: Sat, 16 Oct 2021 09:57:28 +0200 Subject: [PATCH 21/79] Extend `mayBeDangerous()` QLDoc --- .../ql/src/experimental/semmle/python/Concepts.qll | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/python/ql/src/experimental/semmle/python/Concepts.qll b/python/ql/src/experimental/semmle/python/Concepts.qll index c5789888c9d6..82bf7bde537e 100644 --- a/python/ql/src/experimental/semmle/python/Concepts.qll +++ b/python/ql/src/experimental/semmle/python/Concepts.qll @@ -97,6 +97,9 @@ module XMLParsing { /** * Holds if the parser may be parsing the input dangerously. + * + * Specifically, this predicate holds whether the XML parsing parses/extends external + * entities in the parsed XML stream. */ abstract predicate mayBeDangerous(); } @@ -115,6 +118,9 @@ class XMLParsing extends DataFlow::Node { /** * Gets the argument containing the content to parse. + * + * Specifically, this predicate holds whether the XML parsing parses/extends external + * entities in the parsed XML stream. */ DataFlow::Node getAnInput() { result = range.getAnInput() } @@ -140,6 +146,9 @@ module XMLParser { /** * Holds if the parser may be dangerously configured. + * + * Specifically, this predicate holds whether the XML parser parses/extends external + * entities in the parsed XML stream. */ abstract predicate mayBeDangerous(); } @@ -163,6 +172,9 @@ class XMLParser extends DataFlow::Node { /** * Holds if the parser may be dangerously configured. + * + * Specifically, this predicate holds whether the XML parser parses/extends external + * entities in the parsed XML stream. 
*/ predicate mayBeDangerous() { range.mayBeDangerous() } } From 320a00be31999c40203698779f46c052cb80f653 Mon Sep 17 00:00:00 2001 From: jorgectf Date: Sat, 16 Oct 2021 10:02:43 +0200 Subject: [PATCH 22/79] Delete simple `API::Node`s --- .../semmle/python/frameworks/XML.qll | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/python/ql/src/experimental/semmle/python/frameworks/XML.qll b/python/ql/src/experimental/semmle/python/frameworks/XML.qll index 6c1228494d01..be3f082d59ec 100644 --- a/python/ql/src/experimental/semmle/python/frameworks/XML.qll +++ b/python/ql/src/experimental/semmle/python/frameworks/XML.qll @@ -9,11 +9,10 @@ private import experimental.semmle.python.Concepts private import semmle.python.ApiGraphs private module XML { - /** Gets a reference to the `xml` module. */ - private API::Node xml() { result = API::moduleImport("xml") } - /** Gets a reference to `xml.etree.ElementTree`. */ - private API::Node xmlEtree() { result = xml().getMember("etree").getMember("ElementTree") } + private API::Node xmlEtree() { + result = API::moduleImport("xml").getMember("etree").getMember("ElementTree") + } /** Gets a call to `xml.etree.ElementTree.XMLParser`. */ private class XMLEtreeParser extends DataFlow::CallCfgNode, XMLParser::Range { @@ -53,7 +52,7 @@ private module XML { } /** Gets a reference to `xml.sax`. */ - private API::Node xmlSax() { result = xml().getMember("sax") } + private API::Node xmlSax() { result = API::moduleImport("xml").getMember("sax") } /** * Gets a call to `xml.sax.make_parser` and following calls. @@ -93,11 +92,8 @@ private module XML { } } - /** Gets a reference to `lxml`. */ - private API::Node lxml() { result = API::moduleImport("lxml") } - /** Gets a reference to `lxml.etree`. 
*/ - private API::Node lxmlEtree() { result = lxml().getMember("etree") } + private API::Node lxmlEtree() { result = API::moduleImport("lxml").getMember("etree") } /** * Gets a call to `lxml.etree.XMLParser` or `lxml.etree.get_default_parser` and `mayBeDangerous()` @@ -166,7 +162,9 @@ private module XML { } /** Gets a reference to `xml.dom.minidom` or `xml.dom.pulldom`. */ - private API::Node xmlDom() { result = xml().getMember("dom").getMember(["mini", "pull"] + "dom") } + private API::Node xmlDom() { + result = API::moduleImport("xml").getMember("dom").getMember(["mini", "pull"] + "dom") + } /** * Gets a call to `xml.dom.minidom.parse` or `xml.dom.pulldom.parse`. From be424704a64f2a186b049e3b6856d9108f39b4bf Mon Sep 17 00:00:00 2001 From: Jorge <46056498+jorgectf@users.noreply.github.com> Date: Sat, 16 Oct 2021 10:04:50 +0200 Subject: [PATCH 23/79] Apply suggestions from code review Co-authored-by: Rasmus Wriedt Larsen --- python/ql/src/experimental/semmle/python/frameworks/XML.qll | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/python/ql/src/experimental/semmle/python/frameworks/XML.qll b/python/ql/src/experimental/semmle/python/frameworks/XML.qll index be3f082d59ec..081f085801ec 100644 --- a/python/ql/src/experimental/semmle/python/frameworks/XML.qll +++ b/python/ql/src/experimental/semmle/python/frameworks/XML.qll @@ -69,8 +69,9 @@ private module XML { * ``` * * `this` would be `xml.sax.make_parser()`, `getAnInput()` would return `StringIO(xml_content)` - * and `mayBeDangerous()` would succeed since `xml.sax.handler.feature_external_ges` is set to - * `False` and so it's vulnerable. + * and `mayBeDangerous()` would not hold since `xml.sax.handler.feature_external_ges` is set to + * `False` and so is not vulnerable. 
+ * see https://docs.python.org/3/library/xml.sax.handler.html#xml.sax.handler.feature_external_ges */ private class XMLSaxParser extends DataFlow::CallCfgNode, XMLParser::Range { DataFlow::CallCfgNode attrCall; From c2046f177782b7403031dbf8238617077500f93a Mon Sep 17 00:00:00 2001 From: jorgectf Date: Sat, 16 Oct 2021 10:07:11 +0200 Subject: [PATCH 24/79] Improve readability for `xmlDom()` --- python/ql/src/experimental/semmle/python/frameworks/XML.qll | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/ql/src/experimental/semmle/python/frameworks/XML.qll b/python/ql/src/experimental/semmle/python/frameworks/XML.qll index be3f082d59ec..6440a61bbe0f 100644 --- a/python/ql/src/experimental/semmle/python/frameworks/XML.qll +++ b/python/ql/src/experimental/semmle/python/frameworks/XML.qll @@ -163,7 +163,7 @@ private module XML { /** Gets a reference to `xml.dom.minidom` or `xml.dom.pulldom`. */ private API::Node xmlDom() { - result = API::moduleImport("xml").getMember("dom").getMember(["mini", "pull"] + "dom") + result = API::moduleImport("xml").getMember("dom").getMember(["minidom", "pulldom"]) } /** From 066b40098c899b41565b4d2bec7159036c47720d Mon Sep 17 00:00:00 2001 From: jorgectf Date: Thu, 28 Oct 2021 19:34:15 +0200 Subject: [PATCH 25/79] Add `lxml.etree.XMLParser` missing `resolve_entities` dangerous case --- python/ql/src/experimental/semmle/python/frameworks/XML.qll | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/python/ql/src/experimental/semmle/python/frameworks/XML.qll b/python/ql/src/experimental/semmle/python/frameworks/XML.qll index 1116d83e7f9d..067c101a6af9 100644 --- a/python/ql/src/experimental/semmle/python/frameworks/XML.qll +++ b/python/ql/src/experimental/semmle/python/frameworks/XML.qll @@ -99,7 +99,8 @@ private module XML { /** * Gets a call to `lxml.etree.XMLParser` or `lxml.etree.get_default_parser` and `mayBeDangerous()` * identifies whether the argument `no_network` is set to `False` or the 
arguments `huge_tree` - * or `resolve_entities` are set to True. + * or `resolve_entities` are set to True. Since `resolve_entities` default value is `True`, + * the predicate will also succeed if the argument is not set. */ private class LXMLParser extends DataFlow::CallCfgNode, XMLParser::Range { LXMLParser() { this = lxmlEtree().getMember(["XMLParser", "get_default_parser"]).getACall() } @@ -110,7 +111,8 @@ private module XML { not exists(this.getArgByName(_)) or DataFlow::localFlow(DataFlow::exprNode(any(False falseName)), this.getArgByName("no_network")) or DataFlow::localFlow(DataFlow::exprNode(any(True trueName)), - this.getArgByName(["huge_tree", "resolve_entities"])) + this.getArgByName(["huge_tree", "resolve_entities"])) or + not exists(this.getArgByName("resolve_entities")) } } From 637901d980968c3e244587a954e981248cd7643f Mon Sep 17 00:00:00 2001 From: jorgectf Date: Tue, 16 Nov 2021 13:25:29 +0100 Subject: [PATCH 26/79] Make concepts instances of their ranges --- .../experimental/semmle/python/Concepts.qll | 20 ++++++------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git a/python/ql/src/experimental/semmle/python/Concepts.qll b/python/ql/src/experimental/semmle/python/Concepts.qll index 7bb988f10594..c655f5efb9a9 100644 --- a/python/ql/src/experimental/semmle/python/Concepts.qll +++ b/python/ql/src/experimental/semmle/python/Concepts.qll @@ -74,23 +74,19 @@ module XMLParsing { * Extend this class to model new APIs. If you want to refine existing API models, * extend `XMLParsing` instead. */ -class XMLParsing extends DataFlow::Node { - XMLParsing::Range range; - - XMLParsing() { this = range } - +class XMLParsing extends DataFlow::Node instanceof XMLParsing::Range { /** * Gets the argument containing the content to parse. * * Specifically, this predicate holds whether the XML parsing parses/extends external * entities in the parsed XML stream. 
*/ - DataFlow::Node getAnInput() { result = range.getAnInput() } + DataFlow::Node getAnInput() { result = super.getAnInput() } /** * Holds if the parser may be parsing the input dangerously. */ - predicate mayBeDangerous() { range.mayBeDangerous() } + predicate mayBeDangerous() { super.mayBeDangerous() } } /** Provides classes for modeling XML parsers. */ @@ -123,15 +119,11 @@ module XMLParser { * Extend this class to model new APIs. If you want to refine existing API models, * extend `XMLParser` instead. */ -class XMLParser extends DataFlow::Node { - XMLParser::Range range; - - XMLParser() { this = range } - +class XMLParser extends DataFlow::Node instanceof XMLParser::Range { /** * Gets the argument containing the content to parse. */ - DataFlow::Node getAnInput() { result = range.getAnInput() } + DataFlow::Node getAnInput() { result = super.getAnInput() } /** * Holds if the parser may be dangerously configured. @@ -139,7 +131,7 @@ class XMLParser extends DataFlow::Node { * Specifically, this predicate holds whether the XML parser parses/extends external * entities in the parsed XML stream. */ - predicate mayBeDangerous() { range.mayBeDangerous() } + predicate mayBeDangerous() { super.mayBeDangerous() } } /** Provides classes for modeling LDAP query execution-related APIs. 
*/ From cb8e54e38ef2cebcd0104c144c942930f0d780ac Mon Sep 17 00:00:00 2001 From: jorgectf Date: Tue, 16 Nov 2021 13:27:24 +0100 Subject: [PATCH 27/79] Delete redundant `LXMLParser` dangerous check --- python/ql/src/experimental/semmle/python/frameworks/XML.qll | 1 - 1 file changed, 1 deletion(-) diff --git a/python/ql/src/experimental/semmle/python/frameworks/XML.qll b/python/ql/src/experimental/semmle/python/frameworks/XML.qll index 067c101a6af9..8fce0b0172c4 100644 --- a/python/ql/src/experimental/semmle/python/frameworks/XML.qll +++ b/python/ql/src/experimental/semmle/python/frameworks/XML.qll @@ -108,7 +108,6 @@ private module XML { override DataFlow::Node getAnInput() { none() } override predicate mayBeDangerous() { - not exists(this.getArgByName(_)) or DataFlow::localFlow(DataFlow::exprNode(any(False falseName)), this.getArgByName("no_network")) or DataFlow::localFlow(DataFlow::exprNode(any(True trueName)), this.getArgByName(["huge_tree", "resolve_entities"])) or From 9ab6d217578877344a2c2d516bae174318231772 Mon Sep 17 00:00:00 2001 From: jorgectf Date: Fri, 14 Jan 2022 22:56:51 +0100 Subject: [PATCH 28/79] Add forward type tracking test --- .../Security/CWE-611/xml_sax_make_parser.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/xml_sax_make_parser.py b/python/ql/test/experimental/query-tests/Security/CWE-611/xml_sax_make_parser.py index 9a7bc0050f7e..b48dee89abe2 100644 --- a/python/ql/test/experimental/query-tests/Security/CWE-611/xml_sax_make_parser.py +++ b/python/ql/test/experimental/query-tests/Security/CWE-611/xml_sax_make_parser.py @@ -73,3 +73,13 @@ def xml_makeparser_minidom_entitiesTrue(): parser = xml.sax.make_parser() parser.setFeature(xml.sax.handler.feature_external_ges, True) return xml.dom.minidom.parse(StringIO(xml_content), parser=parser).documentElement.childNodes + +# Forward Type Tracker test + +def contrived_example(user_input, action): + parser = 
xml.sax.make_parser() + if action == 'load-config': + parser.setFeature(xml.sax.handler.feature_external_ges, False) + parser.parse("/not-user-controlled/default_config.xml") + else: + parser.parse(StringIO(user_input)) \ No newline at end of file From 8f9cd168067a4c4d56d12d20a9e926bb05142f19 Mon Sep 17 00:00:00 2001 From: jorgectf Date: Tue, 8 Feb 2022 17:23:18 +0100 Subject: [PATCH 29/79] Update --- .../src/experimental/Security/CWE-611/XXE.py | 13 - .../src/experimental/Security/CWE-611/XXE.ql | 22 -- .../src/experimental/Security/CWE-611/XXE.xml | 4 + .../Security/CWE-611/XmlInjection.py | 25 ++ .../CWE-611/{XXE.qhelp => XmlInjection.qhelp} | 28 +- .../Security/CWE-611/XmlInjection.ql | 22 ++ .../experimental/semmle/python/Concepts.qll | 108 +++---- .../experimental/semmle/python/Frameworks.qll | 2 +- .../semmle/python/frameworks/XML.qll | 196 ------------ .../semmle/python/frameworks/Xml.qll | 301 ++++++++++++++++++ .../semmle/python/security/XXE.qll | 37 --- .../python/security/dataflow/XmlInjection.qll | 44 +++ .../dataflow/XmlInjectionCustomizations.qll | 95 ++++++ .../query-tests/Security/CWE-611/XXE.qlref | 1 - .../{XXE.expected => XmlInjection.expected} | 0 .../Security/CWE-611/XmlInjection.qlref | 1 + .../query-tests/Security/CWE-611/general.py | 73 ----- .../Security/CWE-611/lxml_etree.py | 76 +++++ .../query-tests/Security/CWE-611/xml_dom.py | 44 +++ .../query-tests/Security/CWE-611/xml_etree.py | 66 ++++ .../Security/CWE-611/xml_sax_make_parser.py | 24 +- .../Security/CWE-611/xml_to_dict.py | 17 + .../Security/CWE-611/xmlrpc_server.py | 10 + 23 files changed, 786 insertions(+), 423 deletions(-) delete mode 100644 python/ql/src/experimental/Security/CWE-611/XXE.py delete mode 100644 python/ql/src/experimental/Security/CWE-611/XXE.ql create mode 100644 python/ql/src/experimental/Security/CWE-611/XXE.xml create mode 100644 python/ql/src/experimental/Security/CWE-611/XmlInjection.py rename python/ql/src/experimental/Security/CWE-611/{XXE.qhelp => 
XmlInjection.qhelp} (71%) create mode 100644 python/ql/src/experimental/Security/CWE-611/XmlInjection.ql delete mode 100644 python/ql/src/experimental/semmle/python/frameworks/XML.qll create mode 100644 python/ql/src/experimental/semmle/python/frameworks/Xml.qll delete mode 100644 python/ql/src/experimental/semmle/python/security/XXE.qll create mode 100644 python/ql/src/experimental/semmle/python/security/dataflow/XmlInjection.qll create mode 100644 python/ql/src/experimental/semmle/python/security/dataflow/XmlInjectionCustomizations.qll delete mode 100644 python/ql/test/experimental/query-tests/Security/CWE-611/XXE.qlref rename python/ql/test/experimental/query-tests/Security/CWE-611/{XXE.expected => XmlInjection.expected} (100%) create mode 100644 python/ql/test/experimental/query-tests/Security/CWE-611/XmlInjection.qlref delete mode 100644 python/ql/test/experimental/query-tests/Security/CWE-611/general.py create mode 100644 python/ql/test/experimental/query-tests/Security/CWE-611/lxml_etree.py create mode 100644 python/ql/test/experimental/query-tests/Security/CWE-611/xml_dom.py create mode 100644 python/ql/test/experimental/query-tests/Security/CWE-611/xml_etree.py create mode 100644 python/ql/test/experimental/query-tests/Security/CWE-611/xml_to_dict.py create mode 100644 python/ql/test/experimental/query-tests/Security/CWE-611/xmlrpc_server.py diff --git a/python/ql/src/experimental/Security/CWE-611/XXE.py b/python/ql/src/experimental/Security/CWE-611/XXE.py deleted file mode 100644 index 7b42c8990983..000000000000 --- a/python/ql/src/experimental/Security/CWE-611/XXE.py +++ /dev/null @@ -1,13 +0,0 @@ -from flask import request, Flask -import lxml.etree -import xml.etree.ElementTree - - -@app.route("/example") -def example(): - xml_content = request.args['xml_content'] - - parser = lxml.etree.XMLParser() - parsed_xml = xml.etree.ElementTree.fromstring(xml_content, parser=parser) - - return parsed_xml.text diff --git 
a/python/ql/src/experimental/Security/CWE-611/XXE.ql b/python/ql/src/experimental/Security/CWE-611/XXE.ql deleted file mode 100644 index 78866def1f58..000000000000 --- a/python/ql/src/experimental/Security/CWE-611/XXE.ql +++ /dev/null @@ -1,22 +0,0 @@ -/** - * @name XML External Entity abuse - * @description User input should not be parsed by XML parsers without security options enabled. - * @kind path-problem - * @problem.severity error - * @id py/xxe - * @tags security - * external/cwe/cwe-611 - * external/cwe/cwe-776 - * external/cwe/cwe-827 - */ - -// determine precision above -import python -import experimental.semmle.python.security.XXE -import DataFlow::PathGraph - -from XXEFlowConfig config, DataFlow::PathNode source, DataFlow::PathNode sink -where config.hasFlowPath(source, sink) -select sink.getNode(), source, sink, - "$@ XML input is constructed from a $@ and isn't secured against XML External Entities abuse", - sink.getNode(), "This", source.getNode(), "user-provided value" diff --git a/python/ql/src/experimental/Security/CWE-611/XXE.xml b/python/ql/src/experimental/Security/CWE-611/XXE.xml new file mode 100644 index 000000000000..ddd196f2f137 --- /dev/null +++ b/python/ql/src/experimental/Security/CWE-611/XXE.xml @@ -0,0 +1,4 @@ + +]> +&xxe; \ No newline at end of file diff --git a/python/ql/src/experimental/Security/CWE-611/XmlInjection.py b/python/ql/src/experimental/Security/CWE-611/XmlInjection.py new file mode 100644 index 000000000000..0e9eec933d7d --- /dev/null +++ b/python/ql/src/experimental/Security/CWE-611/XmlInjection.py @@ -0,0 +1,25 @@ +from flask import request, Flask +import lxml.etree +import xml.etree.ElementTree + +app = Flask(__name__) + +# BAD +@app.route("/bad") +def bad(): + xml_content = request.args['xml_content'] + + parser = lxml.etree.XMLParser() + parsed_xml = xml.etree.ElementTree.fromstring(xml_content, parser=parser) + + return parsed_xml.text + +# GOOD +@app.route("/good") +def good(): + xml_content = 
request.args['xml_content'] + + parser = lxml.etree.XMLParser(resolve_entities=False) + parsed_xml = xml.etree.ElementTree.fromstring(xml_content, parser=parser) + + return parsed_xml.text \ No newline at end of file diff --git a/python/ql/src/experimental/Security/CWE-611/XXE.qhelp b/python/ql/src/experimental/Security/CWE-611/XmlInjection.qhelp similarity index 71% rename from python/ql/src/experimental/Security/CWE-611/XXE.qhelp rename to python/ql/src/experimental/Security/CWE-611/XmlInjection.qhelp index bc00aa2f756d..e617835bdef2 100644 --- a/python/ql/src/experimental/Security/CWE-611/XXE.qhelp +++ b/python/ql/src/experimental/Security/CWE-611/XmlInjection.qhelp @@ -5,31 +5,18 @@

    -Parsing untrusted XML files with a weakly configured XML parser may lead to an XML External Entity (XXE) attack. +Parsing untrusted XML files with a weakly configured XML parser may lead to attacks such as XML External Entity (XXE), +Billion Laughs, Quadratic Blowup and DTD retrieval. This type of attack uses external entity references to access arbitrary files on a system, carry out denial of service, or server-side request forgery. Even when the result of parsing is not returned to the user, out-of-band data retrieval techniques may allow attackers to steal sensitive data. Denial of service can also be carried out in this situation.

    -

    -Refer to the following links to check the details regarding how and which libraries are vulnerable: -

    - - - -

    -This query currently identifies vulnerable XML parsing from the following parsers: -xml.etree.ElementTree.XMLParser, lxml.etree.XMLParser, lxml.etree.get_default_parser, -xml.sax.make_parser. -

    -Use defusedxml, a Python package aimed +Use defusedxml, a Python package aimed to prevent any potentially malicious operation.

    @@ -39,10 +26,17 @@ to prevent any potentially malicious operation. The following example calls xml.etree.ElementTree.fromstring using a parser (lxml.etree.XMLParser) that is not safely configured on untrusted data, and is therefore inherently unsafe.

    - + +

    +If an input (xml_content) like the following XML content is sent to /bad, the response will contain the contents of +/etc/passwd. +

    + +
  • Python 3 XML Vulnerabilities.
  • +
  • Python 2 XML Vulnerabilities.
  • Python XML Parsing.
  • OWASP vulnerability description: XML External Entity (XXE) Processing.
  • OWASP guidance on parsing xml files: XXE Prevention Cheat Sheet.
  • diff --git a/python/ql/src/experimental/Security/CWE-611/XmlInjection.ql b/python/ql/src/experimental/Security/CWE-611/XmlInjection.ql new file mode 100644 index 000000000000..78213f624eaf --- /dev/null +++ b/python/ql/src/experimental/Security/CWE-611/XmlInjection.ql @@ -0,0 +1,22 @@ +/** + * @name XML injection + * @description User input should not be parsed without security options enabled. + * @kind path-problem + * @problem.severity error + * @id py/xml-injection + * @tags security + * external/cwe/cwe-611 + * external/cwe/cwe-776 + * external/cwe/cwe-827 + */ + +// determine precision above +import python +import experimental.semmle.python.security.dataflow.XmlInjection +import DataFlow::PathGraph + +from DataFlow::PathNode source, DataFlow::PathNode sink, string kind +where XmlInjection::xmlInjectionVulnerable(source, sink, kind) +select sink.getNode(), source, sink, + "$@ XML input is constructed from a $@ and is vulnerable to " + kind + ".", sink.getNode(), + "This", source.getNode(), "user-provided value" diff --git a/python/ql/src/experimental/semmle/python/Concepts.qll b/python/ql/src/experimental/semmle/python/Concepts.qll index c6b6ed6a0d5c..e2dbf0547d8e 100644 --- a/python/ql/src/experimental/semmle/python/Concepts.qll +++ b/python/ql/src/experimental/semmle/python/Concepts.qll @@ -44,94 +44,84 @@ class LogOutput extends DataFlow::Node { DataFlow::Node getAnInput() { result = range.getAnInput() } } -/** Provides classes for modeling XML parsing APIs. */ -module XMLParsing { +module XML { /** * A data-flow node that collects functions parsing XML. * * Extend this class to model new APIs. If you want to refine existing API models, * extend `XMLParsing` instead. */ - abstract class Range extends DataFlow::Node { + class XMLParsing extends DataFlow::Node instanceof XMLParsing::Range { /** * Gets the argument containing the content to parse. 
*/ - abstract DataFlow::Node getAnInput(); + DataFlow::Node getAnInput() { result = super.getAnInput() } /** - * Holds if the parser may be parsing the input dangerously. - * - * Specifically, this predicate holds whether the XML parsing parses/extends external - * entities in the parsed XML stream. + * Holds if the parsing method or the parser holding it is vulnerable to `kind`. */ - abstract predicate mayBeDangerous(); + predicate vulnerable(string kind) { super.vulnerable(kind) } } -} -/** - * A data-flow node that collects functions parsing XML. - * - * Extend this class to model new APIs. If you want to refine existing API models, - * extend `XMLParsing` instead. - */ -class XMLParsing extends DataFlow::Node instanceof XMLParsing::Range { - /** - * Gets the argument containing the content to parse. - * - * Specifically, this predicate holds whether the XML parsing parses/extends external - * entities in the parsed XML stream. - */ - DataFlow::Node getAnInput() { result = super.getAnInput() } - - /** - * Holds if the parser may be parsing the input dangerously. - */ - predicate mayBeDangerous() { super.mayBeDangerous() } -} + /** Provides classes for modeling XML parsing APIs. */ + module XMLParsing { + /** + * A data-flow node that collects functions parsing XML. + * + * Extend this class to model new APIs. If you want to refine existing API models, + * extend `XMLParsing` instead. + */ + abstract class Range extends DataFlow::Node { + /** + * Gets the argument containing the content to parse. + */ + abstract DataFlow::Node getAnInput(); + + /** + * Holds if the parsing method or the parser holding it is vulnerable to `kind`. + */ + abstract predicate vulnerable(string kind); + } + } -/** Provides classes for modeling XML parsers. */ -module XMLParser { /** * A data-flow node that collects XML parsers. * * Extend this class to model new APIs. If you want to refine existing API models, * extend `XMLParser` instead. 
*/ - abstract class Range extends DataFlow::Node { + class XMLParser extends DataFlow::Node instanceof XMLParser::Range { /** * Gets the argument containing the content to parse. */ - abstract DataFlow::Node getAnInput(); + DataFlow::Node getAnInput() { result = super.getAnInput() } /** - * Holds if the parser may be dangerously configured. - * - * Specifically, this predicate holds whether the XML parser parses/extends external - * entities in the parsed XML stream. + * Holds if the parser is vulnerable to `kind`. */ - abstract predicate mayBeDangerous(); + predicate vulnerable(string kind) { super.vulnerable(kind) } } -} - -/** - * A data-flow node that collects XML parsers. - * - * Extend this class to model new APIs. If you want to refine existing API models, - * extend `XMLParser` instead. - */ -class XMLParser extends DataFlow::Node instanceof XMLParser::Range { - /** - * Gets the argument containing the content to parse. - */ - DataFlow::Node getAnInput() { result = super.getAnInput() } - /** - * Holds if the parser may be dangerously configured. - * - * Specifically, this predicate holds whether the XML parser parses/extends external - * entities in the parsed XML stream. - */ - predicate mayBeDangerous() { super.mayBeDangerous() } + /** Provides classes for modeling XML parsers. */ + module XMLParser { + /** + * A data-flow node that collects XML parsers. + * + * Extend this class to model new APIs. If you want to refine existing API models, + * extend `XMLParser` instead. + */ + abstract class Range extends DataFlow::Node { + /** + * Gets the argument containing the content to parse. + */ + abstract DataFlow::Node getAnInput(); + + /** + * Holds if the parser is vulnerable to `kind`. + */ + abstract predicate vulnerable(string kind); + } + } } /** Provides classes for modeling LDAP query execution-related APIs. 
*/ diff --git a/python/ql/src/experimental/semmle/python/Frameworks.qll b/python/ql/src/experimental/semmle/python/Frameworks.qll index a32d1b0d0dea..37620db889ac 100644 --- a/python/ql/src/experimental/semmle/python/Frameworks.qll +++ b/python/ql/src/experimental/semmle/python/Frameworks.qll @@ -3,7 +3,7 @@ */ private import experimental.semmle.python.frameworks.Stdlib -private import experimental.semmle.python.frameworks.XML +private import experimental.semmle.python.frameworks.Xml private import experimental.semmle.python.frameworks.Flask private import experimental.semmle.python.frameworks.Django private import experimental.semmle.python.frameworks.Werkzeug diff --git a/python/ql/src/experimental/semmle/python/frameworks/XML.qll b/python/ql/src/experimental/semmle/python/frameworks/XML.qll deleted file mode 100644 index 8fce0b0172c4..000000000000 --- a/python/ql/src/experimental/semmle/python/frameworks/XML.qll +++ /dev/null @@ -1,196 +0,0 @@ -/** - * Provides class and predicates to track external data that - * may represent malicious XML objects. - */ - -private import python -private import semmle.python.dataflow.new.DataFlow -private import experimental.semmle.python.Concepts -private import semmle.python.ApiGraphs - -private module XML { - /** Gets a reference to `xml.etree.ElementTree`. */ - private API::Node xmlEtree() { - result = API::moduleImport("xml").getMember("etree").getMember("ElementTree") - } - - /** Gets a call to `xml.etree.ElementTree.XMLParser`. */ - private class XMLEtreeParser extends DataFlow::CallCfgNode, XMLParser::Range { - XMLEtreeParser() { this = xmlEtree().getMember("XMLParser").getACall() } - - override DataFlow::Node getAnInput() { none() } - - override predicate mayBeDangerous() { any() } - } - - /** - * Gets a call to `xml.etree.ElementTree.fromstring`, `xml.etree.ElementTree.fromstringlist`, - * `xml.etree.ElementTree.XML` or `xml.etree.ElementTree.parse`. 
- * - * Given the following example: - * - * ```py - * parser = lxml.etree.XMLParser() - * parsed_xml = xml.etree.ElementTree.fromstring(xml_content, parser=parser).text - * ``` - * - * `this` would be `xml.etree.ElementTree.fromstring(xml_content, parser=parser)` - * and `xml_content` would be the result of `getAnInput()`. - */ - private class XMLEtreeParsing extends DataFlow::CallCfgNode, XMLParsing::Range { - XMLEtreeParsing() { - this = xmlEtree().getMember(["fromstring", "fromstringlist", "XML", "parse"]).getACall() - } - - override DataFlow::Node getAnInput() { result = this.getArg(0) } - - override predicate mayBeDangerous() { - exists(XMLParser xmlParser | - xmlParser.mayBeDangerous() and this.getArgByName("parser").getALocalSource() = xmlParser - ) - } - } - - /** Gets a reference to `xml.sax`. */ - private API::Node xmlSax() { result = API::moduleImport("xml").getMember("sax") } - - /** - * Gets a call to `xml.sax.make_parser` and following calls. - * - * Given the following example: - * - * ```py - * BadHandler = MainHandler() - * parser = xml.sax.make_parser() - * parser.setContentHandler(BadHandler) - * parser.setFeature(xml.sax.handler.feature_external_ges, False) - * parser.parse(StringIO(xml_content)) - * parsed_xml = BadHandler._result - * ``` - * - * `this` would be `xml.sax.make_parser()`, `getAnInput()` would return `StringIO(xml_content)` - * and `mayBeDangerous()` would not hold since `xml.sax.handler.feature_external_ges` is set to - * `False` and so is not vulnerable. 
- * see https://docs.python.org/3/library/xml.sax.handler.html#xml.sax.handler.feature_external_ges - */ - private class XMLSaxParser extends DataFlow::CallCfgNode, XMLParser::Range { - DataFlow::CallCfgNode attrCall; - - XMLSaxParser() { - this = xmlSax().getMember("make_parser").getACall() and - attrCall.getFunction().(DataFlow::AttrRead).getObject().getALocalSource() = this - } - - override DataFlow::Node getAnInput() { - attrCall.getFunction().(DataFlow::AttrRead).getAttributeName() = "parse" and - result = attrCall.getArg(0) - } - - override predicate mayBeDangerous() { - attrCall.getFunction().(DataFlow::AttrRead).getAttributeName() = "setFeature" and - attrCall.getArg(0) = xmlSax().getMember("handler").getMember("feature_external_ges").getAUse() and - DataFlow::localFlow(DataFlow::exprNode(any(True trueName)), attrCall.getArg(1)) - } - } - - /** Gets a reference to `lxml.etree`. */ - private API::Node lxmlEtree() { result = API::moduleImport("lxml").getMember("etree") } - - /** - * Gets a call to `lxml.etree.XMLParser` or `lxml.etree.get_default_parser` and `mayBeDangerous()` - * identifies whether the argument `no_network` is set to `False` or the arguments `huge_tree` - * or `resolve_entities` are set to True. Since `resolve_entities` default value is `True`, - * the predicate will also succeed if the argument is not set. 
- */ - private class LXMLParser extends DataFlow::CallCfgNode, XMLParser::Range { - LXMLParser() { this = lxmlEtree().getMember(["XMLParser", "get_default_parser"]).getACall() } - - override DataFlow::Node getAnInput() { none() } - - override predicate mayBeDangerous() { - DataFlow::localFlow(DataFlow::exprNode(any(False falseName)), this.getArgByName("no_network")) or - DataFlow::localFlow(DataFlow::exprNode(any(True trueName)), - this.getArgByName(["huge_tree", "resolve_entities"])) or - not exists(this.getArgByName("resolve_entities")) - } - } - - /** - * Gets a call to `lxml.etree.fromstring`, `xml.etree.fromstringlist`, - * `xml.etree.XML` or `xml.etree.parse`. - * - * Given the following example: - * - * ```py - * parser = lxml.etree.XMLParser() - * parsed_xml = lxml.etree.fromstring(xml_content, parser=parser).text - * ``` - * - * `this` would be `lxml.etree.fromstring(xml_content, parser=parser)` - * and `xml_content` would be the result of `getAnInput()`. - */ - private class LXMLParsing extends DataFlow::CallCfgNode, XMLParsing::Range { - LXMLParsing() { - this = lxmlEtree().getMember(["fromstring", "fromstringlist", "XML", "parse"]).getACall() - } - - override DataFlow::Node getAnInput() { result = this.getArg(0) } - - override predicate mayBeDangerous() { - exists(XMLParser xmlParser | - xmlParser.mayBeDangerous() and this.getArgByName("parser").getALocalSource() = xmlParser - ) - or - not exists(this.getArgByName("parser")) - } - } - - /** Gets a reference to the `xmltodict` module. */ - private API::Node xmltodict() { result = API::moduleImport("xmltodict") } - - /** - * Gets a call to `xmltodict.parse` and `mayBeDangerous()` identifies - * whether the argument `disable_entities` is set to `False`. 
- */ - private class XMLtoDictParsing extends DataFlow::CallCfgNode, XMLParsing::Range { - XMLtoDictParsing() { this = xmltodict().getMember("parse").getACall() } - - override DataFlow::Node getAnInput() { result = this.getArg(0) } - - override predicate mayBeDangerous() { - DataFlow::localFlow(DataFlow::exprNode(any(False falseName)), - this.getArgByName("disable_entities")) - } - } - - /** Gets a reference to `xml.dom.minidom` or `xml.dom.pulldom`. */ - private API::Node xmlDom() { - result = API::moduleImport("xml").getMember("dom").getMember(["minidom", "pulldom"]) - } - - /** - * Gets a call to `xml.dom.minidom.parse` or `xml.dom.pulldom.parse`. - * - * Given the following example: - * - * ```py - * parser = xml.sax.make_parser() - * parser.setFeature(xml.sax.handler.feature_external_ges, True) - * parsed_xml = xml.dom.minidom.parse(StringIO(xml_content), parser=parser).documentElement.childNod - * ``` - * - * `this` would be `xml.dom.minidom.parse(StringIO(xml_content), parser=parser)` - * and `StringIO(xml_content)` would be the result of `getAnInput()`. - */ - private class XMLDomParsing extends DataFlow::CallCfgNode, XMLParsing::Range { - XMLDomParsing() { this = xmlDom().getMember("parse").getACall() } - - override DataFlow::Node getAnInput() { result = this.getArg(0) } - - override predicate mayBeDangerous() { - exists(XMLParser xmlParser | - xmlParser.mayBeDangerous() and this.getArgByName("parser").getALocalSource() = xmlParser - ) - } - } -} diff --git a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll new file mode 100644 index 000000000000..d2d1927e9539 --- /dev/null +++ b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll @@ -0,0 +1,301 @@ +/** + * Provides class and predicates to track external data that + * may represent malicious XML objects. 
+ */ + +private import python +private import semmle.python.dataflow.new.DataFlow +private import experimental.semmle.python.Concepts +private import semmle.python.ApiGraphs + +private module Xml { + /** + * Gets a call to `xml.etree.ElementTree.XMLParser`. + */ + private class XMLEtreeParser extends DataFlow::CallCfgNode, XML::XMLParser::Range { + XMLEtreeParser() { + this = + API::moduleImport("xml") + .getMember("etree") + .getMember("ElementTree") + .getMember("XMLParser") + .getACall() + } + + override DataFlow::Node getAnInput() { none() } + + override predicate vulnerable(string kind) { none() } + } + + /** + * Gets a call to: + * * `xml.etree.ElementTree.fromstring` + * * `xml.etree.ElementTree.fromstringlist` + * * `xml.etree.ElementTree.XML` + * * `xml.etree.ElementTree.parse` + * + * Given the following example: + * + * ```py + * parser = lxml.etree.XMLParser() + * xml.etree.ElementTree.fromstring(xml_content, parser=parser).text + * ``` + * + * * `this` would be `xml.etree.ElementTree.fromstring(xml_content, parser=parser)`. + * * `getAnInput()`'s result would be `xml_content`. + * * `vulnerable(kind)`'s `kind` would be `XXE`. + */ + private class XMLEtreeParsing extends DataFlow::CallCfgNode, XML::XMLParsing::Range { + XMLEtreeParsing() { + this = + API::moduleImport("xml") + .getMember("etree") + .getMember("ElementTree") + .getMember(["fromstring", "fromstringlist", "XML", "parse"]) + .getACall() + } + + override DataFlow::Node getAnInput() { result = this.getArg(0) } + + override predicate vulnerable(string kind) { + exists(XML::XMLParser xmlParser | + xmlParser = this.getArgByName("parser").getALocalSource() and xmlParser.vulnerable(kind) + ) + } + } + + /** Gets a reference to a `parser` that has been set a `feature`. 
*/ + private DataFlow::Node trackSaxFeature( + DataFlow::TypeTracker t, DataFlow::CallCfgNode parser, API::Node feature + ) { + t.start() and + exists(DataFlow::MethodCallNode featureCall | + featureCall = parser.getAMethodCall("setFeature") and + featureCall.getArg(0).getALocalSource() = feature.getAUse() and + featureCall.getArg(1).getALocalSource() = DataFlow::exprNode(any(True t_)) and + result = featureCall.getObject() + ) + or + exists(DataFlow::TypeTracker t2 | + t = t2.smallstep(trackSaxFeature(t2, parser, feature), result) + ) + } + + /** Gets a reference to a `parser` that has been set a `feature`. */ + DataFlow::Node trackSaxFeature(DataFlow::CallCfgNode parser, API::Node feature) { + result = trackSaxFeature(DataFlow::TypeTracker::end(), parser, feature) + } + + /** + * Gets a call to `xml.sax.make_parser`. + * + * Given the following example: + * + * ```py + * BadHandler = MainHandler() + * parser = xml.sax.make_parser() + * parser.setContentHandler(BadHandler) + * parser.setFeature(xml.sax.handler.feature_external_ges, False) + * parser.parse(StringIO(xml_content)) + * parsed_xml = BadHandler._result + * ``` + * + * * `this` would be `xml.sax.make_parser()`. + * * `getAnInput()`'s result would be `StringIO(xml_content)`. + * * `vulnerable(kind)`'s `kind` would be `Billion Laughs` and `Quadratic Blowup`. + */ + private class XMLSaxParser extends DataFlow::CallCfgNode, XML::XMLParser::Range { + XMLSaxParser() { + this = API::moduleImport("xml").getMember("sax").getMember("make_parser").getACall() + } + + override DataFlow::Node getAnInput() { result = this.getAMethodCall("parse").getArg(0) } + + override predicate vulnerable(string kind) { + exists(DataFlow::MethodCallNode parse, API::Node handler, API::Node feature | + handler = API::moduleImport("xml").getMember("sax").getMember("handler") and + parse.calls(trackSaxFeature(this, feature), "parse") and + parse.getArg(0) = this.getAnInput() // enough to avoid FPs? 
+ | + kind = ["XXE", "DTD retrieval"] and + feature = handler.getMember("feature_external_ges") + or + kind = ["Billion Laughs", "Quadratic Blowup"] + ) + } + + predicate vulnerable(DataFlow::Node n, string kind) { + exists(API::Node handler, API::Node feature | + handler = API::moduleImport("xml").getMember("sax").getMember("handler") and + DataFlow::exprNode(trackSaxFeature(this, feature).asExpr()) + .(DataFlow::LocalSourceNode) + .flowsTo(n) + | + kind = ["XXE", "DTD retrieval"] and + feature = handler.getMember("feature_external_ges") + ) + } + } + + /** + * Gets a call to: + * * `lxml.etree.XMLParser` + * * `lxml.etree.get_default_parser` + * + * Given the following example: + * + * ```py + * lxml.etree.XMLParser() + * ``` + * + * * `this` would be `lxml.etree.XMLParser(resolve_entities=False)`. + * * `vulnerable(kind)`'s `kind` would be `XXE` + */ + private class LXMLParser extends DataFlow::CallCfgNode, XML::XMLParser::Range { + LXMLParser() { + this = + API::moduleImport("lxml") + .getMember("etree") + .getMember(["XMLParser", "get_default_parser"]) + .getACall() + } + + override DataFlow::Node getAnInput() { none() } + + override predicate vulnerable(string kind) { + kind = "XXE" and not this.getArgByName("resolve_entities").asExpr() = any(False f) + or + kind = ["Billion Laughs", "Quadratic Blowup"] and + ( + this.getArgByName("huge_tree").asExpr() = any(True t) and + not this.getArgByName("resolve_entities").asExpr() = any(False f) + ) + } + } + + /** + * Gets a call to: + * * `lxml.etree.fromstring` + * * `xml.etree.fromstringlist` + * * `xml.etree.XML` + * * `xml.etree.parse` + * + * Given the following example: + * + * ```py + * parser = lxml.etree.XMLParser() + * lxml.etree.fromstring(xml_content, parser=parser).text + * ``` + * + * * `this` would be `lxml.etree.fromstring(xml_content, parser=parser)`. + * * `getAnInput()`'s result would be `xml_content`. + * * `vulnerable(kind)`'s `kind` would be `XXE`. 
+ */ + private class LXMLParsing extends DataFlow::CallCfgNode, XML::XMLParsing::Range { + LXMLParsing() { + this = + API::moduleImport("lxml") + .getMember("etree") + .getMember(["fromstring", "fromstringlist", "XML", "parse"]) + .getACall() + } + + override DataFlow::Node getAnInput() { result = this.getArg(0) } + + override predicate vulnerable(string kind) { + exists(XML::XMLParser xmlParser | + xmlParser = this.getArgByName("parser").getALocalSource() and xmlParser.vulnerable(kind) + ) + or + kind = "XXE" and not exists(this.getArgByName("parser")) + } + } + + /** + * Gets a call to `xmltodict.parse`. + * + * Given the following example: + * + * ```py + * xmltodict.parse(xml_content, disable_entities=False) + * ``` + * + * * `this` would be `xmltodict.parse(xml_content, disable_entities=False)`. + * * `getAnInput()`'s result would be `xml_content`. + * * `vulnerable(kind)`'s `kind` would be `Billion Laughs` and `Quadratic Blowup`. + */ + private class XMLtoDictParsing extends DataFlow::CallCfgNode, XML::XMLParsing::Range { + XMLtoDictParsing() { this = API::moduleImport("xmltodict").getMember("parse").getACall() } + + override DataFlow::Node getAnInput() { result = this.getArg(0) } + + override predicate vulnerable(string kind) { + kind = ["Billion Laughs", "Quadratic Blowup"] and + this.getAMethodCall("disable_entities").asExpr() = any(False f) + } + } + + /** + * Gets a call to: + * * `xml.dom.minidom.parse` + * * `xml.dom.pulldom.parse` + * + * Given the following example: + * + * ```py + * xml.dom.minidom.parse(StringIO(xml_content)).documentElement.childNode + * ``` + * + * * `this` would be `xml.dom.minidom.parse(StringIO(xml_content), parser=parser)`. + * * `getAnInput()`'s result would be `StringIO(xml_content)`. + * * `vulnerable(kind)`'s `kind` would be `Billion Laughs` and `Quadratic Blowup`. 
+ */ + private class XMLDomParsing extends DataFlow::CallCfgNode, XML::XMLParsing::Range { + XMLDomParsing() { + this = + API::moduleImport("xml") + .getMember("dom") + .getMember(["minidom", "pulldom"]) + .getMember(["parse", "parseString"]) + .getACall() + } + + override DataFlow::Node getAnInput() { result = this.getArg(0) } + + override predicate vulnerable(string kind) { + exists(XML::XMLParser xmlParser | + xmlParser = this.getArgByName("parser").getALocalSource() and xmlParser.vulnerable(kind) + ) + or + kind = ["Billion Laughs", "Quadratic Blowup"] and not exists(this.getArgByName("parser")) + } + } + + /** + * Gets a call to `xmlrpc.server.SimpleXMLRPCServer`. + * + * Given the following example: + * + * ```py + * server = SimpleXMLRPCServer(("127.0.0.1", 8000)) + * server.register_function(foo, "foo") + * server.serve_forever() + * ``` + * + * * `this` would be `SimpleXMLRPCServer(("127.0.0.1", 8000))`. + * * `getAnInput()`'s result would be `foo`. + * * `vulnerable(kind)`'s `kind` would be `Billion Laughs` and `Quadratic Blowup`. 
+ */ + private class XMLRPCServer extends DataFlow::CallCfgNode, XML::XMLParser::Range { + XMLRPCServer() { + this = + API::moduleImport("xmlrpc").getMember("server").getMember("SimpleXMLRPCServer").getACall() + } + + override DataFlow::Node getAnInput() { + result = this.getAMethodCall("register_function").getArg(0) + } + + override predicate vulnerable(string kind) { kind = ["Billion Laughs", "Quadratic Blowup"] } + } +} diff --git a/python/ql/src/experimental/semmle/python/security/XXE.qll b/python/ql/src/experimental/semmle/python/security/XXE.qll deleted file mode 100644 index 7998d4081db8..000000000000 --- a/python/ql/src/experimental/semmle/python/security/XXE.qll +++ /dev/null @@ -1,37 +0,0 @@ -import python -import experimental.semmle.python.Concepts -import semmle.python.dataflow.new.DataFlow -import semmle.python.dataflow.new.TaintTracking -import semmle.python.dataflow.new.RemoteFlowSources -import semmle.python.dataflow.new.BarrierGuards -import semmle.python.ApiGraphs - -/** - * A taint-tracking configuration for detecting XML External entities abuse. - * - * This configuration uses `RemoteFlowSource` as a source because there's no - * risk at parsing not user-supplied input without security options enabled. 
- */ -class XXEFlowConfig extends TaintTracking::Configuration { - XXEFlowConfig() { this = "XXEFlowConfig" } - - override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource } - - override predicate isSink(DataFlow::Node sink) { - exists(XMLParsing xmlParsing | xmlParsing.mayBeDangerous() and sink = xmlParsing.getAnInput()) - or - exists(XMLParser xmlParser | sink = xmlParser.getAnInput() and xmlParser.mayBeDangerous()) - } - - override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) { - guard instanceof StringConstCompare - } - - override predicate isAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { - exists(DataFlow::CallCfgNode ioCalls | - ioCalls = API::moduleImport("io").getMember(["StringIO", "BytesIO"]).getACall() and - nodeFrom = ioCalls.getArg(0) and - nodeTo = ioCalls - ) - } -} diff --git a/python/ql/src/experimental/semmle/python/security/dataflow/XmlInjection.qll b/python/ql/src/experimental/semmle/python/security/dataflow/XmlInjection.qll new file mode 100644 index 000000000000..90e2c9bf342c --- /dev/null +++ b/python/ql/src/experimental/semmle/python/security/dataflow/XmlInjection.qll @@ -0,0 +1,44 @@ +import python +import experimental.semmle.python.Concepts +import semmle.python.dataflow.new.DataFlow +import semmle.python.dataflow.new.TaintTracking +import semmle.python.dataflow.new.RemoteFlowSources +import semmle.python.dataflow.new.BarrierGuards + +module XmlInjection { + import XmlInjectionCustomizations::XmlInjection + + class XMLInjectionConfiguration extends TaintTracking::Configuration { + XMLInjectionConfiguration() { this = "XMLInjectionConfiguration" } + + override predicate isSource(DataFlow::Node source) { + source instanceof RemoteFlowSourceAsSource + } + + override predicate isSink(DataFlow::Node sink) { sink instanceof Sink } + + override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) { + guard instanceof SanitizerGuard + } + + override predicate 
isAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { + ioAdditionalTaintStep(nodeFrom, nodeTo) + } + } + + private import DataFlow::PathGraph + + /** Holds if there is an XML injection from `source` to `sink` */ + predicate xmlInjection(DataFlow::PathNode source, DataFlow::PathNode sink) { + any(XMLInjectionConfiguration xmlInjectionConfig).hasFlowPath(source, sink) + } + + /** Holds if there is an XML injection from `source` to `sink` vulnerable to `kind` */ + predicate xmlInjectionVulnerable(DataFlow::PathNode source, DataFlow::PathNode sink, string kind) { + xmlInjection(source, sink) and + ( + xmlParsingInputAsVulnerableSink(sink.getNode(), kind) or + xmlParserInputAsVulnerableSink(sink.getNode(), kind) + ) + } +} diff --git a/python/ql/src/experimental/semmle/python/security/dataflow/XmlInjectionCustomizations.qll b/python/ql/src/experimental/semmle/python/security/dataflow/XmlInjectionCustomizations.qll new file mode 100644 index 000000000000..3e9dd22c69c0 --- /dev/null +++ b/python/ql/src/experimental/semmle/python/security/dataflow/XmlInjectionCustomizations.qll @@ -0,0 +1,95 @@ +/** + * Provides default sources, sinks and sanitizers for detecting + * "ldap injection" + * vulnerabilities, as well as extension points for adding your own. + */ + +private import python +private import semmle.python.dataflow.new.DataFlow +private import experimental.semmle.python.Concepts +private import semmle.python.dataflow.new.RemoteFlowSources +private import semmle.python.dataflow.new.BarrierGuards +private import semmle.python.ApiGraphs + +/** + * Provides default sources, sinks and sanitizers for detecting "xml injection" + * vulnerabilities, as well as extension points for adding your own. + */ +module XmlInjection { + /** + * A data flow source for "xml injection" vulnerabilities. + */ + abstract class Source extends DataFlow::Node { } + + /** + * A data flow sink for "xml injection" vulnerabilities. 
+ */ + abstract class Sink extends DataFlow::Node { } + + /** + * A sanitizer guard for "xml injection" vulnerabilities. + */ + abstract class SanitizerGuard extends DataFlow::BarrierGuard { } + + /** + * A data flow sink for XML parsing libraries. + * + * See `XML::XMLParsing`. + */ + abstract class XMLParsingSink extends Sink { } + + /** + * A data flow sink for XML parsers. + * + * See `XML::XMLParser` + */ + abstract class XMLParserSink extends Sink { } + + /** + * A source of remote user input, considered as a flow source. + */ + class RemoteFlowSourceAsSource extends Source, RemoteFlowSource { } + + /** + * An xml parsing operation, considered as a flow sink. + */ + class XMLParsingInputAsSink extends XMLParsingSink { + XMLParsingInputAsSink() { this = any(XML::XMLParsing xmlParsing).getAnInput() } + } + + /** + * An xml parsing operation vulnerable to `kind`. + */ + predicate xmlParsingInputAsVulnerableSink(DataFlow::Node sink, string kind) { + exists(XML::XMLParsing xmlParsing | + sink = xmlParsing.getAnInput() and xmlParsing.vulnerable(kind) + ) + } + + /** + * An xml parser operation, considered as a flow sink. + */ + class XMLParserInputAsSink extends XMLParserSink { + XMLParserInputAsSink() { this = any(XML::XMLParser xmlParser).getAnInput() } + } + + /** + * An xml parser operation vulnerable to `kind`. + */ + predicate xmlParserInputAsVulnerableSink(DataFlow::Node sink, string kind) { + exists(XML::XMLParser xmlParser | sink = xmlParser.getAnInput() and xmlParser.vulnerable(kind)) + } + + /** + * A comparison with a constant string, considered as a sanitizer-guard. 
+ */ + class StringConstCompareAsSanitizerGuard extends SanitizerGuard, StringConstCompare { } + + predicate ioAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { + exists(DataFlow::CallCfgNode ioCalls | + ioCalls = API::moduleImport("io").getMember(["StringIO", "BytesIO"]).getACall() and + nodeFrom = ioCalls.getArg(0) and + nodeTo = ioCalls + ) + } +} diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/XXE.qlref b/python/ql/test/experimental/query-tests/Security/CWE-611/XXE.qlref deleted file mode 100644 index ada2b1e52024..000000000000 --- a/python/ql/test/experimental/query-tests/Security/CWE-611/XXE.qlref +++ /dev/null @@ -1 +0,0 @@ -experimental/Security/CWE-611/XXE.ql diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/XXE.expected b/python/ql/test/experimental/query-tests/Security/CWE-611/XmlInjection.expected similarity index 100% rename from python/ql/test/experimental/query-tests/Security/CWE-611/XXE.expected rename to python/ql/test/experimental/query-tests/Security/CWE-611/XmlInjection.expected diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/XmlInjection.qlref b/python/ql/test/experimental/query-tests/Security/CWE-611/XmlInjection.qlref new file mode 100644 index 000000000000..24d483666acc --- /dev/null +++ b/python/ql/test/experimental/query-tests/Security/CWE-611/XmlInjection.qlref @@ -0,0 +1 @@ +experimental/Security/CWE-611/XmlInjection.ql diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/general.py b/python/ql/test/experimental/query-tests/Security/CWE-611/general.py deleted file mode 100644 index c9f8cc984bda..000000000000 --- a/python/ql/test/experimental/query-tests/Security/CWE-611/general.py +++ /dev/null @@ -1,73 +0,0 @@ -from flask import request, Flask -from io import StringIO, BytesIO -import xml.etree -import xml.etree.ElementTree -import lxml.etree -import xml.dom.minidom -import xml.dom.pulldom -import xmltodict - - -app = Flask(__name__) - 
-# xml_content = ']>&xxe;' - - -@app.route("/lxml.etree.fromstring") -def lxml_fromstring(): - xml_content = request.args['xml_content'] - - return lxml.etree.fromstring(xml_content).text - - -@app.route("/lxml.etree.XML") -def lxml_XML(): - xml_content = request.args['xml_content'] - - return lxml.etree.XML(xml_content).text - - -@app.route("/lxml.etree.parse") -def lxml_parse(): - xml_content = request.args['xml_content'] - - return lxml.etree.parse(StringIO(xml_content)).text - - -@app.route("/xmltodict.parse") -def xmltodict_parse(): - xml_content = request.args['xml_content'] - - return xmltodict.parse(xml_content, disable_entities=False) - - -@app.route("/lxml.etree.XMLParser+lxml.etree.fromstring") -def lxml_XMLParser_fromstring(): - xml_content = request.args['xml_content'] - - parser = lxml.etree.XMLParser() - return lxml.etree.fromstring(xml_content, parser=parser).text - - -@app.route("/lxml.etree.get_default_parser+lxml.etree.fromstring") -def lxml_defaultParser_fromstring(): - xml_content = request.args['xml_content'] - - parser = lxml.etree.get_default_parser() - return lxml.etree.fromstring(xml_content, parser=parser).text - - -@app.route("/lxml.etree.XMLParser+xml.etree.ElementTree.fromstring") -def lxml_XMLParser_xml_fromstring(): - xml_content = request.args['xml_content'] - - parser = lxml.etree.XMLParser() - return xml.etree.ElementTree.fromstring(xml_content, parser=parser).text - - -@app.route("/lxml.etree.XMLParser+xml.etree.ElementTree.parse") -def lxml_XMLParser_xml_parse(): - xml_content = request.args['xml_content'] - - parser = lxml.etree.XMLParser() - return xml.etree.ElementTree.parse(StringIO(xml_content), parser=parser).getroot().text diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/lxml_etree.py b/python/ql/test/experimental/query-tests/Security/CWE-611/lxml_etree.py new file mode 100644 index 000000000000..2c3c6f5f2ffc --- /dev/null +++ b/python/ql/test/experimental/query-tests/Security/CWE-611/lxml_etree.py @@ 
-0,0 +1,76 @@ +from flask import request, Flask +from io import StringIO, BytesIO +import lxml.etree + +app = Flask(__name__) + +# Parsing + +@app.route("/lxml_etree_fromstring") +def lxml_etree_fromstring(): + xml_content = request.args['xml_content'] + + return lxml.etree.fromstring(xml_content).text + +@app.route("/lxml_etree_fromstringlist") +def lxml_etree_fromstringlist(): + xml_content = request.args['xml_content'] + + return lxml.etree.fromstringlist([xml_content]).text + +@app.route("/lxml_etree_XML") +def lxml_etree_XML(): + xml_content = request.args['xml_content'] + + return lxml.etree.XML(xml_content).text + +@app.route("/lxml_etree_parse") +def lxml_etree_parse(): + xml_content = request.args['xml_content'] + + return lxml.etree.parse(StringIO(xml_content)).getroot().text + +# With parsers - Default + +@app.route("/lxml_etree_fromstring-lxml.etree.XMLParser") +def lxml_parser(): + xml_content = request.args['xml_content'] + + parser = lxml.etree.XMLParser() + return lxml.etree.fromstring(xml_content, parser=parser).text + +@app.route("/lxml_etree_fromstring-lxml.etree.get_default_parser") +def lxml_parser(): + xml_content = request.args['xml_content'] + + parser = lxml.etree.get_default_parser() + return lxml.etree.fromstring(xml_content, parser=parser).text + +# With parsers - With options + +# XXE-safe +@app.route("/lxml_etree_fromstring-lxml.etree.XMLParser+") +def lxml_parser(): + xml_content = request.args['xml_content'] + + parser = lxml.etree.XMLParser(resolve_entities=False) + return lxml.etree.fromstring(xml_content, parser=parser).text + +# Billion laughs and quadratic blowup (huge_tree) + +## Good (huge_tree=True but resolve_entities=False) + +@app.route("/lxml_etree_fromstring-lxml.etree.XMLParser+") +def lxml_parser(): + xml_content = request.args['xml_content'] + + parser = lxml.etree.XMLParser(resolve_entities=False, huge_tree=True) + return lxml.etree.fromstring(xml_content, parser=parser).text + +## Bad 
+@app.route("/lxml_etree_fromstring-lxml.etree.XMLParser+") +def lxml_parser(): + xml_content = request.args['xml_content'] + + parser = lxml.etree.XMLParser(huge_tree=True) + return lxml.etree.fromstring(xml_content, parser=parser).text diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/xml_dom.py b/python/ql/test/experimental/query-tests/Security/CWE-611/xml_dom.py new file mode 100644 index 000000000000..428a2d645a1b --- /dev/null +++ b/python/ql/test/experimental/query-tests/Security/CWE-611/xml_dom.py @@ -0,0 +1,44 @@ +from flask import request, Flask +from io import StringIO, BytesIO +import xml.dom.minidom +import xml.dom.pulldom +import xml.sax + +app = Flask(__name__) + +# Parsing + +@app.route("/xml_minidom_parse") +def xml_minidom_parse(): + xml_content = request.args['xml_content'] + + return xml.dom.minidom.parse(StringIO(xml_content)).documentElement.childNodes + +@app.route("/xml_minidom_parseString") +def xml_minidom_parseString(): + xml_content = request.args['xml_content'] + + return xml.dom.minidom.parseString(xml_content).documentElement.childNodes + +@app.route("/xml_pulldom_parse") +def xml_pulldom_parse(): + xml_content = request.args['xml_content'] + + return xml.dom.pulldom.parse(StringIO(xml_content))['START_DOCUMENT'][1].documentElement.childNodes + +@app.route("/xml_pulldom_parseString") +def xml_pulldom_parseString(): + xml_content = request.args['xml_content'] + + return xml.dom.pulldom.parseString(xml_content)['START_DOCUMENT'][1].documentElement.childNodes + +# With parsers + +@app.route("/xml_minidom_parse_xml_sax_make_parser") +def xml_minidom_parse_xml_sax_make_parser(): + xml_content = request.args['xml_content'] + + parser = xml.sax.make_parser() + parser.setFeature(xml.sax.handler.feature_external_ges, True) + return xml.dom.minidom.parse(StringIO(xml_content), parser=parser).documentElement.childNodes + diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/xml_etree.py 
b/python/ql/test/experimental/query-tests/Security/CWE-611/xml_etree.py new file mode 100644 index 000000000000..caa321c09260 --- /dev/null +++ b/python/ql/test/experimental/query-tests/Security/CWE-611/xml_etree.py @@ -0,0 +1,66 @@ +from flask import request, Flask +from io import StringIO, BytesIO +import xml.etree +import xml.etree.ElementTree +import lxml.etree + +app = Flask(__name__) + +# xxe = ']>&xxe;' + +# Parsing + +@app.route("/xml_etree_fromstring") +def xml_etree_fromstring(): + xml_content = request.args['xml_content'] + + return xml.etree.ElementTree.fromstring(xml_content).text + +@app.route("/xml_etree_fromstringlist") +def xml_etree_fromstringlist(): + xml_content = request.args['xml_content'] + + return xml.etree.ElementTree.fromstringlist(xml_content).text + +@app.route("/xml_etree_XML") +def xml_etree_XML(): + xml_content = request.args['xml_content'] + + return xml.etree.ElementTree.XML(xml_content).text + +@app.route("/xml_etree_parse") +def xml_etree_parse(): + xml_content = request.args['xml_content'] + + return xml.etree.ElementTree.parse(StringIO(xml_content)).getroot().text + +# With parsers + +@app.route("/xml_etree_fromstring-xml_etree_XMLParser") +def xml_parser_1(): + xml_content = request.args['xml_content'] + + parser = xml.etree.ElementTree.XMLParser() + return xml.etree.ElementTree.fromstring(xml_content, parser=parser).text + +@app.route("/xml_etree_fromstring-lxml_etree_XMLParser") +def xml_parser_2(): + xml_content = request.args['xml_content'] + + parser = lxml.etree.XMLParser() + return xml.etree.ElementTree.fromstring(xml_content, parser=parser).text + +@app.route("/xml_etree_fromstring-lxml_get_default_parser") +def xml_parser_3(): + xml_content = request.args['xml_content'] + + parser = lxml.etree.get_default_parser() + return xml.etree.ElementTree.fromstring(xml_content, parser=parser).text + +@app.route("/xml_etree_fromstring-lxml_get_default_parser") +def xml_parser_4(): + xml_content = request.args['xml_content'] + + 
parser = xml.sax.make_parser() + parser.setFeature(xml.sax.handler.feature_external_ges, True) + return xml.etree.ElementTree.fromstring(xml_content, parser=parser).text \ No newline at end of file diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/xml_sax_make_parser.py b/python/ql/test/experimental/query-tests/Security/CWE-611/xml_sax_make_parser.py index b48dee89abe2..0d7bbcaee61d 100644 --- a/python/ql/test/experimental/query-tests/Security/CWE-611/xml_sax_make_parser.py +++ b/python/ql/test/experimental/query-tests/Security/CWE-611/xml_sax_make_parser.py @@ -2,7 +2,7 @@ from io import StringIO import xml.sax -# xml_content = ']>&xxe;' +# xxe = ']>&xxe;' app = Flask(__name__) @@ -74,12 +74,28 @@ def xml_makeparser_minidom_entitiesTrue(): parser.setFeature(xml.sax.handler.feature_external_ges, True) return xml.dom.minidom.parse(StringIO(xml_content), parser=parser).documentElement.childNodes -# Forward Type Tracker test +# Forward Type Tracking test + +@app.route("forward_tracking1") +def forward_tracking1(action): + xml_content = request.args['xml_content'] -def contrived_example(user_input, action): parser = xml.sax.make_parser() if action == 'load-config': parser.setFeature(xml.sax.handler.feature_external_ges, False) parser.parse("/not-user-controlled/default_config.xml") else: - parser.parse(StringIO(user_input)) \ No newline at end of file + parser.parse(StringIO(xml_content)) + return + +@app.route("forward_tracking2") +def forward_tracking2(action): + xml_content = request.args['xml_content'] + + parser = xml.sax.make_parser() + if action == 'load-config': + parser.setFeature(xml.sax.handler.feature_external_ges, True) + parser.parse("/not-user-controlled/default_config.xml") + else: + parser.parse(StringIO(xml_content)) + return diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/xml_to_dict.py b/python/ql/test/experimental/query-tests/Security/CWE-611/xml_to_dict.py new file mode 100644 index 
000000000000..2b91a22e1a22 --- /dev/null +++ b/python/ql/test/experimental/query-tests/Security/CWE-611/xml_to_dict.py @@ -0,0 +1,17 @@ +from flask import request, Flask +from io import StringIO, BytesIO +import xmltodict + +app = Flask(__name__) + +@app.route("/xmltodict.parse") +def xmltodict_parse(): + xml_content = request.args['xml_content'] + + return xmltodict.parse(xml_content) + +@app.route("/xmltodict.parse2") +def xmltodict_parse2(): + xml_content = request.args['xml_content'] + + return xmltodict.parse(xml_content, disable_entities=False) \ No newline at end of file diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/xmlrpc_server.py b/python/ql/test/experimental/query-tests/Security/CWE-611/xmlrpc_server.py new file mode 100644 index 000000000000..baa433c4a8ab --- /dev/null +++ b/python/ql/test/experimental/query-tests/Security/CWE-611/xmlrpc_server.py @@ -0,0 +1,10 @@ +from xmlrpc.server import SimpleXMLRPCServer + +def foo(n): + return n + +server = SimpleXMLRPCServer(("127.0.0.1", 8000)) +server.register_function(foo, "foo") +server.serve_forever() + +# billion_laughs -> curl 127.0.0.1:8000 --data-raw ']>foo&lol9;' From 7c4a6a12b0292b16376b4d2f50c03fc9b9450f2a Mon Sep 17 00:00:00 2001 From: jorgectf Date: Tue, 8 Feb 2022 17:50:39 +0100 Subject: [PATCH 30/79] Test polish --- .../test/experimental/query-tests/Security/CWE-611/xml_etree.py | 2 -- .../query-tests/Security/CWE-611/xml_sax_make_parser.py | 2 -- 2 files changed, 4 deletions(-) diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/xml_etree.py b/python/ql/test/experimental/query-tests/Security/CWE-611/xml_etree.py index caa321c09260..b9c980045e2a 100644 --- a/python/ql/test/experimental/query-tests/Security/CWE-611/xml_etree.py +++ b/python/ql/test/experimental/query-tests/Security/CWE-611/xml_etree.py @@ -6,8 +6,6 @@ app = Flask(__name__) -# xxe = ']>&xxe;' - # Parsing @app.route("/xml_etree_fromstring") diff --git 
a/python/ql/test/experimental/query-tests/Security/CWE-611/xml_sax_make_parser.py b/python/ql/test/experimental/query-tests/Security/CWE-611/xml_sax_make_parser.py index 0d7bbcaee61d..9f858d99ddd2 100644 --- a/python/ql/test/experimental/query-tests/Security/CWE-611/xml_sax_make_parser.py +++ b/python/ql/test/experimental/query-tests/Security/CWE-611/xml_sax_make_parser.py @@ -2,8 +2,6 @@ from io import StringIO import xml.sax -# xxe = ']>&xxe;' - app = Flask(__name__) From 01ad25f3f06776bc967bfbf0f5c9a67e19e5cc48 Mon Sep 17 00:00:00 2001 From: jorgectf Date: Tue, 8 Feb 2022 17:51:09 +0100 Subject: [PATCH 31/79] Apply `.getALocalSource()` and fix `xmltodict`'s `vulnerable` predicate --- .../experimental/semmle/python/frameworks/Xml.qll | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll index d2d1927e9539..cf4abbac995b 100644 --- a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll +++ b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll @@ -163,12 +163,16 @@ private module Xml { override DataFlow::Node getAnInput() { none() } override predicate vulnerable(string kind) { - kind = "XXE" and not this.getArgByName("resolve_entities").asExpr() = any(False f) + kind = "XXE" and + not ( + exists(this.getArgByName("resolve_entities")) or + this.getArgByName("resolve_entities").getALocalSource().asExpr() = any(False f) + ) or kind = ["Billion Laughs", "Quadratic Blowup"] and ( - this.getArgByName("huge_tree").asExpr() = any(True t) and - not this.getArgByName("resolve_entities").asExpr() = any(False f) + this.getArgByName("huge_tree").getALocalSource().asExpr() = any(True t) and + not this.getArgByName("resolve_entities").getALocalSource().asExpr() = any(False f) ) } } @@ -231,7 +235,7 @@ private module Xml { override predicate vulnerable(string kind) { kind = ["Billion Laughs", "Quadratic Blowup"] and - 
this.getAMethodCall("disable_entities").asExpr() = any(False f) + this.getArgByName("disable_entities").getALocalSource().asExpr() = any(False f) } } From b00051e4abfb2fd11a743e360dac4982656dd849 Mon Sep 17 00:00:00 2001 From: jorgectf Date: Tue, 8 Feb 2022 17:52:37 +0100 Subject: [PATCH 32/79] Update `.expected` --- .../Security/CWE-611/XmlInjection.expected | 319 +++++++++++++----- 1 file changed, 239 insertions(+), 80 deletions(-) diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/XmlInjection.expected b/python/ql/test/experimental/query-tests/Security/CWE-611/XmlInjection.expected index 5471b1329e79..081a8c6e6af8 100644 --- a/python/ql/test/experimental/query-tests/Security/CWE-611/XmlInjection.expected +++ b/python/ql/test/experimental/query-tests/Security/CWE-611/XmlInjection.expected @@ -1,84 +1,243 @@ edges -| general.py:18:19:18:25 | ControlFlowNode for request | general.py:18:19:18:30 | ControlFlowNode for Attribute | -| general.py:18:19:18:30 | ControlFlowNode for Attribute | general.py:18:19:18:45 | ControlFlowNode for Subscript | -| general.py:18:19:18:45 | ControlFlowNode for Subscript | general.py:20:34:20:44 | ControlFlowNode for xml_content | -| general.py:25:19:25:25 | ControlFlowNode for request | general.py:25:19:25:30 | ControlFlowNode for Attribute | -| general.py:25:19:25:30 | ControlFlowNode for Attribute | general.py:25:19:25:45 | ControlFlowNode for Subscript | -| general.py:25:19:25:45 | ControlFlowNode for Subscript | general.py:27:27:27:37 | ControlFlowNode for xml_content | -| general.py:32:19:32:25 | ControlFlowNode for request | general.py:32:19:32:30 | ControlFlowNode for Attribute | -| general.py:32:19:32:30 | ControlFlowNode for Attribute | general.py:32:19:32:45 | ControlFlowNode for Subscript | -| general.py:32:19:32:45 | ControlFlowNode for Subscript | general.py:34:29:34:49 | ControlFlowNode for StringIO() | -| general.py:39:19:39:25 | ControlFlowNode for request | general.py:39:19:39:30 | ControlFlowNode 
for Attribute | -| general.py:39:19:39:30 | ControlFlowNode for Attribute | general.py:39:19:39:45 | ControlFlowNode for Subscript | -| general.py:39:19:39:45 | ControlFlowNode for Subscript | general.py:41:28:41:38 | ControlFlowNode for xml_content | -| general.py:46:19:46:25 | ControlFlowNode for request | general.py:46:19:46:30 | ControlFlowNode for Attribute | -| general.py:46:19:46:30 | ControlFlowNode for Attribute | general.py:46:19:46:45 | ControlFlowNode for Subscript | -| general.py:46:19:46:45 | ControlFlowNode for Subscript | general.py:49:34:49:44 | ControlFlowNode for xml_content | -| general.py:54:19:54:25 | ControlFlowNode for request | general.py:54:19:54:30 | ControlFlowNode for Attribute | -| general.py:54:19:54:30 | ControlFlowNode for Attribute | general.py:54:19:54:45 | ControlFlowNode for Subscript | -| general.py:54:19:54:45 | ControlFlowNode for Subscript | general.py:57:34:57:44 | ControlFlowNode for xml_content | -| general.py:62:19:62:25 | ControlFlowNode for request | general.py:62:19:62:30 | ControlFlowNode for Attribute | -| general.py:62:19:62:30 | ControlFlowNode for Attribute | general.py:62:19:62:45 | ControlFlowNode for Subscript | -| general.py:62:19:62:45 | ControlFlowNode for Subscript | general.py:65:45:65:55 | ControlFlowNode for xml_content | -| general.py:70:19:70:25 | ControlFlowNode for request | general.py:70:19:70:30 | ControlFlowNode for Attribute | -| general.py:70:19:70:30 | ControlFlowNode for Attribute | general.py:70:19:70:45 | ControlFlowNode for Subscript | -| general.py:70:19:70:45 | ControlFlowNode for Subscript | general.py:73:40:73:60 | ControlFlowNode for StringIO() | -| xml_sax_make_parser.py:59:19:59:25 | ControlFlowNode for request | xml_sax_make_parser.py:59:19:59:30 | ControlFlowNode for Attribute | -| xml_sax_make_parser.py:59:19:59:30 | ControlFlowNode for Attribute | xml_sax_make_parser.py:59:19:59:45 | ControlFlowNode for Subscript | -| xml_sax_make_parser.py:59:19:59:45 | ControlFlowNode for 
Subscript | xml_sax_make_parser.py:65:18:65:38 | ControlFlowNode for StringIO() | -| xml_sax_make_parser.py:71:19:71:25 | ControlFlowNode for request | xml_sax_make_parser.py:71:19:71:30 | ControlFlowNode for Attribute | -| xml_sax_make_parser.py:71:19:71:30 | ControlFlowNode for Attribute | xml_sax_make_parser.py:71:19:71:45 | ControlFlowNode for Subscript | -| xml_sax_make_parser.py:71:19:71:45 | ControlFlowNode for Subscript | xml_sax_make_parser.py:75:34:75:54 | ControlFlowNode for StringIO() | +| lxml_etree.py:11:19:11:25 | ControlFlowNode for request | lxml_etree.py:11:19:11:30 | ControlFlowNode for Attribute | +| lxml_etree.py:11:19:11:30 | ControlFlowNode for Attribute | lxml_etree.py:11:19:11:45 | ControlFlowNode for Subscript | +| lxml_etree.py:11:19:11:45 | ControlFlowNode for Subscript | lxml_etree.py:13:34:13:44 | ControlFlowNode for xml_content | +| lxml_etree.py:17:19:17:25 | ControlFlowNode for request | lxml_etree.py:17:19:17:30 | ControlFlowNode for Attribute | +| lxml_etree.py:17:19:17:30 | ControlFlowNode for Attribute | lxml_etree.py:17:19:17:45 | ControlFlowNode for Subscript | +| lxml_etree.py:17:19:17:45 | ControlFlowNode for Subscript | lxml_etree.py:19:38:19:50 | ControlFlowNode for List | +| lxml_etree.py:23:19:23:25 | ControlFlowNode for request | lxml_etree.py:23:19:23:30 | ControlFlowNode for Attribute | +| lxml_etree.py:23:19:23:30 | ControlFlowNode for Attribute | lxml_etree.py:23:19:23:45 | ControlFlowNode for Subscript | +| lxml_etree.py:23:19:23:45 | ControlFlowNode for Subscript | lxml_etree.py:25:27:25:37 | ControlFlowNode for xml_content | +| lxml_etree.py:29:19:29:25 | ControlFlowNode for request | lxml_etree.py:29:19:29:30 | ControlFlowNode for Attribute | +| lxml_etree.py:29:19:29:30 | ControlFlowNode for Attribute | lxml_etree.py:29:19:29:45 | ControlFlowNode for Subscript | +| lxml_etree.py:29:19:29:45 | ControlFlowNode for Subscript | lxml_etree.py:31:29:31:49 | ControlFlowNode for StringIO() | +| 
lxml_etree.py:37:19:37:25 | ControlFlowNode for request | lxml_etree.py:37:19:37:30 | ControlFlowNode for Attribute | +| lxml_etree.py:37:19:37:30 | ControlFlowNode for Attribute | lxml_etree.py:37:19:37:45 | ControlFlowNode for Subscript | +| lxml_etree.py:37:19:37:45 | ControlFlowNode for Subscript | lxml_etree.py:40:34:40:44 | ControlFlowNode for xml_content | +| lxml_etree.py:44:19:44:25 | ControlFlowNode for request | lxml_etree.py:44:19:44:30 | ControlFlowNode for Attribute | +| lxml_etree.py:44:19:44:30 | ControlFlowNode for Attribute | lxml_etree.py:44:19:44:45 | ControlFlowNode for Subscript | +| lxml_etree.py:44:19:44:45 | ControlFlowNode for Subscript | lxml_etree.py:47:34:47:44 | ControlFlowNode for xml_content | +| lxml_etree.py:54:19:54:25 | ControlFlowNode for request | lxml_etree.py:54:19:54:30 | ControlFlowNode for Attribute | +| lxml_etree.py:54:19:54:30 | ControlFlowNode for Attribute | lxml_etree.py:54:19:54:45 | ControlFlowNode for Subscript | +| lxml_etree.py:54:19:54:45 | ControlFlowNode for Subscript | lxml_etree.py:57:34:57:44 | ControlFlowNode for xml_content | +| lxml_etree.py:65:19:65:25 | ControlFlowNode for request | lxml_etree.py:65:19:65:30 | ControlFlowNode for Attribute | +| lxml_etree.py:65:19:65:30 | ControlFlowNode for Attribute | lxml_etree.py:65:19:65:45 | ControlFlowNode for Subscript | +| lxml_etree.py:65:19:65:45 | ControlFlowNode for Subscript | lxml_etree.py:68:34:68:44 | ControlFlowNode for xml_content | +| lxml_etree.py:73:19:73:25 | ControlFlowNode for request | lxml_etree.py:73:19:73:30 | ControlFlowNode for Attribute | +| lxml_etree.py:73:19:73:30 | ControlFlowNode for Attribute | lxml_etree.py:73:19:73:45 | ControlFlowNode for Subscript | +| lxml_etree.py:73:19:73:45 | ControlFlowNode for Subscript | lxml_etree.py:76:34:76:44 | ControlFlowNode for xml_content | +| xml_dom.py:13:19:13:25 | ControlFlowNode for request | xml_dom.py:13:19:13:30 | ControlFlowNode for Attribute | +| xml_dom.py:13:19:13:30 | 
ControlFlowNode for Attribute | xml_dom.py:13:19:13:45 | ControlFlowNode for Subscript | +| xml_dom.py:13:19:13:45 | ControlFlowNode for Subscript | xml_dom.py:15:34:15:54 | ControlFlowNode for StringIO() | +| xml_dom.py:19:19:19:25 | ControlFlowNode for request | xml_dom.py:19:19:19:30 | ControlFlowNode for Attribute | +| xml_dom.py:19:19:19:30 | ControlFlowNode for Attribute | xml_dom.py:19:19:19:45 | ControlFlowNode for Subscript | +| xml_dom.py:19:19:19:45 | ControlFlowNode for Subscript | xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | +| xml_dom.py:25:19:25:25 | ControlFlowNode for request | xml_dom.py:25:19:25:30 | ControlFlowNode for Attribute | +| xml_dom.py:25:19:25:30 | ControlFlowNode for Attribute | xml_dom.py:25:19:25:45 | ControlFlowNode for Subscript | +| xml_dom.py:25:19:25:45 | ControlFlowNode for Subscript | xml_dom.py:27:34:27:54 | ControlFlowNode for StringIO() | +| xml_dom.py:31:19:31:25 | ControlFlowNode for request | xml_dom.py:31:19:31:30 | ControlFlowNode for Attribute | +| xml_dom.py:31:19:31:30 | ControlFlowNode for Attribute | xml_dom.py:31:19:31:45 | ControlFlowNode for Subscript | +| xml_dom.py:31:19:31:45 | ControlFlowNode for Subscript | xml_dom.py:33:40:33:50 | ControlFlowNode for xml_content | +| xml_dom.py:39:19:39:25 | ControlFlowNode for request | xml_dom.py:39:19:39:30 | ControlFlowNode for Attribute | +| xml_dom.py:39:19:39:30 | ControlFlowNode for Attribute | xml_dom.py:39:19:39:45 | ControlFlowNode for Subscript | +| xml_dom.py:39:19:39:45 | ControlFlowNode for Subscript | xml_dom.py:43:34:43:54 | ControlFlowNode for StringIO() | +| xml_etree.py:13:19:13:25 | ControlFlowNode for request | xml_etree.py:13:19:13:30 | ControlFlowNode for Attribute | +| xml_etree.py:13:19:13:30 | ControlFlowNode for Attribute | xml_etree.py:13:19:13:45 | ControlFlowNode for Subscript | +| xml_etree.py:13:19:13:45 | ControlFlowNode for Subscript | xml_etree.py:15:45:15:55 | ControlFlowNode for xml_content | +| xml_etree.py:19:19:19:25 
| ControlFlowNode for request | xml_etree.py:19:19:19:30 | ControlFlowNode for Attribute | +| xml_etree.py:19:19:19:30 | ControlFlowNode for Attribute | xml_etree.py:19:19:19:45 | ControlFlowNode for Subscript | +| xml_etree.py:19:19:19:45 | ControlFlowNode for Subscript | xml_etree.py:21:49:21:59 | ControlFlowNode for xml_content | +| xml_etree.py:25:19:25:25 | ControlFlowNode for request | xml_etree.py:25:19:25:30 | ControlFlowNode for Attribute | +| xml_etree.py:25:19:25:30 | ControlFlowNode for Attribute | xml_etree.py:25:19:25:45 | ControlFlowNode for Subscript | +| xml_etree.py:25:19:25:45 | ControlFlowNode for Subscript | xml_etree.py:27:38:27:48 | ControlFlowNode for xml_content | +| xml_etree.py:31:19:31:25 | ControlFlowNode for request | xml_etree.py:31:19:31:30 | ControlFlowNode for Attribute | +| xml_etree.py:31:19:31:30 | ControlFlowNode for Attribute | xml_etree.py:31:19:31:45 | ControlFlowNode for Subscript | +| xml_etree.py:31:19:31:45 | ControlFlowNode for Subscript | xml_etree.py:33:40:33:60 | ControlFlowNode for StringIO() | +| xml_etree.py:39:19:39:25 | ControlFlowNode for request | xml_etree.py:39:19:39:30 | ControlFlowNode for Attribute | +| xml_etree.py:39:19:39:30 | ControlFlowNode for Attribute | xml_etree.py:39:19:39:45 | ControlFlowNode for Subscript | +| xml_etree.py:39:19:39:45 | ControlFlowNode for Subscript | xml_etree.py:42:45:42:55 | ControlFlowNode for xml_content | +| xml_etree.py:46:19:46:25 | ControlFlowNode for request | xml_etree.py:46:19:46:30 | ControlFlowNode for Attribute | +| xml_etree.py:46:19:46:30 | ControlFlowNode for Attribute | xml_etree.py:46:19:46:45 | ControlFlowNode for Subscript | +| xml_etree.py:46:19:46:45 | ControlFlowNode for Subscript | xml_etree.py:49:45:49:55 | ControlFlowNode for xml_content | +| xml_etree.py:53:19:53:25 | ControlFlowNode for request | xml_etree.py:53:19:53:30 | ControlFlowNode for Attribute | +| xml_etree.py:53:19:53:30 | ControlFlowNode for Attribute | xml_etree.py:53:19:53:45 | 
ControlFlowNode for Subscript | +| xml_etree.py:53:19:53:45 | ControlFlowNode for Subscript | xml_etree.py:56:45:56:55 | ControlFlowNode for xml_content | +| xml_etree.py:60:19:60:25 | ControlFlowNode for request | xml_etree.py:60:19:60:30 | ControlFlowNode for Attribute | +| xml_etree.py:60:19:60:30 | ControlFlowNode for Attribute | xml_etree.py:60:19:60:45 | ControlFlowNode for Subscript | +| xml_etree.py:60:19:60:45 | ControlFlowNode for Subscript | xml_etree.py:64:45:64:55 | ControlFlowNode for xml_content | +| xml_sax_make_parser.py:31:19:31:25 | ControlFlowNode for request | xml_sax_make_parser.py:31:19:31:30 | ControlFlowNode for Attribute | +| xml_sax_make_parser.py:31:19:31:30 | ControlFlowNode for Attribute | xml_sax_make_parser.py:31:19:31:45 | ControlFlowNode for Subscript | +| xml_sax_make_parser.py:31:19:31:45 | ControlFlowNode for Subscript | xml_sax_make_parser.py:36:18:36:38 | ControlFlowNode for StringIO() | +| xml_sax_make_parser.py:42:19:42:25 | ControlFlowNode for request | xml_sax_make_parser.py:42:19:42:30 | ControlFlowNode for Attribute | +| xml_sax_make_parser.py:42:19:42:30 | ControlFlowNode for Attribute | xml_sax_make_parser.py:42:19:42:45 | ControlFlowNode for Subscript | +| xml_sax_make_parser.py:42:19:42:45 | ControlFlowNode for Subscript | xml_sax_make_parser.py:49:18:49:38 | ControlFlowNode for StringIO() | +| xml_sax_make_parser.py:57:19:57:25 | ControlFlowNode for request | xml_sax_make_parser.py:57:19:57:30 | ControlFlowNode for Attribute | +| xml_sax_make_parser.py:57:19:57:30 | ControlFlowNode for Attribute | xml_sax_make_parser.py:57:19:57:45 | ControlFlowNode for Subscript | +| xml_sax_make_parser.py:57:19:57:45 | ControlFlowNode for Subscript | xml_sax_make_parser.py:63:18:63:38 | ControlFlowNode for StringIO() | +| xml_sax_make_parser.py:69:19:69:25 | ControlFlowNode for request | xml_sax_make_parser.py:69:19:69:30 | ControlFlowNode for Attribute | +| xml_sax_make_parser.py:69:19:69:30 | ControlFlowNode for Attribute | 
xml_sax_make_parser.py:69:19:69:45 | ControlFlowNode for Subscript | +| xml_sax_make_parser.py:69:19:69:45 | ControlFlowNode for Subscript | xml_sax_make_parser.py:73:34:73:54 | ControlFlowNode for StringIO() | +| xml_sax_make_parser.py:79:19:79:25 | ControlFlowNode for request | xml_sax_make_parser.py:79:19:79:30 | ControlFlowNode for Attribute | +| xml_sax_make_parser.py:79:19:79:30 | ControlFlowNode for Attribute | xml_sax_make_parser.py:79:19:79:45 | ControlFlowNode for Subscript | +| xml_sax_make_parser.py:79:19:79:45 | ControlFlowNode for Subscript | xml_sax_make_parser.py:86:22:86:42 | ControlFlowNode for StringIO() | +| xml_sax_make_parser.py:91:19:91:25 | ControlFlowNode for request | xml_sax_make_parser.py:91:19:91:30 | ControlFlowNode for Attribute | +| xml_sax_make_parser.py:91:19:91:30 | ControlFlowNode for Attribute | xml_sax_make_parser.py:91:19:91:45 | ControlFlowNode for Subscript | +| xml_sax_make_parser.py:91:19:91:45 | ControlFlowNode for Subscript | xml_sax_make_parser.py:98:22:98:42 | ControlFlowNode for StringIO() | +| xml_to_dict.py:9:19:9:25 | ControlFlowNode for request | xml_to_dict.py:9:19:9:30 | ControlFlowNode for Attribute | +| xml_to_dict.py:9:19:9:30 | ControlFlowNode for Attribute | xml_to_dict.py:9:19:9:45 | ControlFlowNode for Subscript | +| xml_to_dict.py:9:19:9:45 | ControlFlowNode for Subscript | xml_to_dict.py:11:28:11:38 | ControlFlowNode for xml_content | +| xml_to_dict.py:15:19:15:25 | ControlFlowNode for request | xml_to_dict.py:15:19:15:30 | ControlFlowNode for Attribute | +| xml_to_dict.py:15:19:15:30 | ControlFlowNode for Attribute | xml_to_dict.py:15:19:15:45 | ControlFlowNode for Subscript | +| xml_to_dict.py:15:19:15:45 | ControlFlowNode for Subscript | xml_to_dict.py:17:28:17:38 | ControlFlowNode for xml_content | nodes -| general.py:18:19:18:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| general.py:18:19:18:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for 
Attribute | -| general.py:18:19:18:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | -| general.py:20:34:20:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | -| general.py:25:19:25:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| general.py:25:19:25:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | -| general.py:25:19:25:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | -| general.py:27:27:27:37 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | -| general.py:32:19:32:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| general.py:32:19:32:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | -| general.py:32:19:32:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | -| general.py:34:29:34:49 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() | -| general.py:39:19:39:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| general.py:39:19:39:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | -| general.py:39:19:39:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | -| general.py:41:28:41:38 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | -| general.py:46:19:46:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| general.py:46:19:46:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | -| general.py:46:19:46:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | -| general.py:49:34:49:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | -| general.py:54:19:54:25 | ControlFlowNode for request | 
semmle.label | ControlFlowNode for request | -| general.py:54:19:54:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | -| general.py:54:19:54:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | -| general.py:57:34:57:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | -| general.py:62:19:62:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| general.py:62:19:62:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | -| general.py:62:19:62:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | -| general.py:65:45:65:55 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | -| general.py:70:19:70:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| general.py:70:19:70:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | -| general.py:70:19:70:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | -| general.py:73:40:73:60 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() | -| xml_sax_make_parser.py:59:19:59:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| xml_sax_make_parser.py:59:19:59:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | -| xml_sax_make_parser.py:59:19:59:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | -| xml_sax_make_parser.py:65:18:65:38 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() | -| xml_sax_make_parser.py:71:19:71:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| xml_sax_make_parser.py:71:19:71:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | -| xml_sax_make_parser.py:71:19:71:45 | ControlFlowNode for 
Subscript | semmle.label | ControlFlowNode for Subscript | -| xml_sax_make_parser.py:75:34:75:54 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() | +| lxml_etree.py:11:19:11:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | +| lxml_etree.py:11:19:11:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | +| lxml_etree.py:11:19:11:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | +| lxml_etree.py:13:34:13:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | +| lxml_etree.py:17:19:17:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | +| lxml_etree.py:17:19:17:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | +| lxml_etree.py:17:19:17:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | +| lxml_etree.py:19:38:19:50 | ControlFlowNode for List | semmle.label | ControlFlowNode for List | +| lxml_etree.py:23:19:23:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | +| lxml_etree.py:23:19:23:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | +| lxml_etree.py:23:19:23:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | +| lxml_etree.py:25:27:25:37 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | +| lxml_etree.py:29:19:29:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | +| lxml_etree.py:29:19:29:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | +| lxml_etree.py:29:19:29:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | +| lxml_etree.py:31:29:31:49 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() | +| lxml_etree.py:37:19:37:25 | ControlFlowNode for request | semmle.label | 
ControlFlowNode for request | +| lxml_etree.py:37:19:37:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | +| lxml_etree.py:37:19:37:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | +| lxml_etree.py:40:34:40:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | +| lxml_etree.py:44:19:44:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | +| lxml_etree.py:44:19:44:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | +| lxml_etree.py:44:19:44:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | +| lxml_etree.py:47:34:47:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | +| lxml_etree.py:54:19:54:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | +| lxml_etree.py:54:19:54:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | +| lxml_etree.py:54:19:54:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | +| lxml_etree.py:57:34:57:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | +| lxml_etree.py:65:19:65:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | +| lxml_etree.py:65:19:65:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | +| lxml_etree.py:65:19:65:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | +| lxml_etree.py:68:34:68:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | +| lxml_etree.py:73:19:73:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | +| lxml_etree.py:73:19:73:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | +| lxml_etree.py:73:19:73:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for 
Subscript | +| lxml_etree.py:76:34:76:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | +| xml_dom.py:13:19:13:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | +| xml_dom.py:13:19:13:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | +| xml_dom.py:13:19:13:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | +| xml_dom.py:15:34:15:54 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() | +| xml_dom.py:19:19:19:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | +| xml_dom.py:19:19:19:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | +| xml_dom.py:19:19:19:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | +| xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | +| xml_dom.py:25:19:25:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | +| xml_dom.py:25:19:25:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | +| xml_dom.py:25:19:25:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | +| xml_dom.py:27:34:27:54 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() | +| xml_dom.py:31:19:31:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | +| xml_dom.py:31:19:31:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | +| xml_dom.py:31:19:31:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | +| xml_dom.py:33:40:33:50 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | +| xml_dom.py:39:19:39:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | +| xml_dom.py:39:19:39:30 | ControlFlowNode for Attribute | 
semmle.label | ControlFlowNode for Attribute | +| xml_dom.py:39:19:39:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | +| xml_dom.py:43:34:43:54 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() | +| xml_etree.py:13:19:13:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | +| xml_etree.py:13:19:13:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | +| xml_etree.py:13:19:13:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | +| xml_etree.py:15:45:15:55 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | +| xml_etree.py:19:19:19:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | +| xml_etree.py:19:19:19:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | +| xml_etree.py:19:19:19:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | +| xml_etree.py:21:49:21:59 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | +| xml_etree.py:25:19:25:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | +| xml_etree.py:25:19:25:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | +| xml_etree.py:25:19:25:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | +| xml_etree.py:27:38:27:48 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | +| xml_etree.py:31:19:31:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | +| xml_etree.py:31:19:31:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | +| xml_etree.py:31:19:31:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | +| xml_etree.py:33:40:33:60 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for 
StringIO() | +| xml_etree.py:39:19:39:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | +| xml_etree.py:39:19:39:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | +| xml_etree.py:39:19:39:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | +| xml_etree.py:42:45:42:55 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | +| xml_etree.py:46:19:46:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | +| xml_etree.py:46:19:46:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | +| xml_etree.py:46:19:46:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | +| xml_etree.py:49:45:49:55 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | +| xml_etree.py:53:19:53:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | +| xml_etree.py:53:19:53:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | +| xml_etree.py:53:19:53:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | +| xml_etree.py:56:45:56:55 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | +| xml_etree.py:60:19:60:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | +| xml_etree.py:60:19:60:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | +| xml_etree.py:60:19:60:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | +| xml_etree.py:64:45:64:55 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | +| xml_sax_make_parser.py:31:19:31:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | +| xml_sax_make_parser.py:31:19:31:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | +| 
xml_sax_make_parser.py:31:19:31:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | +| xml_sax_make_parser.py:36:18:36:38 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() | +| xml_sax_make_parser.py:42:19:42:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | +| xml_sax_make_parser.py:42:19:42:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | +| xml_sax_make_parser.py:42:19:42:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | +| xml_sax_make_parser.py:49:18:49:38 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() | +| xml_sax_make_parser.py:57:19:57:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | +| xml_sax_make_parser.py:57:19:57:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | +| xml_sax_make_parser.py:57:19:57:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | +| xml_sax_make_parser.py:63:18:63:38 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() | +| xml_sax_make_parser.py:69:19:69:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | +| xml_sax_make_parser.py:69:19:69:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | +| xml_sax_make_parser.py:69:19:69:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | +| xml_sax_make_parser.py:73:34:73:54 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() | +| xml_sax_make_parser.py:79:19:79:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | +| xml_sax_make_parser.py:79:19:79:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | +| xml_sax_make_parser.py:79:19:79:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for 
Subscript | +| xml_sax_make_parser.py:86:22:86:42 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() | +| xml_sax_make_parser.py:91:19:91:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | +| xml_sax_make_parser.py:91:19:91:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | +| xml_sax_make_parser.py:91:19:91:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | +| xml_sax_make_parser.py:98:22:98:42 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() | +| xml_to_dict.py:9:19:9:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | +| xml_to_dict.py:9:19:9:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | +| xml_to_dict.py:9:19:9:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | +| xml_to_dict.py:11:28:11:38 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | +| xml_to_dict.py:15:19:15:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | +| xml_to_dict.py:15:19:15:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | +| xml_to_dict.py:15:19:15:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | +| xml_to_dict.py:17:28:17:38 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | subpaths #select -| general.py:20:34:20:44 | ControlFlowNode for xml_content | general.py:18:19:18:25 | ControlFlowNode for request | general.py:20:34:20:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and isn't secured against XML External Entities abuse | general.py:20:34:20:44 | ControlFlowNode for xml_content | This | general.py:18:19:18:25 | ControlFlowNode for request | user-provided value | -| general.py:27:27:27:37 | ControlFlowNode for xml_content | 
general.py:25:19:25:25 | ControlFlowNode for request | general.py:27:27:27:37 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and isn't secured against XML External Entities abuse | general.py:27:27:27:37 | ControlFlowNode for xml_content | This | general.py:25:19:25:25 | ControlFlowNode for request | user-provided value | -| general.py:34:29:34:49 | ControlFlowNode for StringIO() | general.py:32:19:32:25 | ControlFlowNode for request | general.py:34:29:34:49 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and isn't secured against XML External Entities abuse | general.py:34:29:34:49 | ControlFlowNode for StringIO() | This | general.py:32:19:32:25 | ControlFlowNode for request | user-provided value | -| general.py:41:28:41:38 | ControlFlowNode for xml_content | general.py:39:19:39:25 | ControlFlowNode for request | general.py:41:28:41:38 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and isn't secured against XML External Entities abuse | general.py:41:28:41:38 | ControlFlowNode for xml_content | This | general.py:39:19:39:25 | ControlFlowNode for request | user-provided value | -| general.py:49:34:49:44 | ControlFlowNode for xml_content | general.py:46:19:46:25 | ControlFlowNode for request | general.py:49:34:49:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and isn't secured against XML External Entities abuse | general.py:49:34:49:44 | ControlFlowNode for xml_content | This | general.py:46:19:46:25 | ControlFlowNode for request | user-provided value | -| general.py:57:34:57:44 | ControlFlowNode for xml_content | general.py:54:19:54:25 | ControlFlowNode for request | general.py:57:34:57:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and isn't secured against XML External Entities abuse | general.py:57:34:57:44 | ControlFlowNode for xml_content | This | general.py:54:19:54:25 | ControlFlowNode for request | user-provided value | 
-| general.py:65:45:65:55 | ControlFlowNode for xml_content | general.py:62:19:62:25 | ControlFlowNode for request | general.py:65:45:65:55 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and isn't secured against XML External Entities abuse | general.py:65:45:65:55 | ControlFlowNode for xml_content | This | general.py:62:19:62:25 | ControlFlowNode for request | user-provided value | -| general.py:73:40:73:60 | ControlFlowNode for StringIO() | general.py:70:19:70:25 | ControlFlowNode for request | general.py:73:40:73:60 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and isn't secured against XML External Entities abuse | general.py:73:40:73:60 | ControlFlowNode for StringIO() | This | general.py:70:19:70:25 | ControlFlowNode for request | user-provided value | -| xml_sax_make_parser.py:65:18:65:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:59:19:59:25 | ControlFlowNode for request | xml_sax_make_parser.py:65:18:65:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and isn't secured against XML External Entities abuse | xml_sax_make_parser.py:65:18:65:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:59:19:59:25 | ControlFlowNode for request | user-provided value | -| xml_sax_make_parser.py:75:34:75:54 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:71:19:71:25 | ControlFlowNode for request | xml_sax_make_parser.py:75:34:75:54 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and isn't secured against XML External Entities abuse | xml_sax_make_parser.py:75:34:75:54 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:71:19:71:25 | ControlFlowNode for request | user-provided value | +| lxml_etree.py:13:34:13:44 | ControlFlowNode for xml_content | lxml_etree.py:11:19:11:25 | ControlFlowNode for request | lxml_etree.py:13:34:13:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is 
vulnerable to XXE. | lxml_etree.py:13:34:13:44 | ControlFlowNode for xml_content | This | lxml_etree.py:11:19:11:25 | ControlFlowNode for request | user-provided value | +| lxml_etree.py:19:38:19:50 | ControlFlowNode for List | lxml_etree.py:17:19:17:25 | ControlFlowNode for request | lxml_etree.py:19:38:19:50 | ControlFlowNode for List | $@ XML input is constructed from a $@ and is vulnerable to XXE. | lxml_etree.py:19:38:19:50 | ControlFlowNode for List | This | lxml_etree.py:17:19:17:25 | ControlFlowNode for request | user-provided value | +| lxml_etree.py:25:27:25:37 | ControlFlowNode for xml_content | lxml_etree.py:23:19:23:25 | ControlFlowNode for request | lxml_etree.py:25:27:25:37 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to XXE. | lxml_etree.py:25:27:25:37 | ControlFlowNode for xml_content | This | lxml_etree.py:23:19:23:25 | ControlFlowNode for request | user-provided value | +| lxml_etree.py:31:29:31:49 | ControlFlowNode for StringIO() | lxml_etree.py:29:19:29:25 | ControlFlowNode for request | lxml_etree.py:31:29:31:49 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to XXE. | lxml_etree.py:31:29:31:49 | ControlFlowNode for StringIO() | This | lxml_etree.py:29:19:29:25 | ControlFlowNode for request | user-provided value | +| lxml_etree.py:40:34:40:44 | ControlFlowNode for xml_content | lxml_etree.py:37:19:37:25 | ControlFlowNode for request | lxml_etree.py:40:34:40:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to XXE. | lxml_etree.py:40:34:40:44 | ControlFlowNode for xml_content | This | lxml_etree.py:37:19:37:25 | ControlFlowNode for request | user-provided value | +| lxml_etree.py:47:34:47:44 | ControlFlowNode for xml_content | lxml_etree.py:44:19:44:25 | ControlFlowNode for request | lxml_etree.py:47:34:47:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to XXE. 
| lxml_etree.py:47:34:47:44 | ControlFlowNode for xml_content | This | lxml_etree.py:44:19:44:25 | ControlFlowNode for request | user-provided value | +| lxml_etree.py:76:34:76:44 | ControlFlowNode for xml_content | lxml_etree.py:73:19:73:25 | ControlFlowNode for request | lxml_etree.py:76:34:76:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to Billion Laughs. | lxml_etree.py:76:34:76:44 | ControlFlowNode for xml_content | This | lxml_etree.py:73:19:73:25 | ControlFlowNode for request | user-provided value | +| lxml_etree.py:76:34:76:44 | ControlFlowNode for xml_content | lxml_etree.py:73:19:73:25 | ControlFlowNode for request | lxml_etree.py:76:34:76:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to Quadratic Blowup. | lxml_etree.py:76:34:76:44 | ControlFlowNode for xml_content | This | lxml_etree.py:73:19:73:25 | ControlFlowNode for request | user-provided value | +| lxml_etree.py:76:34:76:44 | ControlFlowNode for xml_content | lxml_etree.py:73:19:73:25 | ControlFlowNode for request | lxml_etree.py:76:34:76:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to XXE. | lxml_etree.py:76:34:76:44 | ControlFlowNode for xml_content | This | lxml_etree.py:73:19:73:25 | ControlFlowNode for request | user-provided value | +| xml_dom.py:15:34:15:54 | ControlFlowNode for StringIO() | xml_dom.py:13:19:13:25 | ControlFlowNode for request | xml_dom.py:15:34:15:54 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to Billion Laughs. 
| xml_dom.py:15:34:15:54 | ControlFlowNode for StringIO() | This | xml_dom.py:13:19:13:25 | ControlFlowNode for request | user-provided value | +| xml_dom.py:15:34:15:54 | ControlFlowNode for StringIO() | xml_dom.py:13:19:13:25 | ControlFlowNode for request | xml_dom.py:15:34:15:54 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to Quadratic Blowup. | xml_dom.py:15:34:15:54 | ControlFlowNode for StringIO() | This | xml_dom.py:13:19:13:25 | ControlFlowNode for request | user-provided value | +| xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | xml_dom.py:19:19:19:25 | ControlFlowNode for request | xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to Billion Laughs. | xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | This | xml_dom.py:19:19:19:25 | ControlFlowNode for request | user-provided value | +| xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | xml_dom.py:19:19:19:25 | ControlFlowNode for request | xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to Quadratic Blowup. | xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | This | xml_dom.py:19:19:19:25 | ControlFlowNode for request | user-provided value | +| xml_dom.py:27:34:27:54 | ControlFlowNode for StringIO() | xml_dom.py:25:19:25:25 | ControlFlowNode for request | xml_dom.py:27:34:27:54 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to Billion Laughs. | xml_dom.py:27:34:27:54 | ControlFlowNode for StringIO() | This | xml_dom.py:25:19:25:25 | ControlFlowNode for request | user-provided value | +| xml_dom.py:27:34:27:54 | ControlFlowNode for StringIO() | xml_dom.py:25:19:25:25 | ControlFlowNode for request | xml_dom.py:27:34:27:54 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to Quadratic Blowup. 
| xml_dom.py:27:34:27:54 | ControlFlowNode for StringIO() | This | xml_dom.py:25:19:25:25 | ControlFlowNode for request | user-provided value | +| xml_dom.py:33:40:33:50 | ControlFlowNode for xml_content | xml_dom.py:31:19:31:25 | ControlFlowNode for request | xml_dom.py:33:40:33:50 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to Billion Laughs. | xml_dom.py:33:40:33:50 | ControlFlowNode for xml_content | This | xml_dom.py:31:19:31:25 | ControlFlowNode for request | user-provided value | +| xml_dom.py:33:40:33:50 | ControlFlowNode for xml_content | xml_dom.py:31:19:31:25 | ControlFlowNode for request | xml_dom.py:33:40:33:50 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to Quadratic Blowup. | xml_dom.py:33:40:33:50 | ControlFlowNode for xml_content | This | xml_dom.py:31:19:31:25 | ControlFlowNode for request | user-provided value | +| xml_etree.py:49:45:49:55 | ControlFlowNode for xml_content | xml_etree.py:46:19:46:25 | ControlFlowNode for request | xml_etree.py:49:45:49:55 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to XXE. | xml_etree.py:49:45:49:55 | ControlFlowNode for xml_content | This | xml_etree.py:46:19:46:25 | ControlFlowNode for request | user-provided value | +| xml_etree.py:56:45:56:55 | ControlFlowNode for xml_content | xml_etree.py:53:19:53:25 | ControlFlowNode for request | xml_etree.py:56:45:56:55 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to XXE. 
| xml_etree.py:56:45:56:55 | ControlFlowNode for xml_content | This | xml_etree.py:53:19:53:25 | ControlFlowNode for request | user-provided value | +| xml_sax_make_parser.py:63:18:63:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:57:19:57:25 | ControlFlowNode for request | xml_sax_make_parser.py:63:18:63:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to Billion Laughs. | xml_sax_make_parser.py:63:18:63:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:57:19:57:25 | ControlFlowNode for request | user-provided value | +| xml_sax_make_parser.py:63:18:63:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:57:19:57:25 | ControlFlowNode for request | xml_sax_make_parser.py:63:18:63:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to DTD retrieval. | xml_sax_make_parser.py:63:18:63:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:57:19:57:25 | ControlFlowNode for request | user-provided value | +| xml_sax_make_parser.py:63:18:63:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:57:19:57:25 | ControlFlowNode for request | xml_sax_make_parser.py:63:18:63:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to Quadratic Blowup. | xml_sax_make_parser.py:63:18:63:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:57:19:57:25 | ControlFlowNode for request | user-provided value | +| xml_sax_make_parser.py:63:18:63:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:57:19:57:25 | ControlFlowNode for request | xml_sax_make_parser.py:63:18:63:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to XXE. 
| xml_sax_make_parser.py:63:18:63:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:57:19:57:25 | ControlFlowNode for request | user-provided value | +| xml_sax_make_parser.py:98:22:98:42 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:91:19:91:25 | ControlFlowNode for request | xml_sax_make_parser.py:98:22:98:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to Billion Laughs. | xml_sax_make_parser.py:98:22:98:42 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:91:19:91:25 | ControlFlowNode for request | user-provided value | +| xml_sax_make_parser.py:98:22:98:42 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:91:19:91:25 | ControlFlowNode for request | xml_sax_make_parser.py:98:22:98:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to DTD retrieval. | xml_sax_make_parser.py:98:22:98:42 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:91:19:91:25 | ControlFlowNode for request | user-provided value | +| xml_sax_make_parser.py:98:22:98:42 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:91:19:91:25 | ControlFlowNode for request | xml_sax_make_parser.py:98:22:98:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to Quadratic Blowup. | xml_sax_make_parser.py:98:22:98:42 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:91:19:91:25 | ControlFlowNode for request | user-provided value | +| xml_sax_make_parser.py:98:22:98:42 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:91:19:91:25 | ControlFlowNode for request | xml_sax_make_parser.py:98:22:98:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to XXE. 
| xml_sax_make_parser.py:98:22:98:42 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:91:19:91:25 | ControlFlowNode for request | user-provided value | +| xml_to_dict.py:17:28:17:38 | ControlFlowNode for xml_content | xml_to_dict.py:15:19:15:25 | ControlFlowNode for request | xml_to_dict.py:17:28:17:38 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to Billion Laughs. | xml_to_dict.py:17:28:17:38 | ControlFlowNode for xml_content | This | xml_to_dict.py:15:19:15:25 | ControlFlowNode for request | user-provided value | +| xml_to_dict.py:17:28:17:38 | ControlFlowNode for xml_content | xml_to_dict.py:15:19:15:25 | ControlFlowNode for request | xml_to_dict.py:17:28:17:38 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to Quadratic Blowup. | xml_to_dict.py:17:28:17:38 | ControlFlowNode for xml_content | This | xml_to_dict.py:15:19:15:25 | ControlFlowNode for request | user-provided value | From 85b5ef36ae8dcd53868e10204151d56265622610 Mon Sep 17 00:00:00 2001 From: jorgectf Date: Wed, 9 Feb 2022 13:28:56 +0100 Subject: [PATCH 33/79] `XmlInjection` -> `XmlEntityInjection` --- .../{XmlInjection.py => XmlEntityInjection.py} | 0 ...njection.qhelp => XmlEntityInjection.qhelp} | 2 +- .../{XmlInjection.ql => XmlEntityInjection.ql} | 10 +++++----- ...XmlInjection.qll => XmlEntityInjection.qll} | 18 ++++++++++-------- ...ll => XmlEntityInjectionCustomizations.qll} | 2 +- ...on.expected => XmlEntityInjection.expected} | 0 .../Security/CWE-611/XmlEntityInjection.qlref | 1 + .../Security/CWE-611/XmlInjection.qlref | 1 - 8 files changed, 18 insertions(+), 16 deletions(-) rename python/ql/src/experimental/Security/CWE-611/{XmlInjection.py => XmlEntityInjection.py} (100%) rename python/ql/src/experimental/Security/CWE-611/{XmlInjection.qhelp => XmlEntityInjection.qhelp} (98%) rename python/ql/src/experimental/Security/CWE-611/{XmlInjection.ql => XmlEntityInjection.ql} (62%) rename 
python/ql/src/experimental/semmle/python/security/dataflow/{XmlInjection.qll => XmlEntityInjection.qll} (65%) rename python/ql/src/experimental/semmle/python/security/dataflow/{XmlInjectionCustomizations.qll => XmlEntityInjectionCustomizations.qll} (99%) rename python/ql/test/experimental/query-tests/Security/CWE-611/{XmlInjection.expected => XmlEntityInjection.expected} (100%) create mode 100644 python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.qlref delete mode 100644 python/ql/test/experimental/query-tests/Security/CWE-611/XmlInjection.qlref diff --git a/python/ql/src/experimental/Security/CWE-611/XmlInjection.py b/python/ql/src/experimental/Security/CWE-611/XmlEntityInjection.py similarity index 100% rename from python/ql/src/experimental/Security/CWE-611/XmlInjection.py rename to python/ql/src/experimental/Security/CWE-611/XmlEntityInjection.py diff --git a/python/ql/src/experimental/Security/CWE-611/XmlInjection.qhelp b/python/ql/src/experimental/Security/CWE-611/XmlEntityInjection.qhelp similarity index 98% rename from python/ql/src/experimental/Security/CWE-611/XmlInjection.qhelp rename to python/ql/src/experimental/Security/CWE-611/XmlEntityInjection.qhelp index e617835bdef2..6da1bf1d3063 100644 --- a/python/ql/src/experimental/Security/CWE-611/XmlInjection.qhelp +++ b/python/ql/src/experimental/Security/CWE-611/XmlEntityInjection.qhelp @@ -26,7 +26,7 @@ to prevent any potentially malicious operation. The following example calls xml.etree.ElementTree.fromstring using a parser (lxml.etree.XMLParser) that is not safely configured on untrusted data, and is therefore inherently unsafe.

    - +

    Providing an input (xml_content) like the following XML content against /bad, the request response would contain the contents of /etc/passwd. diff --git a/python/ql/src/experimental/Security/CWE-611/XmlInjection.ql b/python/ql/src/experimental/Security/CWE-611/XmlEntityInjection.ql similarity index 62% rename from python/ql/src/experimental/Security/CWE-611/XmlInjection.ql rename to python/ql/src/experimental/Security/CWE-611/XmlEntityInjection.ql index 78213f624eaf..8f22ded4b157 100644 --- a/python/ql/src/experimental/Security/CWE-611/XmlInjection.ql +++ b/python/ql/src/experimental/Security/CWE-611/XmlEntityInjection.ql @@ -1,9 +1,9 @@ /** - * @name XML injection - * @description User input should not be parsed without security options enabled. + * @name XML Entity injection + * @description User input should not be parsed allowing the injection of entities. * @kind path-problem * @problem.severity error - * @id py/xml-injection + * @id py/xml-entity-injection * @tags security * external/cwe/cwe-611 * external/cwe/cwe-776 @@ -12,11 +12,11 @@ // determine precision above import python -import experimental.semmle.python.security.dataflow.XmlInjection +import experimental.semmle.python.security.dataflow.XmlEntityInjection import DataFlow::PathGraph from DataFlow::PathNode source, DataFlow::PathNode sink, string kind -where XmlInjection::xmlInjectionVulnerable(source, sink, kind) +where XmlEntityInjection::xmlEntityInjectionVulnerable(source, sink, kind) select sink.getNode(), source, sink, "$@ XML input is constructed from a $@ and is vulnerable to " + kind + ".", sink.getNode(), "This", source.getNode(), "user-provided value" diff --git a/python/ql/src/experimental/semmle/python/security/dataflow/XmlInjection.qll b/python/ql/src/experimental/semmle/python/security/dataflow/XmlEntityInjection.qll similarity index 65% rename from python/ql/src/experimental/semmle/python/security/dataflow/XmlInjection.qll rename to 
python/ql/src/experimental/semmle/python/security/dataflow/XmlEntityInjection.qll index 90e2c9bf342c..4669e0e430d4 100644 --- a/python/ql/src/experimental/semmle/python/security/dataflow/XmlInjection.qll +++ b/python/ql/src/experimental/semmle/python/security/dataflow/XmlEntityInjection.qll @@ -5,11 +5,11 @@ import semmle.python.dataflow.new.TaintTracking import semmle.python.dataflow.new.RemoteFlowSources import semmle.python.dataflow.new.BarrierGuards -module XmlInjection { - import XmlInjectionCustomizations::XmlInjection +module XmlEntityInjection { + import XmlEntityInjectionCustomizations::XmlEntityInjection - class XMLInjectionConfiguration extends TaintTracking::Configuration { - XMLInjectionConfiguration() { this = "XMLInjectionConfiguration" } + class XmlEntityInjectionConfiguration extends TaintTracking::Configuration { + XmlEntityInjectionConfiguration() { this = "XmlEntityInjectionConfiguration" } override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSourceAsSource @@ -29,13 +29,15 @@ module XmlInjection { private import DataFlow::PathGraph /** Holds if there is an XML injection from `source` to `sink` */ - predicate xmlInjection(DataFlow::PathNode source, DataFlow::PathNode sink) { - any(XMLInjectionConfiguration xmlInjectionConfig).hasFlowPath(source, sink) + predicate xmlEntityInjection(DataFlow::PathNode source, DataFlow::PathNode sink) { + any(XmlEntityInjectionConfiguration x).hasFlowPath(source, sink) } /** Holds if there is an XML injection from `source` to `sink` vulnerable to `kind` */ - predicate xmlInjectionVulnerable(DataFlow::PathNode source, DataFlow::PathNode sink, string kind) { - xmlInjection(source, sink) and + predicate xmlEntityInjectionVulnerable( + DataFlow::PathNode source, DataFlow::PathNode sink, string kind + ) { + xmlEntityInjection(source, sink) and ( xmlParsingInputAsVulnerableSink(sink.getNode(), kind) or xmlParserInputAsVulnerableSink(sink.getNode(), kind) diff --git 
a/python/ql/src/experimental/semmle/python/security/dataflow/XmlInjectionCustomizations.qll b/python/ql/src/experimental/semmle/python/security/dataflow/XmlEntityInjectionCustomizations.qll similarity index 99% rename from python/ql/src/experimental/semmle/python/security/dataflow/XmlInjectionCustomizations.qll rename to python/ql/src/experimental/semmle/python/security/dataflow/XmlEntityInjectionCustomizations.qll index 3e9dd22c69c0..177f89799563 100644 --- a/python/ql/src/experimental/semmle/python/security/dataflow/XmlInjectionCustomizations.qll +++ b/python/ql/src/experimental/semmle/python/security/dataflow/XmlEntityInjectionCustomizations.qll @@ -15,7 +15,7 @@ private import semmle.python.ApiGraphs * Provides default sources, sinks and sanitizers for detecting "xml injection" * vulnerabilities, as well as extension points for adding your own. */ -module XmlInjection { +module XmlEntityInjection { /** * A data flow source for "xml injection" vulnerabilities. */ diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/XmlInjection.expected b/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected similarity index 100% rename from python/ql/test/experimental/query-tests/Security/CWE-611/XmlInjection.expected rename to python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.qlref b/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.qlref new file mode 100644 index 000000000000..36a7c8845fb7 --- /dev/null +++ b/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.qlref @@ -0,0 +1 @@ +experimental/Security/CWE-611/XmlEntityInjection.ql diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/XmlInjection.qlref b/python/ql/test/experimental/query-tests/Security/CWE-611/XmlInjection.qlref deleted file mode 100644 index 24d483666acc..000000000000 --- 
a/python/ql/test/experimental/query-tests/Security/CWE-611/XmlInjection.qlref +++ /dev/null @@ -1 +0,0 @@ -experimental/Security/CWE-611/XmlInjection.ql From c5f30d99d5f01f5819e9653d1115a65d82d9b6f0 Mon Sep 17 00:00:00 2001 From: jorgectf Date: Sun, 20 Feb 2022 17:34:12 +0100 Subject: [PATCH 34/79] Create an extendable `AdditionalTaintStep` class in customizations --- .../security/dataflow/XmlEntityInjection.qll | 2 +- .../XmlEntityInjectionCustomizations.qll | 31 +++++++++++++++---- 2 files changed, 26 insertions(+), 7 deletions(-) diff --git a/python/ql/src/experimental/semmle/python/security/dataflow/XmlEntityInjection.qll b/python/ql/src/experimental/semmle/python/security/dataflow/XmlEntityInjection.qll index 4669e0e430d4..087c3057640e 100644 --- a/python/ql/src/experimental/semmle/python/security/dataflow/XmlEntityInjection.qll +++ b/python/ql/src/experimental/semmle/python/security/dataflow/XmlEntityInjection.qll @@ -22,7 +22,7 @@ module XmlEntityInjection { } override predicate isAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { - ioAdditionalTaintStep(nodeFrom, nodeTo) + any(AdditionalTaintStep s).step(nodeFrom, nodeTo) } } diff --git a/python/ql/src/experimental/semmle/python/security/dataflow/XmlEntityInjectionCustomizations.qll b/python/ql/src/experimental/semmle/python/security/dataflow/XmlEntityInjectionCustomizations.qll index 177f89799563..8f8b3ae2c6ab 100644 --- a/python/ql/src/experimental/semmle/python/security/dataflow/XmlEntityInjectionCustomizations.qll +++ b/python/ql/src/experimental/semmle/python/security/dataflow/XmlEntityInjectionCustomizations.qll @@ -31,6 +31,20 @@ module XmlEntityInjection { */ abstract class SanitizerGuard extends DataFlow::BarrierGuard { } + /** + * A unit class for adding additional taint steps. + * + * Extend this class to add additional taint steps that should apply to `XmlEntityInjection` + * taint configuration. 
+ */ + class AdditionalTaintStep extends Unit { + /** + * Holds if the step from `nodeFrom` to `nodeTo` should be considered a taint + * step for `XmlEntityInjection` configuration. + */ + abstract predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo); + } + /** * A data flow sink for XML parsing libraries. * @@ -85,11 +99,16 @@ module XmlEntityInjection { */ class StringConstCompareAsSanitizerGuard extends SanitizerGuard, StringConstCompare { } - predicate ioAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { - exists(DataFlow::CallCfgNode ioCalls | - ioCalls = API::moduleImport("io").getMember(["StringIO", "BytesIO"]).getACall() and - nodeFrom = ioCalls.getArg(0) and - nodeTo = ioCalls - ) + /** + * A taint step for `io`'s `StringIO` and `BytesIO` methods. + */ + class IoAdditionalTaintStep extends AdditionalTaintStep { + override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { + exists(DataFlow::CallCfgNode ioCalls | + ioCalls = API::moduleImport("io").getMember(["StringIO", "BytesIO"]).getACall() and + nodeFrom = ioCalls.getArg(0) and + nodeTo = ioCalls + ) + } } } From 500e0aced6e9a9e2fd426966e85109ce26ab5f1d Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Tue, 1 Mar 2022 17:14:35 +0100 Subject: [PATCH 35/79] Python: Rewrite sax XML tests The tests for type-trackers were not that interesting, since they did not have XML input in both cases, which is the problem we were trying hard to solve. I did keep the test-case of not-user-supplied url alive as well though :+1: I added OK/NOT OK annotations. 
Notice that we report all 4 kinds of vulnerabilities on line 93 --- .../CWE-611/XmlEntityInjection.expected | 82 +++++++++++-------- .../Security/CWE-611/xml_sax_make_parser.py | 37 +++++---- 2 files changed, 67 insertions(+), 52 deletions(-) diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected b/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected index 081a8c6e6af8..2e291875ce81 100644 --- a/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected +++ b/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected @@ -71,18 +71,19 @@ edges | xml_sax_make_parser.py:42:19:42:25 | ControlFlowNode for request | xml_sax_make_parser.py:42:19:42:30 | ControlFlowNode for Attribute | | xml_sax_make_parser.py:42:19:42:30 | ControlFlowNode for Attribute | xml_sax_make_parser.py:42:19:42:45 | ControlFlowNode for Subscript | | xml_sax_make_parser.py:42:19:42:45 | ControlFlowNode for Subscript | xml_sax_make_parser.py:49:18:49:38 | ControlFlowNode for StringIO() | -| xml_sax_make_parser.py:57:19:57:25 | ControlFlowNode for request | xml_sax_make_parser.py:57:19:57:30 | ControlFlowNode for Attribute | -| xml_sax_make_parser.py:57:19:57:30 | ControlFlowNode for Attribute | xml_sax_make_parser.py:57:19:57:45 | ControlFlowNode for Subscript | -| xml_sax_make_parser.py:57:19:57:45 | ControlFlowNode for Subscript | xml_sax_make_parser.py:63:18:63:38 | ControlFlowNode for StringIO() | -| xml_sax_make_parser.py:69:19:69:25 | ControlFlowNode for request | xml_sax_make_parser.py:69:19:69:30 | ControlFlowNode for Attribute | -| xml_sax_make_parser.py:69:19:69:30 | ControlFlowNode for Attribute | xml_sax_make_parser.py:69:19:69:45 | ControlFlowNode for Subscript | -| xml_sax_make_parser.py:69:19:69:45 | ControlFlowNode for Subscript | xml_sax_make_parser.py:73:34:73:54 | ControlFlowNode for StringIO() | -| xml_sax_make_parser.py:79:19:79:25 | ControlFlowNode for 
request | xml_sax_make_parser.py:79:19:79:30 | ControlFlowNode for Attribute | -| xml_sax_make_parser.py:79:19:79:30 | ControlFlowNode for Attribute | xml_sax_make_parser.py:79:19:79:45 | ControlFlowNode for Subscript | -| xml_sax_make_parser.py:79:19:79:45 | ControlFlowNode for Subscript | xml_sax_make_parser.py:86:22:86:42 | ControlFlowNode for StringIO() | -| xml_sax_make_parser.py:91:19:91:25 | ControlFlowNode for request | xml_sax_make_parser.py:91:19:91:30 | ControlFlowNode for Attribute | -| xml_sax_make_parser.py:91:19:91:30 | ControlFlowNode for Attribute | xml_sax_make_parser.py:91:19:91:45 | ControlFlowNode for Subscript | -| xml_sax_make_parser.py:91:19:91:45 | ControlFlowNode for Subscript | xml_sax_make_parser.py:98:22:98:42 | ControlFlowNode for StringIO() | +| xml_sax_make_parser.py:63:19:63:25 | ControlFlowNode for request | xml_sax_make_parser.py:63:19:63:30 | ControlFlowNode for Attribute | +| xml_sax_make_parser.py:63:19:63:30 | ControlFlowNode for Attribute | xml_sax_make_parser.py:63:19:63:45 | ControlFlowNode for Subscript | +| xml_sax_make_parser.py:63:19:63:45 | ControlFlowNode for Subscript | xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | +| xml_sax_make_parser.py:75:19:75:25 | ControlFlowNode for request | xml_sax_make_parser.py:75:19:75:30 | ControlFlowNode for Attribute | +| xml_sax_make_parser.py:75:19:75:30 | ControlFlowNode for Attribute | xml_sax_make_parser.py:75:19:75:45 | ControlFlowNode for Subscript | +| xml_sax_make_parser.py:75:19:75:45 | ControlFlowNode for Subscript | xml_sax_make_parser.py:79:33:79:53 | ControlFlowNode for StringIO() | +| xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | xml_sax_make_parser.py:86:19:86:30 | ControlFlowNode for Attribute | +| xml_sax_make_parser.py:86:19:86:30 | ControlFlowNode for Attribute | xml_sax_make_parser.py:86:19:86:45 | ControlFlowNode for Subscript | +| xml_sax_make_parser.py:86:19:86:45 | ControlFlowNode for Subscript | 
xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | +| xml_sax_make_parser.py:86:19:86:45 | ControlFlowNode for Subscript | xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | +| xml_sax_make_parser.py:98:19:98:25 | ControlFlowNode for request | xml_sax_make_parser.py:98:19:98:30 | ControlFlowNode for Attribute | +| xml_sax_make_parser.py:98:19:98:30 | ControlFlowNode for Attribute | xml_sax_make_parser.py:98:19:98:45 | ControlFlowNode for Subscript | +| xml_sax_make_parser.py:98:19:98:45 | ControlFlowNode for Subscript | xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | | xml_to_dict.py:9:19:9:25 | ControlFlowNode for request | xml_to_dict.py:9:19:9:30 | ControlFlowNode for Attribute | | xml_to_dict.py:9:19:9:30 | ControlFlowNode for Attribute | xml_to_dict.py:9:19:9:45 | ControlFlowNode for Subscript | | xml_to_dict.py:9:19:9:45 | ControlFlowNode for Subscript | xml_to_dict.py:11:28:11:38 | ControlFlowNode for xml_content | @@ -186,22 +187,23 @@ nodes | xml_sax_make_parser.py:42:19:42:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | | xml_sax_make_parser.py:42:19:42:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | | xml_sax_make_parser.py:49:18:49:38 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() | -| xml_sax_make_parser.py:57:19:57:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| xml_sax_make_parser.py:57:19:57:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | -| xml_sax_make_parser.py:57:19:57:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | -| xml_sax_make_parser.py:63:18:63:38 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() | -| xml_sax_make_parser.py:69:19:69:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| 
xml_sax_make_parser.py:69:19:69:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | -| xml_sax_make_parser.py:69:19:69:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | -| xml_sax_make_parser.py:73:34:73:54 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() | -| xml_sax_make_parser.py:79:19:79:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| xml_sax_make_parser.py:79:19:79:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | -| xml_sax_make_parser.py:79:19:79:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | -| xml_sax_make_parser.py:86:22:86:42 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() | -| xml_sax_make_parser.py:91:19:91:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| xml_sax_make_parser.py:91:19:91:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | -| xml_sax_make_parser.py:91:19:91:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | -| xml_sax_make_parser.py:98:22:98:42 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() | +| xml_sax_make_parser.py:63:19:63:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | +| xml_sax_make_parser.py:63:19:63:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | +| xml_sax_make_parser.py:63:19:63:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | +| xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() | +| xml_sax_make_parser.py:75:19:75:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | +| xml_sax_make_parser.py:75:19:75:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for 
Attribute | +| xml_sax_make_parser.py:75:19:75:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | +| xml_sax_make_parser.py:79:33:79:53 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() | +| xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | +| xml_sax_make_parser.py:86:19:86:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | +| xml_sax_make_parser.py:86:19:86:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | +| xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() | +| xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() | +| xml_sax_make_parser.py:98:19:98:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | +| xml_sax_make_parser.py:98:19:98:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | +| xml_sax_make_parser.py:98:19:98:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | +| xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() | | xml_to_dict.py:9:19:9:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | | xml_to_dict.py:9:19:9:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | | xml_to_dict.py:9:19:9:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | @@ -231,13 +233,21 @@ subpaths | xml_dom.py:33:40:33:50 | ControlFlowNode for xml_content | xml_dom.py:31:19:31:25 | ControlFlowNode for request | xml_dom.py:33:40:33:50 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to Quadratic Blowup. 
| xml_dom.py:33:40:33:50 | ControlFlowNode for xml_content | This | xml_dom.py:31:19:31:25 | ControlFlowNode for request | user-provided value | | xml_etree.py:49:45:49:55 | ControlFlowNode for xml_content | xml_etree.py:46:19:46:25 | ControlFlowNode for request | xml_etree.py:49:45:49:55 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to XXE. | xml_etree.py:49:45:49:55 | ControlFlowNode for xml_content | This | xml_etree.py:46:19:46:25 | ControlFlowNode for request | user-provided value | | xml_etree.py:56:45:56:55 | ControlFlowNode for xml_content | xml_etree.py:53:19:53:25 | ControlFlowNode for request | xml_etree.py:56:45:56:55 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to XXE. | xml_etree.py:56:45:56:55 | ControlFlowNode for xml_content | This | xml_etree.py:53:19:53:25 | ControlFlowNode for request | user-provided value | -| xml_sax_make_parser.py:63:18:63:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:57:19:57:25 | ControlFlowNode for request | xml_sax_make_parser.py:63:18:63:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to Billion Laughs. | xml_sax_make_parser.py:63:18:63:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:57:19:57:25 | ControlFlowNode for request | user-provided value | -| xml_sax_make_parser.py:63:18:63:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:57:19:57:25 | ControlFlowNode for request | xml_sax_make_parser.py:63:18:63:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to DTD retrieval. 
| xml_sax_make_parser.py:63:18:63:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:57:19:57:25 | ControlFlowNode for request | user-provided value | -| xml_sax_make_parser.py:63:18:63:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:57:19:57:25 | ControlFlowNode for request | xml_sax_make_parser.py:63:18:63:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to Quadratic Blowup. | xml_sax_make_parser.py:63:18:63:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:57:19:57:25 | ControlFlowNode for request | user-provided value | -| xml_sax_make_parser.py:63:18:63:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:57:19:57:25 | ControlFlowNode for request | xml_sax_make_parser.py:63:18:63:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to XXE. | xml_sax_make_parser.py:63:18:63:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:57:19:57:25 | ControlFlowNode for request | user-provided value | -| xml_sax_make_parser.py:98:22:98:42 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:91:19:91:25 | ControlFlowNode for request | xml_sax_make_parser.py:98:22:98:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to Billion Laughs. | xml_sax_make_parser.py:98:22:98:42 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:91:19:91:25 | ControlFlowNode for request | user-provided value | -| xml_sax_make_parser.py:98:22:98:42 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:91:19:91:25 | ControlFlowNode for request | xml_sax_make_parser.py:98:22:98:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to DTD retrieval. 
| xml_sax_make_parser.py:98:22:98:42 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:91:19:91:25 | ControlFlowNode for request | user-provided value | -| xml_sax_make_parser.py:98:22:98:42 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:91:19:91:25 | ControlFlowNode for request | xml_sax_make_parser.py:98:22:98:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to Quadratic Blowup. | xml_sax_make_parser.py:98:22:98:42 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:91:19:91:25 | ControlFlowNode for request | user-provided value | -| xml_sax_make_parser.py:98:22:98:42 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:91:19:91:25 | ControlFlowNode for request | xml_sax_make_parser.py:98:22:98:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to XXE. | xml_sax_make_parser.py:98:22:98:42 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:91:19:91:25 | ControlFlowNode for request | user-provided value | +| xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:63:19:63:25 | ControlFlowNode for request | xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to Billion Laughs. | xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:63:19:63:25 | ControlFlowNode for request | user-provided value | +| xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:63:19:63:25 | ControlFlowNode for request | xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to DTD retrieval. 
| xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:63:19:63:25 | ControlFlowNode for request | user-provided value | +| xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:63:19:63:25 | ControlFlowNode for request | xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to Quadratic Blowup. | xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:63:19:63:25 | ControlFlowNode for request | user-provided value | +| xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:63:19:63:25 | ControlFlowNode for request | xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to XXE. | xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:63:19:63:25 | ControlFlowNode for request | user-provided value | +| xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to Billion Laughs. | xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | user-provided value | +| xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to DTD retrieval. 
| xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | user-provided value | +| xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to Quadratic Blowup. | xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | user-provided value | +| xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to XXE. | xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | user-provided value | +| xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to Billion Laughs. | xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | user-provided value | +| xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to DTD retrieval. 
| xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | user-provided value | +| xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to Quadratic Blowup. | xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | user-provided value | +| xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to XXE. | xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | user-provided value | +| xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:98:19:98:25 | ControlFlowNode for request | xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to Billion Laughs. | xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:98:19:98:25 | ControlFlowNode for request | user-provided value | +| xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:98:19:98:25 | ControlFlowNode for request | xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to DTD retrieval. 
| xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:98:19:98:25 | ControlFlowNode for request | user-provided value | +| xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:98:19:98:25 | ControlFlowNode for request | xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to Quadratic Blowup. | xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:98:19:98:25 | ControlFlowNode for request | user-provided value | +| xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:98:19:98:25 | ControlFlowNode for request | xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to XXE. | xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:98:19:98:25 | ControlFlowNode for request | user-provided value | | xml_to_dict.py:17:28:17:38 | ControlFlowNode for xml_content | xml_to_dict.py:15:19:15:25 | ControlFlowNode for request | xml_to_dict.py:17:28:17:38 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to Billion Laughs. | xml_to_dict.py:17:28:17:38 | ControlFlowNode for xml_content | This | xml_to_dict.py:15:19:15:25 | ControlFlowNode for request | user-provided value | | xml_to_dict.py:17:28:17:38 | ControlFlowNode for xml_content | xml_to_dict.py:15:19:15:25 | ControlFlowNode for request | xml_to_dict.py:17:28:17:38 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to Quadratic Blowup. 
| xml_to_dict.py:17:28:17:38 | ControlFlowNode for xml_content | This | xml_to_dict.py:15:19:15:25 | ControlFlowNode for request | user-provided value | diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/xml_sax_make_parser.py b/python/ql/test/experimental/query-tests/Security/CWE-611/xml_sax_make_parser.py index 9f858d99ddd2..e95abf753796 100644 --- a/python/ql/test/experimental/query-tests/Security/CWE-611/xml_sax_make_parser.py +++ b/python/ql/test/experimental/query-tests/Security/CWE-611/xml_sax_make_parser.py @@ -13,7 +13,7 @@ def characters(self, data): self._result.append(data) def parse(self, f): - xml.sax.parse(f, self) + xml.sax.parse(f, self) # OK for XXE/DTD, NOT OK for billion laughs/quadratic return self._result # GOOD @@ -33,7 +33,7 @@ def xml_makeparser_MainHandler(): GoodHandler = MainHandler() parser = xml.sax.make_parser() parser.setContentHandler(GoodHandler) - parser.parse(StringIO(xml_content)) + parser.parse(StringIO(xml_content)) # OK for XXE/DTD, NOT OK for billion laughs/quadratic return GoodHandler._result @@ -46,11 +46,17 @@ def xml_makeparser_MainHandler_entitiesFalse(): parser.setContentHandler(GoodHandler) # https://docs.python.org/3/library/xml.sax.handler.html#xml.sax.handler.feature_external_ges parser.setFeature(xml.sax.handler.feature_external_ges, False) - parser.parse(StringIO(xml_content)) + parser.parse(StringIO(xml_content)) # # OK for XXE/DTD, NOT OK for billion laughs/quadratic return GoodHandler._result -# BAD +@app.route("not-user-controlled") +def not_user_controlled(): + parser = xml.sax.make_parser() + parser.setFeature(xml.sax.handler.feature_external_ges, True) + parser.parse("/not-user-controlled/default_config.xml") # OK + return +# BAD @app.route("/xml.sax.make_parser()+MainHandler-xml.sax.handler.feature_external_ges_True") def xml_makeparser_MainHandler_entitiesTrue(): @@ -60,7 +66,7 @@ def xml_makeparser_MainHandler_entitiesTrue(): parser = xml.sax.make_parser() 
parser.setContentHandler(BadHandler) parser.setFeature(xml.sax.handler.feature_external_ges, True) - parser.parse(StringIO(xml_content)) + parser.parse(StringIO(xml_content)) # NOT OK for XXE/DTD, NOT OK for billion laughs/quadratic return BadHandler._result @@ -70,7 +76,8 @@ def xml_makeparser_minidom_entitiesTrue(): parser = xml.sax.make_parser() parser.setFeature(xml.sax.handler.feature_external_ges, True) - return xml.dom.minidom.parse(StringIO(xml_content), parser=parser).documentElement.childNodes + doc = xml.dom.minidom.parse(StringIO(xml_content), parser=parser) # NOT OK for XXE/DTD, NOT OK for billion laughs/quadratic + return doc.documentElement.childNodes # Forward Type Tracking test @@ -80,20 +87,18 @@ def forward_tracking1(action): parser = xml.sax.make_parser() if action == 'load-config': - parser.setFeature(xml.sax.handler.feature_external_ges, False) - parser.parse("/not-user-controlled/default_config.xml") + parser.setFeature(xml.sax.handler.feature_external_ges, True) + parser.parse(StringIO(xml_content)) # NOT OK for XXE/DTD, NOT OK for billion laughs/quadratic else: - parser.parse(StringIO(xml_content)) - return + parser.parse(StringIO(xml_content)) # OK for XXE/DTD, NOT OK for billion laughs/quadratic + return @app.route("forward_tracking2") def forward_tracking2(action): xml_content = request.args['xml_content'] parser = xml.sax.make_parser() - if action == 'load-config': - parser.setFeature(xml.sax.handler.feature_external_ges, True) - parser.parse("/not-user-controlled/default_config.xml") - else: - parser.parse(StringIO(xml_content)) - return + parser.setFeature(xml.sax.handler.feature_external_ges, True) + parser.setFeature(xml.sax.handler.feature_external_ges, False) + parser.parse(StringIO(xml_content)) # OK for XXE/DTD, NOT OK for billion laughs/quadratic + return From ee23c05489deb55626fe0402760ff89535856c84 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Tue, 1 Mar 2022 17:15:19 +0100 Subject: [PATCH 36/79] Python: XML: 
Expose vuln kind on sink --- .../Security/CWE-611/XmlEntityInjection.ql | 8 ++- .../security/dataflow/XmlEntityInjection.qll | 18 ------- .../XmlEntityInjectionCustomizations.qll | 53 +++++++------------ 3 files changed, 25 insertions(+), 54 deletions(-) diff --git a/python/ql/src/experimental/Security/CWE-611/XmlEntityInjection.ql b/python/ql/src/experimental/Security/CWE-611/XmlEntityInjection.ql index 8f22ded4b157..03f0c7b1c0e9 100644 --- a/python/ql/src/experimental/Security/CWE-611/XmlEntityInjection.ql +++ b/python/ql/src/experimental/Security/CWE-611/XmlEntityInjection.ql @@ -15,8 +15,12 @@ import python import experimental.semmle.python.security.dataflow.XmlEntityInjection import DataFlow::PathGraph -from DataFlow::PathNode source, DataFlow::PathNode sink, string kind -where XmlEntityInjection::xmlEntityInjectionVulnerable(source, sink, kind) +from + XmlEntityInjection::XmlEntityInjectionConfiguration config, DataFlow::PathNode source, + DataFlow::PathNode sink, string kind +where + config.hasFlowPath(source, sink) and + kind = sink.getNode().(XmlEntityInjection::Sink).getVulnerableKind() select sink.getNode(), source, sink, "$@ XML input is constructed from a $@ and is vulnerable to " + kind + ".", sink.getNode(), "This", source.getNode(), "user-provided value" diff --git a/python/ql/src/experimental/semmle/python/security/dataflow/XmlEntityInjection.qll b/python/ql/src/experimental/semmle/python/security/dataflow/XmlEntityInjection.qll index 087c3057640e..35220e153d12 100644 --- a/python/ql/src/experimental/semmle/python/security/dataflow/XmlEntityInjection.qll +++ b/python/ql/src/experimental/semmle/python/security/dataflow/XmlEntityInjection.qll @@ -25,22 +25,4 @@ module XmlEntityInjection { any(AdditionalTaintStep s).step(nodeFrom, nodeTo) } } - - private import DataFlow::PathGraph - - /** Holds if there is an XML injection from `source` to `sink` */ - predicate xmlEntityInjection(DataFlow::PathNode source, DataFlow::PathNode sink) { - 
any(XmlEntityInjectionConfiguration x).hasFlowPath(source, sink) - } - - /** Holds if there is an XML injection from `source` to `sink` vulnerable to `kind` */ - predicate xmlEntityInjectionVulnerable( - DataFlow::PathNode source, DataFlow::PathNode sink, string kind - ) { - xmlEntityInjection(source, sink) and - ( - xmlParsingInputAsVulnerableSink(sink.getNode(), kind) or - xmlParserInputAsVulnerableSink(sink.getNode(), kind) - ) - } } diff --git a/python/ql/src/experimental/semmle/python/security/dataflow/XmlEntityInjectionCustomizations.qll b/python/ql/src/experimental/semmle/python/security/dataflow/XmlEntityInjectionCustomizations.qll index 8f8b3ae2c6ab..7de0c0c4b9c2 100644 --- a/python/ql/src/experimental/semmle/python/security/dataflow/XmlEntityInjectionCustomizations.qll +++ b/python/ql/src/experimental/semmle/python/security/dataflow/XmlEntityInjectionCustomizations.qll @@ -24,7 +24,10 @@ module XmlEntityInjection { /** * A data flow sink for "xml injection" vulnerabilities. */ - abstract class Sink extends DataFlow::Node { } + abstract class Sink extends DataFlow::Node { + /** Gets the kind of XML injection that this sink is vulnerable to. */ + abstract string getVulnerableKind(); + } /** * A sanitizer guard for "xml injection" vulnerabilities. @@ -46,53 +49,35 @@ module XmlEntityInjection { } /** - * A data flow sink for XML parsing libraries. + * An input to a direct XML parsing function, considered as a flow sink. * * See `XML::XMLParsing`. */ - abstract class XMLParsingSink extends Sink { } + class XMLParsingInputAsSink extends Sink { + XML::XMLParsing xmlParsing; - /** - * A data flow sink for XML parsers. - * - * See `XML::XMLParser` - */ - abstract class XMLParserSink extends Sink { } + XMLParsingInputAsSink() { this = xmlParsing.getAnInput() } - /** - * A source of remote user input, considered as a flow source. 
- */ - class RemoteFlowSourceAsSource extends Source, RemoteFlowSource { } - - /** - * An xml parsing operation, considered as a flow sink. - */ - class XMLParsingInputAsSink extends XMLParsingSink { - XMLParsingInputAsSink() { this = any(XML::XMLParsing xmlParsing).getAnInput() } + override string getVulnerableKind() { xmlParsing.vulnerable(result) } } /** - * An xml parsing operation vulnerable to `kind`. + * An input to an XML parser, considered as a flow sink. + * + * See `XML::XMLParser` */ - predicate xmlParsingInputAsVulnerableSink(DataFlow::Node sink, string kind) { - exists(XML::XMLParsing xmlParsing | - sink = xmlParsing.getAnInput() and xmlParsing.vulnerable(kind) - ) - } + class XMLParserInputAsSink extends Sink { + XML::XMLParser xmlParser; - /** - * An xml parser operation, considered as a flow sink. - */ - class XMLParserInputAsSink extends XMLParserSink { - XMLParserInputAsSink() { this = any(XML::XMLParser xmlParser).getAnInput() } + XMLParserInputAsSink() { this = xmlParser.getAnInput() } + + override string getVulnerableKind() { xmlParser.vulnerable(result) } } /** - * An xml parser operation vulnerable to `kind`. + * A source of remote user input, considered as a flow source. */ - predicate xmlParserInputAsVulnerableSink(DataFlow::Node sink, string kind) { - exists(XML::XMLParser xmlParser | sink = xmlParser.getAnInput() and xmlParser.vulnerable(kind)) - } + class RemoteFlowSourceAsSource extends Source, RemoteFlowSource { } /** * A comparison with a constant string, considered as a sanitizer-guard. 
From aaf55b21c46dbd3e6a84204d0a43f39ec32d85fe Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Wed, 2 Mar 2022 10:58:58 +0100 Subject: [PATCH 37/79] Python: Add XMLVulnerabilityKind This gives some freedom in changing the name presented, and not worrying about whether you have made a typo that makes everything break :| --- .../experimental/semmle/python/Concepts.qll | 31 ++++++++++++++-- .../semmle/python/frameworks/Xml.qll | 37 ++++++++++--------- 2 files changed, 47 insertions(+), 21 deletions(-) diff --git a/python/ql/src/experimental/semmle/python/Concepts.qll b/python/ql/src/experimental/semmle/python/Concepts.qll index 09f8e7897c58..4cdd803c9327 100644 --- a/python/ql/src/experimental/semmle/python/Concepts.qll +++ b/python/ql/src/experimental/semmle/python/Concepts.qll @@ -15,6 +15,29 @@ private import semmle.python.dataflow.new.TaintTracking private import experimental.semmle.python.Frameworks module XML { + /** + * A kind of XML vulnerability. + * + * See https://pypi.org/project/defusedxml/#python-xml-libraries + */ + class XMLVulnerabilityKind extends string { + XMLVulnerabilityKind() { + this in ["Billion Laughs", "Quadratic Blowup", "XXE", "DTD retrieval",] + } + + /** Holds for Billion Laughs vulnerability kind. */ + predicate isBillionLaughs() { this = "Billion Laughs" } + + /** Holds for Quadratic Blowup vulnerability kind. */ + predicate isQuadraticBlowup() { this = "Quadratic Blowup" } + + /** Holds for XXE vulnerability kind. */ + predicate isXxe() { this = "XXE" } + + /** Holds for DTD retrieval vulnerability kind. */ + predicate isDtdRetrieval() { this = "DTD retrieval" } + } + /** * A data-flow node that collects functions parsing XML. * @@ -30,7 +53,7 @@ module XML { /** * Holds if the parsing method or the parser holding it is vulnerable to `kind`. 
*/ - predicate vulnerable(string kind) { super.vulnerable(kind) } + predicate vulnerable(XMLVulnerabilityKind kind) { super.vulnerable(kind) } } /** Provides classes for modeling XML parsing APIs. */ @@ -50,7 +73,7 @@ module XML { /** * Holds if the parsing method or the parser holding it is vulnerable to `kind`. */ - abstract predicate vulnerable(string kind); + abstract predicate vulnerable(XMLVulnerabilityKind kind); } } @@ -69,7 +92,7 @@ module XML { /** * Holds if the parser is vulnerable to `kind`. */ - predicate vulnerable(string kind) { super.vulnerable(kind) } + predicate vulnerable(XMLVulnerabilityKind kind) { super.vulnerable(kind) } } /** Provides classes for modeling XML parsers. */ @@ -89,7 +112,7 @@ module XML { /** * Holds if the parser is vulnerable to `kind`. */ - abstract predicate vulnerable(string kind); + abstract predicate vulnerable(XMLVulnerabilityKind kind); } } } diff --git a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll index cf4abbac995b..ffd8d44ba35c 100644 --- a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll +++ b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll @@ -24,7 +24,7 @@ private module Xml { override DataFlow::Node getAnInput() { none() } - override predicate vulnerable(string kind) { none() } + override predicate vulnerable(XML::XMLVulnerabilityKind kind) { none() } } /** @@ -57,7 +57,7 @@ private module Xml { override DataFlow::Node getAnInput() { result = this.getArg(0) } - override predicate vulnerable(string kind) { + override predicate vulnerable(XML::XMLVulnerabilityKind kind) { exists(XML::XMLParser xmlParser | xmlParser = this.getArgByName("parser").getALocalSource() and xmlParser.vulnerable(kind) ) @@ -111,27 +111,27 @@ private module Xml { override DataFlow::Node getAnInput() { result = this.getAMethodCall("parse").getArg(0) } - override predicate vulnerable(string kind) { + override predicate 
vulnerable(XML::XMLVulnerabilityKind kind) { exists(DataFlow::MethodCallNode parse, API::Node handler, API::Node feature | handler = API::moduleImport("xml").getMember("sax").getMember("handler") and parse.calls(trackSaxFeature(this, feature), "parse") and parse.getArg(0) = this.getAnInput() // enough to avoid FPs? | - kind = ["XXE", "DTD retrieval"] and + (kind.isXxe() or kind.isDtdRetrieval()) and feature = handler.getMember("feature_external_ges") or - kind = ["Billion Laughs", "Quadratic Blowup"] + (kind.isBillionLaughs() or kind.isQuadraticBlowup()) ) } - predicate vulnerable(DataFlow::Node n, string kind) { + predicate vulnerable(DataFlow::Node n, XML::XMLVulnerabilityKind kind) { exists(API::Node handler, API::Node feature | handler = API::moduleImport("xml").getMember("sax").getMember("handler") and DataFlow::exprNode(trackSaxFeature(this, feature).asExpr()) .(DataFlow::LocalSourceNode) .flowsTo(n) | - kind = ["XXE", "DTD retrieval"] and + (kind.isXxe() or kind.isDtdRetrieval()) and feature = handler.getMember("feature_external_ges") ) } @@ -162,14 +162,14 @@ private module Xml { override DataFlow::Node getAnInput() { none() } - override predicate vulnerable(string kind) { - kind = "XXE" and + override predicate vulnerable(XML::XMLVulnerabilityKind kind) { + kind.isXxe() and not ( exists(this.getArgByName("resolve_entities")) or this.getArgByName("resolve_entities").getALocalSource().asExpr() = any(False f) ) or - kind = ["Billion Laughs", "Quadratic Blowup"] and + (kind.isBillionLaughs() or kind.isQuadraticBlowup()) and ( this.getArgByName("huge_tree").getALocalSource().asExpr() = any(True t) and not this.getArgByName("resolve_entities").getALocalSource().asExpr() = any(False f) @@ -206,12 +206,12 @@ private module Xml { override DataFlow::Node getAnInput() { result = this.getArg(0) } - override predicate vulnerable(string kind) { + override predicate vulnerable(XML::XMLVulnerabilityKind kind) { exists(XML::XMLParser xmlParser | xmlParser = 
this.getArgByName("parser").getALocalSource() and xmlParser.vulnerable(kind) ) or - kind = "XXE" and not exists(this.getArgByName("parser")) + kind.isXxe() and not exists(this.getArgByName("parser")) } } @@ -233,8 +233,8 @@ private module Xml { override DataFlow::Node getAnInput() { result = this.getArg(0) } - override predicate vulnerable(string kind) { - kind = ["Billion Laughs", "Quadratic Blowup"] and + override predicate vulnerable(XML::XMLVulnerabilityKind kind) { + (kind.isBillionLaughs() or kind.isQuadraticBlowup()) and this.getArgByName("disable_entities").getALocalSource().asExpr() = any(False f) } } @@ -266,12 +266,13 @@ private module Xml { override DataFlow::Node getAnInput() { result = this.getArg(0) } - override predicate vulnerable(string kind) { + override predicate vulnerable(XML::XMLVulnerabilityKind kind) { exists(XML::XMLParser xmlParser | xmlParser = this.getArgByName("parser").getALocalSource() and xmlParser.vulnerable(kind) ) or - kind = ["Billion Laughs", "Quadratic Blowup"] and not exists(this.getArgByName("parser")) + (kind.isBillionLaughs() or kind.isQuadraticBlowup()) and + not exists(this.getArgByName("parser")) } } @@ -300,6 +301,8 @@ private module Xml { result = this.getAMethodCall("register_function").getArg(0) } - override predicate vulnerable(string kind) { kind = ["Billion Laughs", "Quadratic Blowup"] } + override predicate vulnerable(XML::XMLVulnerabilityKind kind) { + kind.isBillionLaughs() or kind.isQuadraticBlowup() + } } } From 16e482bf6fcb07bc11a77ca1e82c65baf41c1ac8 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Wed, 2 Mar 2022 11:53:02 +0100 Subject: [PATCH 38/79] Python: Improve QLDoc for XML parsing/parsers --- python/ql/src/experimental/semmle/python/Concepts.qll | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/python/ql/src/experimental/semmle/python/Concepts.qll b/python/ql/src/experimental/semmle/python/Concepts.qll index 4cdd803c9327..22616c0a5d2b 100644 --- 
a/python/ql/src/experimental/semmle/python/Concepts.qll +++ b/python/ql/src/experimental/semmle/python/Concepts.qll @@ -39,7 +39,7 @@ module XML { } /** - * A data-flow node that collects functions parsing XML. + * A data-flow node that parses XML. * * Extend this class to model new APIs. If you want to refine existing API models, * extend `XMLParsing` instead. @@ -59,7 +59,7 @@ module XML { /** Provides classes for modeling XML parsing APIs. */ module XMLParsing { /** - * A data-flow node that collects functions parsing XML. + * A data-flow node that parses XML. * * Extend this class to model new APIs. If you want to refine existing API models, * extend `XMLParsing` instead. @@ -78,7 +78,7 @@ module XML { } /** - * A data-flow node that collects XML parsers. + * A data-flow node that constructs an XML parser. * * Extend this class to model new APIs. If you want to refine existing API models, * extend `XMLParser` instead. @@ -98,7 +98,7 @@ module XML { /** Provides classes for modeling XML parsers. */ module XMLParser { /** - * A data-flow node that collects XML parsers. + * A data-flow node that constructs an XML parser. * * Extend this class to model new APIs. If you want to refine existing API models, * extend `XMLParser` instead. From 6dd776b2de9e6eede27d2cc22d9781db4fe8a83d Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Wed, 2 Mar 2022 14:52:11 +0100 Subject: [PATCH 39/79] Python: Only produce one alert per vulnerable XML sink This made it much easier to debug the current alerts on tests at least. 
Notice that it's important that we have `strictconcat` and not just `concat`, since `concat` will also allow flow to sinks that are not vulnerable to any kind of XML vulnerability :| --- .../Security/CWE-611/XmlEntityInjection.ql | 11 +++- .../CWE-611/XmlEntityInjection.expected | 55 ++++++------------- 2 files changed, 26 insertions(+), 40 deletions(-) diff --git a/python/ql/src/experimental/Security/CWE-611/XmlEntityInjection.ql b/python/ql/src/experimental/Security/CWE-611/XmlEntityInjection.ql index 03f0c7b1c0e9..922ca346b173 100644 --- a/python/ql/src/experimental/Security/CWE-611/XmlEntityInjection.ql +++ b/python/ql/src/experimental/Security/CWE-611/XmlEntityInjection.ql @@ -17,10 +17,15 @@ import DataFlow::PathGraph from XmlEntityInjection::XmlEntityInjectionConfiguration config, DataFlow::PathNode source, - DataFlow::PathNode sink, string kind + DataFlow::PathNode sink, string kinds where config.hasFlowPath(source, sink) and - kind = sink.getNode().(XmlEntityInjection::Sink).getVulnerableKind() + kinds = + strictconcat(string kind | + kind = sink.getNode().(XmlEntityInjection::Sink).getVulnerableKind() + | + kind, ", " + ) select sink.getNode(), source, sink, - "$@ XML input is constructed from a $@ and is vulnerable to " + kind + ".", sink.getNode(), + "$@ XML input is constructed from a $@ and is vulnerable to: " + kinds + ".", sink.getNode(), "This", source.getNode(), "user-provided value" diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected b/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected index 2e291875ce81..6c342ef223ea 100644 --- a/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected +++ b/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected @@ -214,40 +214,21 @@ nodes | xml_to_dict.py:17:28:17:38 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | subpaths #select -| 
lxml_etree.py:13:34:13:44 | ControlFlowNode for xml_content | lxml_etree.py:11:19:11:25 | ControlFlowNode for request | lxml_etree.py:13:34:13:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to XXE. | lxml_etree.py:13:34:13:44 | ControlFlowNode for xml_content | This | lxml_etree.py:11:19:11:25 | ControlFlowNode for request | user-provided value | -| lxml_etree.py:19:38:19:50 | ControlFlowNode for List | lxml_etree.py:17:19:17:25 | ControlFlowNode for request | lxml_etree.py:19:38:19:50 | ControlFlowNode for List | $@ XML input is constructed from a $@ and is vulnerable to XXE. | lxml_etree.py:19:38:19:50 | ControlFlowNode for List | This | lxml_etree.py:17:19:17:25 | ControlFlowNode for request | user-provided value | -| lxml_etree.py:25:27:25:37 | ControlFlowNode for xml_content | lxml_etree.py:23:19:23:25 | ControlFlowNode for request | lxml_etree.py:25:27:25:37 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to XXE. | lxml_etree.py:25:27:25:37 | ControlFlowNode for xml_content | This | lxml_etree.py:23:19:23:25 | ControlFlowNode for request | user-provided value | -| lxml_etree.py:31:29:31:49 | ControlFlowNode for StringIO() | lxml_etree.py:29:19:29:25 | ControlFlowNode for request | lxml_etree.py:31:29:31:49 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to XXE. | lxml_etree.py:31:29:31:49 | ControlFlowNode for StringIO() | This | lxml_etree.py:29:19:29:25 | ControlFlowNode for request | user-provided value | -| lxml_etree.py:40:34:40:44 | ControlFlowNode for xml_content | lxml_etree.py:37:19:37:25 | ControlFlowNode for request | lxml_etree.py:40:34:40:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to XXE. 
| lxml_etree.py:40:34:40:44 | ControlFlowNode for xml_content | This | lxml_etree.py:37:19:37:25 | ControlFlowNode for request | user-provided value | -| lxml_etree.py:47:34:47:44 | ControlFlowNode for xml_content | lxml_etree.py:44:19:44:25 | ControlFlowNode for request | lxml_etree.py:47:34:47:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to XXE. | lxml_etree.py:47:34:47:44 | ControlFlowNode for xml_content | This | lxml_etree.py:44:19:44:25 | ControlFlowNode for request | user-provided value | -| lxml_etree.py:76:34:76:44 | ControlFlowNode for xml_content | lxml_etree.py:73:19:73:25 | ControlFlowNode for request | lxml_etree.py:76:34:76:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to Billion Laughs. | lxml_etree.py:76:34:76:44 | ControlFlowNode for xml_content | This | lxml_etree.py:73:19:73:25 | ControlFlowNode for request | user-provided value | -| lxml_etree.py:76:34:76:44 | ControlFlowNode for xml_content | lxml_etree.py:73:19:73:25 | ControlFlowNode for request | lxml_etree.py:76:34:76:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to Quadratic Blowup. | lxml_etree.py:76:34:76:44 | ControlFlowNode for xml_content | This | lxml_etree.py:73:19:73:25 | ControlFlowNode for request | user-provided value | -| lxml_etree.py:76:34:76:44 | ControlFlowNode for xml_content | lxml_etree.py:73:19:73:25 | ControlFlowNode for request | lxml_etree.py:76:34:76:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to XXE. 
| lxml_etree.py:76:34:76:44 | ControlFlowNode for xml_content | This | lxml_etree.py:73:19:73:25 | ControlFlowNode for request | user-provided value | -| xml_dom.py:15:34:15:54 | ControlFlowNode for StringIO() | xml_dom.py:13:19:13:25 | ControlFlowNode for request | xml_dom.py:15:34:15:54 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to Billion Laughs. | xml_dom.py:15:34:15:54 | ControlFlowNode for StringIO() | This | xml_dom.py:13:19:13:25 | ControlFlowNode for request | user-provided value | -| xml_dom.py:15:34:15:54 | ControlFlowNode for StringIO() | xml_dom.py:13:19:13:25 | ControlFlowNode for request | xml_dom.py:15:34:15:54 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to Quadratic Blowup. | xml_dom.py:15:34:15:54 | ControlFlowNode for StringIO() | This | xml_dom.py:13:19:13:25 | ControlFlowNode for request | user-provided value | -| xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | xml_dom.py:19:19:19:25 | ControlFlowNode for request | xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to Billion Laughs. | xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | This | xml_dom.py:19:19:19:25 | ControlFlowNode for request | user-provided value | -| xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | xml_dom.py:19:19:19:25 | ControlFlowNode for request | xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to Quadratic Blowup. | xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | This | xml_dom.py:19:19:19:25 | ControlFlowNode for request | user-provided value | -| xml_dom.py:27:34:27:54 | ControlFlowNode for StringIO() | xml_dom.py:25:19:25:25 | ControlFlowNode for request | xml_dom.py:27:34:27:54 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to Billion Laughs. 
| xml_dom.py:27:34:27:54 | ControlFlowNode for StringIO() | This | xml_dom.py:25:19:25:25 | ControlFlowNode for request | user-provided value | -| xml_dom.py:27:34:27:54 | ControlFlowNode for StringIO() | xml_dom.py:25:19:25:25 | ControlFlowNode for request | xml_dom.py:27:34:27:54 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to Quadratic Blowup. | xml_dom.py:27:34:27:54 | ControlFlowNode for StringIO() | This | xml_dom.py:25:19:25:25 | ControlFlowNode for request | user-provided value | -| xml_dom.py:33:40:33:50 | ControlFlowNode for xml_content | xml_dom.py:31:19:31:25 | ControlFlowNode for request | xml_dom.py:33:40:33:50 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to Billion Laughs. | xml_dom.py:33:40:33:50 | ControlFlowNode for xml_content | This | xml_dom.py:31:19:31:25 | ControlFlowNode for request | user-provided value | -| xml_dom.py:33:40:33:50 | ControlFlowNode for xml_content | xml_dom.py:31:19:31:25 | ControlFlowNode for request | xml_dom.py:33:40:33:50 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to Quadratic Blowup. | xml_dom.py:33:40:33:50 | ControlFlowNode for xml_content | This | xml_dom.py:31:19:31:25 | ControlFlowNode for request | user-provided value | -| xml_etree.py:49:45:49:55 | ControlFlowNode for xml_content | xml_etree.py:46:19:46:25 | ControlFlowNode for request | xml_etree.py:49:45:49:55 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to XXE. | xml_etree.py:49:45:49:55 | ControlFlowNode for xml_content | This | xml_etree.py:46:19:46:25 | ControlFlowNode for request | user-provided value | -| xml_etree.py:56:45:56:55 | ControlFlowNode for xml_content | xml_etree.py:53:19:53:25 | ControlFlowNode for request | xml_etree.py:56:45:56:55 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to XXE. 
| xml_etree.py:56:45:56:55 | ControlFlowNode for xml_content | This | xml_etree.py:53:19:53:25 | ControlFlowNode for request | user-provided value | -| xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:63:19:63:25 | ControlFlowNode for request | xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to Billion Laughs. | xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:63:19:63:25 | ControlFlowNode for request | user-provided value | -| xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:63:19:63:25 | ControlFlowNode for request | xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to DTD retrieval. | xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:63:19:63:25 | ControlFlowNode for request | user-provided value | -| xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:63:19:63:25 | ControlFlowNode for request | xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to Quadratic Blowup. | xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:63:19:63:25 | ControlFlowNode for request | user-provided value | -| xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:63:19:63:25 | ControlFlowNode for request | xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to XXE. 
| xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:63:19:63:25 | ControlFlowNode for request | user-provided value | -| xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to Billion Laughs. | xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | user-provided value | -| xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to DTD retrieval. | xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | user-provided value | -| xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to Quadratic Blowup. | xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | user-provided value | -| xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to XXE. 
| xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | user-provided value | -| xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to Billion Laughs. | xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | user-provided value | -| xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to DTD retrieval. | xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | user-provided value | -| xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to Quadratic Blowup. | xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | user-provided value | -| xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to XXE. 
| xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | user-provided value | -| xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:98:19:98:25 | ControlFlowNode for request | xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to Billion Laughs. | xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:98:19:98:25 | ControlFlowNode for request | user-provided value | -| xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:98:19:98:25 | ControlFlowNode for request | xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to DTD retrieval. | xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:98:19:98:25 | ControlFlowNode for request | user-provided value | -| xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:98:19:98:25 | ControlFlowNode for request | xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to Quadratic Blowup. | xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:98:19:98:25 | ControlFlowNode for request | user-provided value | -| xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:98:19:98:25 | ControlFlowNode for request | xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to XXE. 
| xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:98:19:98:25 | ControlFlowNode for request | user-provided value | -| xml_to_dict.py:17:28:17:38 | ControlFlowNode for xml_content | xml_to_dict.py:15:19:15:25 | ControlFlowNode for request | xml_to_dict.py:17:28:17:38 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to Billion Laughs. | xml_to_dict.py:17:28:17:38 | ControlFlowNode for xml_content | This | xml_to_dict.py:15:19:15:25 | ControlFlowNode for request | user-provided value | -| xml_to_dict.py:17:28:17:38 | ControlFlowNode for xml_content | xml_to_dict.py:15:19:15:25 | ControlFlowNode for request | xml_to_dict.py:17:28:17:38 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to Quadratic Blowup. | xml_to_dict.py:17:28:17:38 | ControlFlowNode for xml_content | This | xml_to_dict.py:15:19:15:25 | ControlFlowNode for request | user-provided value | +| lxml_etree.py:13:34:13:44 | ControlFlowNode for xml_content | lxml_etree.py:11:19:11:25 | ControlFlowNode for request | lxml_etree.py:13:34:13:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: XXE. | lxml_etree.py:13:34:13:44 | ControlFlowNode for xml_content | This | lxml_etree.py:11:19:11:25 | ControlFlowNode for request | user-provided value | +| lxml_etree.py:19:38:19:50 | ControlFlowNode for List | lxml_etree.py:17:19:17:25 | ControlFlowNode for request | lxml_etree.py:19:38:19:50 | ControlFlowNode for List | $@ XML input is constructed from a $@ and is vulnerable to: XXE. 
| lxml_etree.py:19:38:19:50 | ControlFlowNode for List | This | lxml_etree.py:17:19:17:25 | ControlFlowNode for request | user-provided value | +| lxml_etree.py:25:27:25:37 | ControlFlowNode for xml_content | lxml_etree.py:23:19:23:25 | ControlFlowNode for request | lxml_etree.py:25:27:25:37 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: XXE. | lxml_etree.py:25:27:25:37 | ControlFlowNode for xml_content | This | lxml_etree.py:23:19:23:25 | ControlFlowNode for request | user-provided value | +| lxml_etree.py:31:29:31:49 | ControlFlowNode for StringIO() | lxml_etree.py:29:19:29:25 | ControlFlowNode for request | lxml_etree.py:31:29:31:49 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: XXE. | lxml_etree.py:31:29:31:49 | ControlFlowNode for StringIO() | This | lxml_etree.py:29:19:29:25 | ControlFlowNode for request | user-provided value | +| lxml_etree.py:40:34:40:44 | ControlFlowNode for xml_content | lxml_etree.py:37:19:37:25 | ControlFlowNode for request | lxml_etree.py:40:34:40:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: XXE. | lxml_etree.py:40:34:40:44 | ControlFlowNode for xml_content | This | lxml_etree.py:37:19:37:25 | ControlFlowNode for request | user-provided value | +| lxml_etree.py:47:34:47:44 | ControlFlowNode for xml_content | lxml_etree.py:44:19:44:25 | ControlFlowNode for request | lxml_etree.py:47:34:47:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: XXE. 
| lxml_etree.py:47:34:47:44 | ControlFlowNode for xml_content | This | lxml_etree.py:44:19:44:25 | ControlFlowNode for request | user-provided value | +| lxml_etree.py:76:34:76:44 | ControlFlowNode for xml_content | lxml_etree.py:73:19:73:25 | ControlFlowNode for request | lxml_etree.py:76:34:76:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup, XXE. | lxml_etree.py:76:34:76:44 | ControlFlowNode for xml_content | This | lxml_etree.py:73:19:73:25 | ControlFlowNode for request | user-provided value | +| xml_dom.py:15:34:15:54 | ControlFlowNode for StringIO() | xml_dom.py:13:19:13:25 | ControlFlowNode for request | xml_dom.py:15:34:15:54 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_dom.py:15:34:15:54 | ControlFlowNode for StringIO() | This | xml_dom.py:13:19:13:25 | ControlFlowNode for request | user-provided value | +| xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | xml_dom.py:19:19:19:25 | ControlFlowNode for request | xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | This | xml_dom.py:19:19:19:25 | ControlFlowNode for request | user-provided value | +| xml_dom.py:27:34:27:54 | ControlFlowNode for StringIO() | xml_dom.py:25:19:25:25 | ControlFlowNode for request | xml_dom.py:27:34:27:54 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. 
| xml_dom.py:27:34:27:54 | ControlFlowNode for StringIO() | This | xml_dom.py:25:19:25:25 | ControlFlowNode for request | user-provided value | +| xml_dom.py:33:40:33:50 | ControlFlowNode for xml_content | xml_dom.py:31:19:31:25 | ControlFlowNode for request | xml_dom.py:33:40:33:50 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_dom.py:33:40:33:50 | ControlFlowNode for xml_content | This | xml_dom.py:31:19:31:25 | ControlFlowNode for request | user-provided value | +| xml_etree.py:49:45:49:55 | ControlFlowNode for xml_content | xml_etree.py:46:19:46:25 | ControlFlowNode for request | xml_etree.py:49:45:49:55 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: XXE. | xml_etree.py:49:45:49:55 | ControlFlowNode for xml_content | This | xml_etree.py:46:19:46:25 | ControlFlowNode for request | user-provided value | +| xml_etree.py:56:45:56:55 | ControlFlowNode for xml_content | xml_etree.py:53:19:53:25 | ControlFlowNode for request | xml_etree.py:56:45:56:55 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: XXE. | xml_etree.py:56:45:56:55 | ControlFlowNode for xml_content | This | xml_etree.py:53:19:53:25 | ControlFlowNode for request | user-provided value | +| xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:63:19:63:25 | ControlFlowNode for request | xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, DTD retrieval, Quadratic Blowup, XXE. 
| xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:63:19:63:25 | ControlFlowNode for request | user-provided value | +| xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, DTD retrieval, Quadratic Blowup, XXE. | xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | user-provided value | +| xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, DTD retrieval, Quadratic Blowup, XXE. | xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | user-provided value | +| xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:98:19:98:25 | ControlFlowNode for request | xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, DTD retrieval, Quadratic Blowup, XXE. | xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:98:19:98:25 | ControlFlowNode for request | user-provided value | +| xml_to_dict.py:17:28:17:38 | ControlFlowNode for xml_content | xml_to_dict.py:15:19:15:25 | ControlFlowNode for request | xml_to_dict.py:17:28:17:38 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. 
| xml_to_dict.py:17:28:17:38 | ControlFlowNode for xml_content | This | xml_to_dict.py:15:19:15:25 | ControlFlowNode for request | user-provided value | From 7f7758b83dc1ae6a3e528cf6b3b7349e60fd3e56 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Wed, 2 Mar 2022 13:57:28 +0100 Subject: [PATCH 40/79] Python: rewrite xml sax modeling --- .../semmle/python/frameworks/Xml.qll | 134 ++++++++++++------ .../CWE-611/XmlEntityInjection.expected | 6 +- 2 files changed, 93 insertions(+), 47 deletions(-) diff --git a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll index ffd8d44ba35c..1a01bf4c5c8f 100644 --- a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll +++ b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll @@ -64,32 +64,90 @@ private module Xml { } } - /** Gets a reference to a `parser` that has been set a `feature`. */ - private DataFlow::Node trackSaxFeature( - DataFlow::TypeTracker t, DataFlow::CallCfgNode parser, API::Node feature + /** + * A call to the `setFeature` method on an XML sax parser. + * + * See https://docs.python.org/3.10/library/xml.sax.reader.html#xml.sax.xmlreader.XMLReader.setFeature + */ + class SaxParserSetFeatureCall extends DataFlow::MethodCallNode { + SaxParserSetFeatureCall() { + this = + API::moduleImport("xml") + .getMember("sax") + .getMember("make_parser") + .getReturn() + .getMember("setFeature") + .getACall() + } + + // The keyword argument names do not match the documentation. I checked (with Python + // 3.9.5) that the names used here actually work. + DataFlow::Node getFeatureArg() { result in [this.getArg(0), this.getArgByName("name")] } + + DataFlow::Node getStateArg() { result in [this.getArg(1), this.getArgByName("state")] } + } + + /** Gets a back-reference to the `setFeature` state argument `arg`. 
*/ + private DataFlow::TypeTrackingNode saxParserSetFeatureStateArgBacktracker( + DataFlow::TypeBackTracker t, DataFlow::Node arg ) { t.start() and - exists(DataFlow::MethodCallNode featureCall | - featureCall = parser.getAMethodCall("setFeature") and - featureCall.getArg(0).getALocalSource() = feature.getAUse() and - featureCall.getArg(1).getALocalSource() = DataFlow::exprNode(any(True t_)) and - result = featureCall.getObject() + arg = any(SaxParserSetFeatureCall c).getStateArg() and + result = arg.getALocalSource() + or + exists(DataFlow::TypeBackTracker t2 | + result = saxParserSetFeatureStateArgBacktracker(t2, arg).backtrack(t2, t) + ) + } + + /** Gets a back-reference to the `setFeature` state argument `arg`. */ + DataFlow::LocalSourceNode saxParserSetFeatureStateArgBacktracker(DataFlow::Node arg) { + result = saxParserSetFeatureStateArgBacktracker(DataFlow::TypeBackTracker::end(), arg) + } + + /** Gets a reference to a XML sax parser that has `feature_external_ges` turned on */ + private DataFlow::Node saxParserWithFeatureExternalGesTurnedOn(DataFlow::TypeTracker t) { + t.start() and + exists(SaxParserSetFeatureCall call | + call.getFeatureArg() = + API::moduleImport("xml") + .getMember("sax") + .getMember("handler") + .getMember("feature_external_ges") + .getAUse() and + saxParserSetFeatureStateArgBacktracker(call.getStateArg()) + .asExpr() + .(BooleanLiteral) + .booleanValue() = true and + result = call.getObject() ) or exists(DataFlow::TypeTracker t2 | - t = t2.smallstep(trackSaxFeature(t2, parser, feature), result) + t = t2.smallstep(saxParserWithFeatureExternalGesTurnedOn(t2), result) + ) and + // take account of that we can set the feature to False, which makes the parser safe again + not exists(SaxParserSetFeatureCall call | + call.getObject() = result and + call.getFeatureArg() = + API::moduleImport("xml") + .getMember("sax") + .getMember("handler") + .getMember("feature_external_ges") + .getAUse() and + 
saxParserSetFeatureStateArgBacktracker(call.getStateArg()) + .asExpr() + .(BooleanLiteral) + .booleanValue() = false ) } - /** Gets a reference to a `parser` that has been set a `feature`. */ - DataFlow::Node trackSaxFeature(DataFlow::CallCfgNode parser, API::Node feature) { - result = trackSaxFeature(DataFlow::TypeTracker::end(), parser, feature) + /** Gets a reference to an XML sax parser that has `feature_external_ges` turned on. */ + DataFlow::Node saxParserWithFeatureExternalGesTurnedOn() { + result = saxParserWithFeatureExternalGesTurnedOn(DataFlow::TypeTracker::end()) } /** - * Gets a call to `xml.sax.make_parser`. - * - * Given the following example: + * An XML parsing call with a sax parser. * * ```py * BadHandler = MainHandler() @@ -99,41 +157,27 @@ private module Xml { * parser.parse(StringIO(xml_content)) * parsed_xml = BadHandler._result * ``` - * - * * `this` would be `xml.sax.make_parser()`. - * * `getAnInput()`'s result would be `StringIO(xml_content)`. - * * `vulnerable(kind)`'s `kind` would be `Billion Laughs` and `Quadratic Blowup`. 
*/ - private class XMLSaxParser extends DataFlow::CallCfgNode, XML::XMLParser::Range { - XMLSaxParser() { - this = API::moduleImport("xml").getMember("sax").getMember("make_parser").getACall() + private class XMLSaxParsing extends DataFlow::MethodCallNode, XML::XMLParsing::Range { + XMLSaxParsing() { + this = + API::moduleImport("xml") + .getMember("sax") + .getMember("make_parser") + .getReturn() + .getMember("parse") + .getACall() } - override DataFlow::Node getAnInput() { result = this.getAMethodCall("parse").getArg(0) } + override DataFlow::Node getAnInput() { result = this.getArg(0) } override predicate vulnerable(XML::XMLVulnerabilityKind kind) { - exists(DataFlow::MethodCallNode parse, API::Node handler, API::Node feature | - handler = API::moduleImport("xml").getMember("sax").getMember("handler") and - parse.calls(trackSaxFeature(this, feature), "parse") and - parse.getArg(0) = this.getAnInput() // enough to avoid FPs? - | - (kind.isXxe() or kind.isDtdRetrieval()) and - feature = handler.getMember("feature_external_ges") - or - (kind.isBillionLaughs() or kind.isQuadraticBlowup()) - ) - } - - predicate vulnerable(DataFlow::Node n, XML::XMLVulnerabilityKind kind) { - exists(API::Node handler, API::Node feature | - handler = API::moduleImport("xml").getMember("sax").getMember("handler") and - DataFlow::exprNode(trackSaxFeature(this, feature).asExpr()) - .(DataFlow::LocalSourceNode) - .flowsTo(n) - | - (kind.isXxe() or kind.isDtdRetrieval()) and - feature = handler.getMember("feature_external_ges") - ) + // always vulnerable to these + (kind.isBillionLaughs() or kind.isQuadraticBlowup()) + or + // can be vulnerable to other things if features have been turned on + this.getObject() = saxParserWithFeatureExternalGesTurnedOn() and + (kind.isXxe() or kind.isDtdRetrieval()) } } diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected b/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected index 
6c342ef223ea..0109566be06d 100644 --- a/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected +++ b/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected @@ -227,8 +227,10 @@ subpaths | xml_dom.py:33:40:33:50 | ControlFlowNode for xml_content | xml_dom.py:31:19:31:25 | ControlFlowNode for request | xml_dom.py:33:40:33:50 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_dom.py:33:40:33:50 | ControlFlowNode for xml_content | This | xml_dom.py:31:19:31:25 | ControlFlowNode for request | user-provided value | | xml_etree.py:49:45:49:55 | ControlFlowNode for xml_content | xml_etree.py:46:19:46:25 | ControlFlowNode for request | xml_etree.py:49:45:49:55 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: XXE. | xml_etree.py:49:45:49:55 | ControlFlowNode for xml_content | This | xml_etree.py:46:19:46:25 | ControlFlowNode for request | user-provided value | | xml_etree.py:56:45:56:55 | ControlFlowNode for xml_content | xml_etree.py:53:19:53:25 | ControlFlowNode for request | xml_etree.py:56:45:56:55 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: XXE. | xml_etree.py:56:45:56:55 | ControlFlowNode for xml_content | This | xml_etree.py:53:19:53:25 | ControlFlowNode for request | user-provided value | +| xml_sax_make_parser.py:36:18:36:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:31:19:31:25 | ControlFlowNode for request | xml_sax_make_parser.py:36:18:36:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. 
| xml_sax_make_parser.py:36:18:36:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:31:19:31:25 | ControlFlowNode for request | user-provided value | +| xml_sax_make_parser.py:49:18:49:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:42:19:42:25 | ControlFlowNode for request | xml_sax_make_parser.py:49:18:49:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_sax_make_parser.py:49:18:49:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:42:19:42:25 | ControlFlowNode for request | user-provided value | | xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:63:19:63:25 | ControlFlowNode for request | xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, DTD retrieval, Quadratic Blowup, XXE. | xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:63:19:63:25 | ControlFlowNode for request | user-provided value | | xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, DTD retrieval, Quadratic Blowup, XXE. | xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | user-provided value | -| xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, DTD retrieval, Quadratic Blowup, XXE. 
| xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | user-provided value | -| xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:98:19:98:25 | ControlFlowNode for request | xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, DTD retrieval, Quadratic Blowup, XXE. | xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:98:19:98:25 | ControlFlowNode for request | user-provided value | +| xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | user-provided value | +| xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:98:19:98:25 | ControlFlowNode for request | xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:98:19:98:25 | ControlFlowNode for request | user-provided value | | xml_to_dict.py:17:28:17:38 | ControlFlowNode for xml_content | xml_to_dict.py:15:19:15:25 | ControlFlowNode for request | xml_to_dict.py:17:28:17:38 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. 
| xml_to_dict.py:17:28:17:38 | ControlFlowNode for xml_content | This | xml_to_dict.py:15:19:15:25 | ControlFlowNode for request | user-provided value | From 515b824b3cd857dc29a3484817a1a0d170bae2f6 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 3 Mar 2022 09:42:19 +0100 Subject: [PATCH 41/79] Python: Add lxml positive test --- .../CWE-611/XmlEntityInjection.expected | 23 ++++++++++++------- .../Security/CWE-611/lxml_etree.py | 8 +++++++ 2 files changed, 23 insertions(+), 8 deletions(-) diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected b/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected index 0109566be06d..634e7dd28d7c 100644 --- a/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected +++ b/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected @@ -20,12 +20,15 @@ edges | lxml_etree.py:54:19:54:25 | ControlFlowNode for request | lxml_etree.py:54:19:54:30 | ControlFlowNode for Attribute | | lxml_etree.py:54:19:54:30 | ControlFlowNode for Attribute | lxml_etree.py:54:19:54:45 | ControlFlowNode for Subscript | | lxml_etree.py:54:19:54:45 | ControlFlowNode for Subscript | lxml_etree.py:57:34:57:44 | ControlFlowNode for xml_content | -| lxml_etree.py:65:19:65:25 | ControlFlowNode for request | lxml_etree.py:65:19:65:30 | ControlFlowNode for Attribute | -| lxml_etree.py:65:19:65:30 | ControlFlowNode for Attribute | lxml_etree.py:65:19:65:45 | ControlFlowNode for Subscript | -| lxml_etree.py:65:19:65:45 | ControlFlowNode for Subscript | lxml_etree.py:68:34:68:44 | ControlFlowNode for xml_content | +| lxml_etree.py:62:19:62:25 | ControlFlowNode for request | lxml_etree.py:62:19:62:30 | ControlFlowNode for Attribute | +| lxml_etree.py:62:19:62:30 | ControlFlowNode for Attribute | lxml_etree.py:62:19:62:45 | ControlFlowNode for Subscript | +| lxml_etree.py:62:19:62:45 | ControlFlowNode for Subscript | 
lxml_etree.py:65:34:65:44 | ControlFlowNode for xml_content | | lxml_etree.py:73:19:73:25 | ControlFlowNode for request | lxml_etree.py:73:19:73:30 | ControlFlowNode for Attribute | | lxml_etree.py:73:19:73:30 | ControlFlowNode for Attribute | lxml_etree.py:73:19:73:45 | ControlFlowNode for Subscript | | lxml_etree.py:73:19:73:45 | ControlFlowNode for Subscript | lxml_etree.py:76:34:76:44 | ControlFlowNode for xml_content | +| lxml_etree.py:81:19:81:25 | ControlFlowNode for request | lxml_etree.py:81:19:81:30 | ControlFlowNode for Attribute | +| lxml_etree.py:81:19:81:30 | ControlFlowNode for Attribute | lxml_etree.py:81:19:81:45 | ControlFlowNode for Subscript | +| lxml_etree.py:81:19:81:45 | ControlFlowNode for Subscript | lxml_etree.py:84:34:84:44 | ControlFlowNode for xml_content | | xml_dom.py:13:19:13:25 | ControlFlowNode for request | xml_dom.py:13:19:13:30 | ControlFlowNode for Attribute | | xml_dom.py:13:19:13:30 | ControlFlowNode for Attribute | xml_dom.py:13:19:13:45 | ControlFlowNode for Subscript | | xml_dom.py:13:19:13:45 | ControlFlowNode for Subscript | xml_dom.py:15:34:15:54 | ControlFlowNode for StringIO() | @@ -119,14 +122,18 @@ nodes | lxml_etree.py:54:19:54:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | | lxml_etree.py:54:19:54:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | | lxml_etree.py:57:34:57:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | -| lxml_etree.py:65:19:65:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| lxml_etree.py:65:19:65:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | -| lxml_etree.py:65:19:65:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | -| lxml_etree.py:68:34:68:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | +| lxml_etree.py:62:19:62:25 | ControlFlowNode for 
request | semmle.label | ControlFlowNode for request | +| lxml_etree.py:62:19:62:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | +| lxml_etree.py:62:19:62:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | +| lxml_etree.py:65:34:65:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | | lxml_etree.py:73:19:73:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | | lxml_etree.py:73:19:73:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | | lxml_etree.py:73:19:73:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | | lxml_etree.py:76:34:76:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | +| lxml_etree.py:81:19:81:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | +| lxml_etree.py:81:19:81:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | +| lxml_etree.py:81:19:81:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | +| lxml_etree.py:84:34:84:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | | xml_dom.py:13:19:13:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | | xml_dom.py:13:19:13:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | | xml_dom.py:13:19:13:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | @@ -220,7 +227,7 @@ subpaths | lxml_etree.py:31:29:31:49 | ControlFlowNode for StringIO() | lxml_etree.py:29:19:29:25 | ControlFlowNode for request | lxml_etree.py:31:29:31:49 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: XXE. 
| lxml_etree.py:31:29:31:49 | ControlFlowNode for StringIO() | This | lxml_etree.py:29:19:29:25 | ControlFlowNode for request | user-provided value | | lxml_etree.py:40:34:40:44 | ControlFlowNode for xml_content | lxml_etree.py:37:19:37:25 | ControlFlowNode for request | lxml_etree.py:40:34:40:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: XXE. | lxml_etree.py:40:34:40:44 | ControlFlowNode for xml_content | This | lxml_etree.py:37:19:37:25 | ControlFlowNode for request | user-provided value | | lxml_etree.py:47:34:47:44 | ControlFlowNode for xml_content | lxml_etree.py:44:19:44:25 | ControlFlowNode for request | lxml_etree.py:47:34:47:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: XXE. | lxml_etree.py:47:34:47:44 | ControlFlowNode for xml_content | This | lxml_etree.py:44:19:44:25 | ControlFlowNode for request | user-provided value | -| lxml_etree.py:76:34:76:44 | ControlFlowNode for xml_content | lxml_etree.py:73:19:73:25 | ControlFlowNode for request | lxml_etree.py:76:34:76:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup, XXE. | lxml_etree.py:76:34:76:44 | ControlFlowNode for xml_content | This | lxml_etree.py:73:19:73:25 | ControlFlowNode for request | user-provided value | +| lxml_etree.py:84:34:84:44 | ControlFlowNode for xml_content | lxml_etree.py:81:19:81:25 | ControlFlowNode for request | lxml_etree.py:84:34:84:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup, XXE. 
| lxml_etree.py:84:34:84:44 | ControlFlowNode for xml_content | This | lxml_etree.py:81:19:81:25 | ControlFlowNode for request | user-provided value | | xml_dom.py:15:34:15:54 | ControlFlowNode for StringIO() | xml_dom.py:13:19:13:25 | ControlFlowNode for request | xml_dom.py:15:34:15:54 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_dom.py:15:34:15:54 | ControlFlowNode for StringIO() | This | xml_dom.py:13:19:13:25 | ControlFlowNode for request | user-provided value | | xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | xml_dom.py:19:19:19:25 | ControlFlowNode for request | xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | This | xml_dom.py:19:19:19:25 | ControlFlowNode for request | user-provided value | | xml_dom.py:27:34:27:54 | ControlFlowNode for StringIO() | xml_dom.py:25:19:25:25 | ControlFlowNode for request | xml_dom.py:27:34:27:54 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. 
| xml_dom.py:27:34:27:54 | ControlFlowNode for StringIO() | This | xml_dom.py:25:19:25:25 | ControlFlowNode for request | user-provided value | diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/lxml_etree.py b/python/ql/test/experimental/query-tests/Security/CWE-611/lxml_etree.py index 2c3c6f5f2ffc..231116c2b720 100644 --- a/python/ql/test/experimental/query-tests/Security/CWE-611/lxml_etree.py +++ b/python/ql/test/experimental/query-tests/Security/CWE-611/lxml_etree.py @@ -56,6 +56,14 @@ def lxml_parser(): parser = lxml.etree.XMLParser(resolve_entities=False) return lxml.etree.fromstring(xml_content, parser=parser).text +# XXE-vuln +@app.route("/lxml_etree_fromstring-lxml.etree.XMLParser+") +def lxml_parser(): + xml_content = request.args['xml_content'] + + parser = lxml.etree.XMLParser(resolve_entities=True) + return lxml.etree.fromstring(xml_content, parser=parser).text + # Billion laughs and quadratic blowup (huge_tree) ## Good (huge_tree=True but resolve_entities=False) From 661d8bf553778aa49054347e215015877c3876a5 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 3 Mar 2022 09:45:42 +0100 Subject: [PATCH 42/79] Python: Better handling of `resolve_entities` arg in lxml --- .../ql/src/experimental/semmle/python/frameworks/Xml.qll | 9 ++++++--- .../Security/CWE-611/XmlEntityInjection.expected | 1 + 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll index 1a01bf4c5c8f..6f865e13cdb1 100644 --- a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll +++ b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll @@ -206,11 +206,14 @@ private module Xml { override DataFlow::Node getAnInput() { none() } + // NOTE: it's not possible to change settings of a parser after constructing it override predicate vulnerable(XML::XMLVulnerabilityKind kind) { kind.isXxe() and - not ( - 
exists(this.getArgByName("resolve_entities")) or - this.getArgByName("resolve_entities").getALocalSource().asExpr() = any(False f) + ( + // resolve_entities has default True + not exists(this.getArgByName("resolve_entities")) + or + this.getArgByName("resolve_entities").getALocalSource().asExpr() = any(True t) ) or (kind.isBillionLaughs() or kind.isQuadraticBlowup()) and diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected b/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected index 634e7dd28d7c..86edcb89d4da 100644 --- a/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected +++ b/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected @@ -227,6 +227,7 @@ subpaths | lxml_etree.py:31:29:31:49 | ControlFlowNode for StringIO() | lxml_etree.py:29:19:29:25 | ControlFlowNode for request | lxml_etree.py:31:29:31:49 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: XXE. | lxml_etree.py:31:29:31:49 | ControlFlowNode for StringIO() | This | lxml_etree.py:29:19:29:25 | ControlFlowNode for request | user-provided value | | lxml_etree.py:40:34:40:44 | ControlFlowNode for xml_content | lxml_etree.py:37:19:37:25 | ControlFlowNode for request | lxml_etree.py:40:34:40:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: XXE. | lxml_etree.py:40:34:40:44 | ControlFlowNode for xml_content | This | lxml_etree.py:37:19:37:25 | ControlFlowNode for request | user-provided value | | lxml_etree.py:47:34:47:44 | ControlFlowNode for xml_content | lxml_etree.py:44:19:44:25 | ControlFlowNode for request | lxml_etree.py:47:34:47:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: XXE. 
| lxml_etree.py:47:34:47:44 | ControlFlowNode for xml_content | This | lxml_etree.py:44:19:44:25 | ControlFlowNode for request | user-provided value | +| lxml_etree.py:65:34:65:44 | ControlFlowNode for xml_content | lxml_etree.py:62:19:62:25 | ControlFlowNode for request | lxml_etree.py:65:34:65:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: XXE. | lxml_etree.py:65:34:65:44 | ControlFlowNode for xml_content | This | lxml_etree.py:62:19:62:25 | ControlFlowNode for request | user-provided value | | lxml_etree.py:84:34:84:44 | ControlFlowNode for xml_content | lxml_etree.py:81:19:81:25 | ControlFlowNode for request | lxml_etree.py:84:34:84:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup, XXE. | lxml_etree.py:84:34:84:44 | ControlFlowNode for xml_content | This | lxml_etree.py:81:19:81:25 | ControlFlowNode for request | user-provided value | | xml_dom.py:15:34:15:54 | ControlFlowNode for StringIO() | xml_dom.py:13:19:13:25 | ControlFlowNode for request | xml_dom.py:15:34:15:54 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_dom.py:15:34:15:54 | ControlFlowNode for StringIO() | This | xml_dom.py:13:19:13:25 | ControlFlowNode for request | user-provided value | | xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | xml_dom.py:19:19:19:25 | ControlFlowNode for request | xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. 
| xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | This | xml_dom.py:19:19:19:25 | ControlFlowNode for request | user-provided value | From 52891cb4763bc3714a2e3cc95ea240145d55d910 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 3 Mar 2022 12:36:24 +0100 Subject: [PATCH 43/79] Python: Add PoC for XML vulns --- .../Security/CWE-611/dont_extract/PoC.py | 449 ++++++++++++++++++ .../Security/CWE-611/dont_extract/flag | 1 + 2 files changed, 450 insertions(+) create mode 100644 python/ql/test/experimental/query-tests/Security/CWE-611/dont_extract/PoC.py create mode 100644 python/ql/test/experimental/query-tests/Security/CWE-611/dont_extract/flag diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/dont_extract/PoC.py b/python/ql/test/experimental/query-tests/Security/CWE-611/dont_extract/PoC.py new file mode 100644 index 000000000000..85301c32bff6 --- /dev/null +++ b/python/ql/test/experimental/query-tests/Security/CWE-611/dont_extract/PoC.py @@ -0,0 +1,449 @@ +#!/usr/bin/env python3 + +# this file doesn't have a .py extension so the extractor doesn't pick it up, so it +# doesn't have to be annotated + +# This file shows the ways to make exploit vulnerable XML parsing +# see +# https://pypi.org/project/defusedxml/#python-xml-libraries +# https://docs.python.org/3.10/library/xml.html#xml-vulnerabilities + +import pathlib +from flask import Flask +import threading +import multiprocessing +import time +from io import StringIO +import pytest + +HOST = "localhost" +PORT = 8080 + + +FLAG_PATH = pathlib.Path(__file__).with_name("flag") + +# ============================================================================== +# xml samples + +ok_xml = f""" +hello world +""" + +local_xxe = f""" + +]> +&xxe; +""" + +remote_xxe = f""" + +]> +&remote_xxe; +""" + +billion_laughs = """ + + + + + + + + + + + +]> +&lol9;""" + +quadratic_blowup = f""" + +]> +{"&oops;"*20000}""" + +dtd_retrieval = f""" + +bar +""" + +# 
============================================================================== +# other setup + +# we set up local Flask application so we can tests whether loading external resources +# works (such as SSRF from DTD-retrival works) +app = Flask(__name__) + +@app.route("/alive") +def alive(): + return "ok" + +hit_dtd = False +@app.route("/test.dtd") +def test_dtd(): + global hit_dtd + hit_dtd = True + return """""" + +hit_xxe = False +@app.route("/xxe") +def test_xxe(): + global hit_xxe + hit_xxe = True + return "ok" + +def run_app(): + app.run(host=HOST, port=PORT) + +@pytest.fixture(scope="session", autouse=True) +def flask_app_running(): + # run flask in other thread + flask_thread = threading.Thread(target=run_app, daemon=True) + flask_thread.start() + + # give flask a bit of time to start + time.sleep(0.1) + + # ensure that the server works + import requests + requests.get(f"http://{HOST}:{PORT}/alive") + + yield + +def expects_timeout(func): + def inner(): + proc = multiprocessing.Process(target=func) + proc.start() + time.sleep(0.1) + assert proc.exitcode == None + proc.kill() + proc.join() + return inner + + +class TestExpectsTimeout: + "test that expects_timeout works as expected" + + @staticmethod + @expects_timeout + def test_slow(): + time.sleep(1000) + + @staticmethod + def test_fast(): + @expects_timeout + def fast_func(): + return "done!" 
+ + with pytest.raises(AssertionError): + fast_func() + +# ============================================================================== +import xml.sax + +class SimpleHandler(xml.sax.ContentHandler): + def __init__(self): + self.result = [] + + def characters(self, data): + self.result.append(data) + +class TestSax(): + # always vuln to billion laughs, quadratic + + @staticmethod + @expects_timeout + def test_billion_laughs_allowed_by_default(): + parser = xml.sax.make_parser() + parser.parse(StringIO(billion_laughs)) + + @staticmethod + @expects_timeout + def test_quardratic_blowup_allowed_by_default(): + parser = xml.sax.make_parser() + parser.parse(StringIO(quadratic_blowup)) + + @staticmethod + def test_ok_xml(): + handler = SimpleHandler() + parser = xml.sax.make_parser() + parser.setContentHandler(handler) + parser.parse(StringIO(ok_xml)) + assert handler.result == ["hello world"], handler.result + + @staticmethod + def test_xxe_disabled_by_default(): + handler = SimpleHandler() + parser = xml.sax.make_parser() + parser.setContentHandler(handler) + parser.parse(StringIO(local_xxe)) + assert handler.result == [], handler.result + + @staticmethod + def test_local_xxe_manually_enabled(): + handler = SimpleHandler() + parser = xml.sax.make_parser() + parser.setContentHandler(handler) + parser.setFeature(xml.sax.handler.feature_external_ges, True) + parser.parse(StringIO(local_xxe)) + assert handler.result[0] == "SECRET_FLAG", handler.result + + @staticmethod + def test_remote_xxe_manually_enabled(): + global hit_xxe + hit_xxe = False + + handler = SimpleHandler() + parser = xml.sax.make_parser() + parser.setContentHandler(handler) + parser.setFeature(xml.sax.handler.feature_external_ges, True) + parser.parse(StringIO(remote_xxe)) + assert handler.result == ["ok"], handler.result + assert hit_xxe == True + + @staticmethod + def test_dtd_disabled_by_default(): + global hit_dtd + hit_dtd = False + + parser = xml.sax.make_parser() + 
parser.parse(StringIO(dtd_retrieval)) + assert hit_dtd == False + + @staticmethod + def test_dtd_manually_enabled(): + global hit_dtd + hit_dtd = False + + parser = xml.sax.make_parser() + parser.setFeature(xml.sax.handler.feature_external_ges, True) + parser.parse(StringIO(dtd_retrieval)) + assert hit_dtd == True + + +# ============================================================================== +import xml.etree.ElementTree + +class TestEtree: + + # always vuln to billion laughs, quadratic + @staticmethod + @expects_timeout + def test_billion_laughs_allowed_by_default(): + parser = xml.etree.ElementTree.XMLParser() + _root = xml.etree.ElementTree.fromstring(billion_laughs, parser=parser) + + @staticmethod + @expects_timeout + def test_quardratic_blowup_allowed_by_default(): + parser = xml.etree.ElementTree.XMLParser() + _root = xml.etree.ElementTree.fromstring(quadratic_blowup, parser=parser) + + @staticmethod + def test_ok_xml(): + parser = xml.etree.ElementTree.XMLParser() + root = xml.etree.ElementTree.fromstring(ok_xml, parser=parser) + assert root.tag == "test" + assert root.text == "hello world" + + @staticmethod + def test_xxe_not_possible(): + parser = xml.etree.ElementTree.XMLParser() + try: + _root = xml.etree.ElementTree.fromstring(local_xxe, parser=parser) + assert False + except xml.etree.ElementTree.ParseError as e: + assert "undefined entity &xxe" in str(e) + + @staticmethod + def test_dtd_not_possible(): + global hit_dtd + hit_dtd = False + + parser = xml.etree.ElementTree.XMLParser() + _root = xml.etree.ElementTree.fromstring(dtd_retrieval, parser=parser) + assert hit_dtd == False + +# ============================================================================== +import lxml.etree + +class TestLxml: + # see https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.XMLParser + @staticmethod + def test_billion_laughs_disabled_by_default(): + parser = lxml.etree.XMLParser() + try: + _root = lxml.etree.fromstring(billion_laughs, 
parser=parser) + assert False + except lxml.etree.XMLSyntaxError as e: + assert "Detected an entity reference loop" in str(e) + + @staticmethod + def test_quardratic_blowup_disabled_by_default(): + parser = lxml.etree.XMLParser() + try: + _root = lxml.etree.fromstring(quadratic_blowup, parser=parser) + assert False + except lxml.etree.XMLSyntaxError as e: + assert "Detected an entity reference loop" in str(e) + + @staticmethod + @expects_timeout + def test_billion_laughs_manually_enabled(): + parser = lxml.etree.XMLParser(huge_tree=True) + root = lxml.etree.fromstring(billion_laughs, parser=parser) + + @staticmethod + @expects_timeout + def test_quadratic_blowup_manually_enabled(): + parser = lxml.etree.XMLParser(huge_tree=True) + try: + _root = lxml.etree.fromstring(quadratic_blowup, parser=parser) + assert False + except lxml.etree.XMLSyntaxError as e: + assert "Detected an entity reference loop" in str(e) + + @staticmethod + def test_ok_xml(): + parser = lxml.etree.XMLParser() + root = lxml.etree.fromstring(ok_xml, parser=parser) + assert root.tag == "test" + assert root.text == "hello world" + + @staticmethod + def test_local_xxe_enabled_by_default(): + parser = lxml.etree.XMLParser() + root = lxml.etree.fromstring(local_xxe, parser=parser) + assert root.tag == "test" + assert root.text == "SECRET_FLAG\n", root.text + + @staticmethod + def test_local_xxe_disabled(): + parser = lxml.etree.XMLParser(resolve_entities=False) + root = lxml.etree.fromstring(local_xxe, parser=parser) + assert root.tag == "test" + assert root.text == None + + @staticmethod + def test_remote_xxe_disabled_by_default(): + global hit_xxe + hit_xxe = False + + parser = lxml.etree.XMLParser() + try: + root = lxml.etree.fromstring(remote_xxe, parser=parser) + assert False + except lxml.etree.XMLSyntaxError as e: + assert "Failure to process entity remote_xxe" in str(e) + assert hit_xxe == False + + @staticmethod + def test_remote_xxe_manually_enabled(): + global hit_xxe + hit_xxe = False + + 
parser = lxml.etree.XMLParser(no_network=False) + root = lxml.etree.fromstring(remote_xxe, parser=parser) + assert root.tag == "test" + assert root.text == "ok" + assert hit_xxe == True + + @staticmethod + def test_dtd_disabled_by_default(): + global hit_dtd + hit_dtd = False + + parser = lxml.etree.XMLParser() + root = lxml.etree.fromstring(dtd_retrieval, parser=parser) + assert hit_dtd == False + + @staticmethod + def test_dtd_manually_enabled(): + global hit_dtd + hit_dtd = False + + # Need to set BOTH load_dtd and no_network + parser = lxml.etree.XMLParser(load_dtd=True) + root = lxml.etree.fromstring(dtd_retrieval, parser=parser) + assert hit_dtd == False + + parser = lxml.etree.XMLParser(no_network=False) + root = lxml.etree.fromstring(dtd_retrieval, parser=parser) + assert hit_dtd == False + + parser = lxml.etree.XMLParser(load_dtd=True, no_network=False) + root = lxml.etree.fromstring(dtd_retrieval, parser=parser) + assert hit_dtd == True + + hit_dtd = False + + # Setting dtd_validation also does not allow the remote access + parser = lxml.etree.XMLParser(dtd_validation=True, load_dtd=True) + try: + root = lxml.etree.fromstring(dtd_retrieval, parser=parser) + except lxml.etree.XMLSyntaxError: + pass + assert hit_dtd == False + + +# ============================================================================== + +import xmltodict + +class TestXmltodict: + @staticmethod + def test_billion_laughs_disabled_by_default(): + d = xmltodict.parse(billion_laughs) + assert d == {"lolz": None}, d + + @staticmethod + def test_quardratic_blowup_disabled_by_default(): + d = xmltodict.parse(quadratic_blowup) + assert d == {"foo": None}, d + + @staticmethod + @expects_timeout + def test_billion_laughs_manually_enabled(): + xmltodict.parse(billion_laughs, disable_entities=False) + + @staticmethod + @expects_timeout + def test_quardratic_blowup_manually_enabled(): + xmltodict.parse(quadratic_blowup, disable_entities=False) + + @staticmethod + def test_ok_xml(): + d = 
xmltodict.parse(ok_xml) + assert d == {"test": "hello world"}, d + + @staticmethod + def test_local_xxe_not_possible(): + d = xmltodict.parse(local_xxe) + assert d == {"test": None} + + d = xmltodict.parse(local_xxe, disable_entities=False) + assert d == {"test": None} + + @staticmethod + def test_remote_xxe_not_possible(): + global hit_xxe + hit_xxe = False + + d = xmltodict.parse(remote_xxe) + assert d == {"test": None} + assert hit_xxe == False + + d = xmltodict.parse(remote_xxe, disable_entities=False) + assert d == {"test": None} + assert hit_xxe == False + + @staticmethod + def test_dtd_not_possible(): + global hit_dtd + hit_dtd = False + + d = xmltodict.parse(dtd_retrieval) + assert hit_dtd == False diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/dont_extract/flag b/python/ql/test/experimental/query-tests/Security/CWE-611/dont_extract/flag new file mode 100644 index 000000000000..45c9436ee9f2 --- /dev/null +++ b/python/ql/test/experimental/query-tests/Security/CWE-611/dont_extract/flag @@ -0,0 +1 @@ +SECRET_FLAG From 3c321dd98dcd62193844f61c03eaa81ca5d4ee43 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 3 Mar 2022 13:49:17 +0100 Subject: [PATCH 44/79] Python: Model `lxml.etree.get_default_parser` in own class --- .../semmle/python/frameworks/Xml.qll | 39 ++++++++++++------- 1 file changed, 24 insertions(+), 15 deletions(-) diff --git a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll index 6f865e13cdb1..4ecd2d8a99e9 100644 --- a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll +++ b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll @@ -182,26 +182,35 @@ private module Xml { } /** - * Gets a call to: - * * `lxml.etree.XMLParser` - * * `lxml.etree.get_default_parser` - * - * Given the following example: + * A call to `lxml.etree.get_default_parser`. 
* - * ```py - * lxml.etree.XMLParser() - * ``` + * See https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.get_default_parser + */ + private class LXMLDefaultParser extends DataFlow::CallCfgNode, XML::XMLParser::Range { + LXMLDefaultParser() { + this = API::moduleImport("lxml").getMember("etree").getMember("get_default_parser").getACall() + } + + override DataFlow::Node getAnInput() { none() } + + override predicate vulnerable(XML::XMLVulnerabilityKind kind) { + // as highlighted by + // https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.XMLParser + // by default XXE is allow. so as long as the default parser has not been + // overridden, the result is also vuln to XXE. + kind.isXxe() + // TODO: take into account that you can override the default parser with `lxml.etree.get_default_parser`. + } + } + + /** + * A call to `lxml.etree.XMLParser`. * - * * `this` would be `lxml.etree.XMLParser(resolve_entities=False)`. - * * `vulnerable(kind)`'s `kind` would be `XXE` + * See https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.XMLParser */ private class LXMLParser extends DataFlow::CallCfgNode, XML::XMLParser::Range { LXMLParser() { - this = - API::moduleImport("lxml") - .getMember("etree") - .getMember(["XMLParser", "get_default_parser"]) - .getACall() + this = API::moduleImport("lxml").getMember("etree").getMember("XMLParser").getACall() } override DataFlow::Node getAnInput() { none() } From 124c03c15c9df8b14d143d3e38a410d3f60cb8e3 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 3 Mar 2022 14:38:41 +0100 Subject: [PATCH 45/79] Python: Expand lxml tests And add annotations, see PoC.py for reference Some of these needs fixing though --- .../CWE-611/XmlEntityInjection.expected | 37 +++++++++++-------- .../Security/CWE-611/lxml_etree.py | 32 +++++++++------- 2 files changed, 41 insertions(+), 28 deletions(-) diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected 
b/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected index 86edcb89d4da..b29a6d049db1 100644 --- a/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected +++ b/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected @@ -23,12 +23,15 @@ edges | lxml_etree.py:62:19:62:25 | ControlFlowNode for request | lxml_etree.py:62:19:62:30 | ControlFlowNode for Attribute | | lxml_etree.py:62:19:62:30 | ControlFlowNode for Attribute | lxml_etree.py:62:19:62:45 | ControlFlowNode for Subscript | | lxml_etree.py:62:19:62:45 | ControlFlowNode for Subscript | lxml_etree.py:65:34:65:44 | ControlFlowNode for xml_content | -| lxml_etree.py:73:19:73:25 | ControlFlowNode for request | lxml_etree.py:73:19:73:30 | ControlFlowNode for Attribute | -| lxml_etree.py:73:19:73:30 | ControlFlowNode for Attribute | lxml_etree.py:73:19:73:45 | ControlFlowNode for Subscript | -| lxml_etree.py:73:19:73:45 | ControlFlowNode for Subscript | lxml_etree.py:76:34:76:44 | ControlFlowNode for xml_content | -| lxml_etree.py:81:19:81:25 | ControlFlowNode for request | lxml_etree.py:81:19:81:30 | ControlFlowNode for Attribute | -| lxml_etree.py:81:19:81:30 | ControlFlowNode for Attribute | lxml_etree.py:81:19:81:45 | ControlFlowNode for Subscript | -| lxml_etree.py:81:19:81:45 | ControlFlowNode for Subscript | lxml_etree.py:84:34:84:44 | ControlFlowNode for xml_content | +| lxml_etree.py:71:19:71:25 | ControlFlowNode for request | lxml_etree.py:71:19:71:30 | ControlFlowNode for Attribute | +| lxml_etree.py:71:19:71:30 | ControlFlowNode for Attribute | lxml_etree.py:71:19:71:45 | ControlFlowNode for Subscript | +| lxml_etree.py:71:19:71:45 | ControlFlowNode for Subscript | lxml_etree.py:74:34:74:44 | ControlFlowNode for xml_content | +| lxml_etree.py:78:19:78:25 | ControlFlowNode for request | lxml_etree.py:78:19:78:30 | ControlFlowNode for Attribute | +| lxml_etree.py:78:19:78:30 | ControlFlowNode for Attribute | 
lxml_etree.py:78:19:78:45 | ControlFlowNode for Subscript | +| lxml_etree.py:78:19:78:45 | ControlFlowNode for Subscript | lxml_etree.py:81:34:81:44 | ControlFlowNode for xml_content | +| lxml_etree.py:87:19:87:25 | ControlFlowNode for request | lxml_etree.py:87:19:87:30 | ControlFlowNode for Attribute | +| lxml_etree.py:87:19:87:30 | ControlFlowNode for Attribute | lxml_etree.py:87:19:87:45 | ControlFlowNode for Subscript | +| lxml_etree.py:87:19:87:45 | ControlFlowNode for Subscript | lxml_etree.py:90:34:90:44 | ControlFlowNode for xml_content | | xml_dom.py:13:19:13:25 | ControlFlowNode for request | xml_dom.py:13:19:13:30 | ControlFlowNode for Attribute | | xml_dom.py:13:19:13:30 | ControlFlowNode for Attribute | xml_dom.py:13:19:13:45 | ControlFlowNode for Subscript | | xml_dom.py:13:19:13:45 | ControlFlowNode for Subscript | xml_dom.py:15:34:15:54 | ControlFlowNode for StringIO() | @@ -126,14 +129,18 @@ nodes | lxml_etree.py:62:19:62:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | | lxml_etree.py:62:19:62:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | | lxml_etree.py:65:34:65:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | -| lxml_etree.py:73:19:73:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| lxml_etree.py:73:19:73:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | -| lxml_etree.py:73:19:73:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | -| lxml_etree.py:76:34:76:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | -| lxml_etree.py:81:19:81:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| lxml_etree.py:81:19:81:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | -| lxml_etree.py:81:19:81:45 | ControlFlowNode for Subscript | semmle.label | 
ControlFlowNode for Subscript | -| lxml_etree.py:84:34:84:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | +| lxml_etree.py:71:19:71:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | +| lxml_etree.py:71:19:71:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | +| lxml_etree.py:71:19:71:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | +| lxml_etree.py:74:34:74:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | +| lxml_etree.py:78:19:78:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | +| lxml_etree.py:78:19:78:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | +| lxml_etree.py:78:19:78:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | +| lxml_etree.py:81:34:81:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | +| lxml_etree.py:87:19:87:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | +| lxml_etree.py:87:19:87:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | +| lxml_etree.py:87:19:87:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | +| lxml_etree.py:90:34:90:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | | xml_dom.py:13:19:13:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | | xml_dom.py:13:19:13:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | | xml_dom.py:13:19:13:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | @@ -228,7 +235,7 @@ subpaths | lxml_etree.py:40:34:40:44 | ControlFlowNode for xml_content | lxml_etree.py:37:19:37:25 | ControlFlowNode for request | lxml_etree.py:40:34:40:44 | ControlFlowNode for xml_content | $@ XML 
input is constructed from a $@ and is vulnerable to: XXE. | lxml_etree.py:40:34:40:44 | ControlFlowNode for xml_content | This | lxml_etree.py:37:19:37:25 | ControlFlowNode for request | user-provided value | | lxml_etree.py:47:34:47:44 | ControlFlowNode for xml_content | lxml_etree.py:44:19:44:25 | ControlFlowNode for request | lxml_etree.py:47:34:47:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: XXE. | lxml_etree.py:47:34:47:44 | ControlFlowNode for xml_content | This | lxml_etree.py:44:19:44:25 | ControlFlowNode for request | user-provided value | | lxml_etree.py:65:34:65:44 | ControlFlowNode for xml_content | lxml_etree.py:62:19:62:25 | ControlFlowNode for request | lxml_etree.py:65:34:65:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: XXE. | lxml_etree.py:65:34:65:44 | ControlFlowNode for xml_content | This | lxml_etree.py:62:19:62:25 | ControlFlowNode for request | user-provided value | -| lxml_etree.py:84:34:84:44 | ControlFlowNode for xml_content | lxml_etree.py:81:19:81:25 | ControlFlowNode for request | lxml_etree.py:84:34:84:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup, XXE. | lxml_etree.py:84:34:84:44 | ControlFlowNode for xml_content | This | lxml_etree.py:81:19:81:25 | ControlFlowNode for request | user-provided value | +| lxml_etree.py:81:34:81:44 | ControlFlowNode for xml_content | lxml_etree.py:78:19:78:25 | ControlFlowNode for request | lxml_etree.py:81:34:81:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup, XXE. 
| lxml_etree.py:81:34:81:44 | ControlFlowNode for xml_content | This | lxml_etree.py:78:19:78:25 | ControlFlowNode for request | user-provided value | | xml_dom.py:15:34:15:54 | ControlFlowNode for StringIO() | xml_dom.py:13:19:13:25 | ControlFlowNode for request | xml_dom.py:15:34:15:54 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_dom.py:15:34:15:54 | ControlFlowNode for StringIO() | This | xml_dom.py:13:19:13:25 | ControlFlowNode for request | user-provided value | | xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | xml_dom.py:19:19:19:25 | ControlFlowNode for request | xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | This | xml_dom.py:19:19:19:25 | ControlFlowNode for request | user-provided value | | xml_dom.py:27:34:27:54 | ControlFlowNode for StringIO() | xml_dom.py:25:19:25:25 | ControlFlowNode for request | xml_dom.py:27:34:27:54 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. 
| xml_dom.py:27:34:27:54 | ControlFlowNode for StringIO() | This | xml_dom.py:25:19:25:25 | ControlFlowNode for request | user-provided value | diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/lxml_etree.py b/python/ql/test/experimental/query-tests/Security/CWE-611/lxml_etree.py index 231116c2b720..2c2712098503 100644 --- a/python/ql/test/experimental/query-tests/Security/CWE-611/lxml_etree.py +++ b/python/ql/test/experimental/query-tests/Security/CWE-611/lxml_etree.py @@ -10,25 +10,25 @@ def lxml_etree_fromstring(): xml_content = request.args['xml_content'] - return lxml.etree.fromstring(xml_content).text + return lxml.etree.fromstring(xml_content).text # NOT OK for XXE @app.route("/lxml_etree_fromstringlist") def lxml_etree_fromstringlist(): xml_content = request.args['xml_content'] - return lxml.etree.fromstringlist([xml_content]).text + return lxml.etree.fromstringlist([xml_content]).text # NOT OK for XXE @app.route("/lxml_etree_XML") def lxml_etree_XML(): xml_content = request.args['xml_content'] - return lxml.etree.XML(xml_content).text + return lxml.etree.XML(xml_content).text # NOT OK for XXE @app.route("/lxml_etree_parse") def lxml_etree_parse(): xml_content = request.args['xml_content'] - return lxml.etree.parse(StringIO(xml_content)).getroot().text + return lxml.etree.parse(StringIO(xml_content)).getroot().text # NOT OK for XXE # With parsers - Default @@ -37,14 +37,14 @@ def lxml_parser(): xml_content = request.args['xml_content'] parser = lxml.etree.XMLParser() - return lxml.etree.fromstring(xml_content, parser=parser).text + return lxml.etree.fromstring(xml_content, parser=parser).text # NOT OK for XXE @app.route("/lxml_etree_fromstring-lxml.etree.get_default_parser") def lxml_parser(): xml_content = request.args['xml_content'] parser = lxml.etree.get_default_parser() - return lxml.etree.fromstring(xml_content, parser=parser).text + return lxml.etree.fromstring(xml_content, parser=parser).text # NOT OK for XXE # With parsers - 
With options @@ -54,7 +54,7 @@ def lxml_parser(): xml_content = request.args['xml_content'] parser = lxml.etree.XMLParser(resolve_entities=False) - return lxml.etree.fromstring(xml_content, parser=parser).text + return lxml.etree.fromstring(xml_content, parser=parser).text # OK for XXE # XXE-vuln @app.route("/lxml_etree_fromstring-lxml.etree.XMLParser+") @@ -62,23 +62,29 @@ def lxml_parser(): xml_content = request.args['xml_content'] parser = lxml.etree.XMLParser(resolve_entities=True) - return lxml.etree.fromstring(xml_content, parser=parser).text + return lxml.etree.fromstring(xml_content, parser=parser).text # NOT OK for XXE # Billion laughs and quadratic blowup (huge_tree) -## Good (huge_tree=True but resolve_entities=False) - @app.route("/lxml_etree_fromstring-lxml.etree.XMLParser+") def lxml_parser(): xml_content = request.args['xml_content'] parser = lxml.etree.XMLParser(resolve_entities=False, huge_tree=True) - return lxml.etree.fromstring(xml_content, parser=parser).text + return lxml.etree.fromstring(xml_content, parser=parser).text # OK for XXE, NOT OK for billion laughs/quadratic -## Bad @app.route("/lxml_etree_fromstring-lxml.etree.XMLParser+") def lxml_parser(): xml_content = request.args['xml_content'] parser = lxml.etree.XMLParser(huge_tree=True) - return lxml.etree.fromstring(xml_content, parser=parser).text + return lxml.etree.fromstring(xml_content, parser=parser).text # NOT OK for XXE, NOT OK for billion laughs/quadratic + +# DTD retrival + +@app.route("/lxml_etree_fromstring-lxml.etree.XMLParser+") +def lxml_parser(): + xml_content = request.args['xml_content'] + + parser = lxml.etree.XMLParser(resolve_entities=False, load_dtd=True, no_network=False) + return lxml.etree.fromstring(xml_content, parser=parser).text # NOT OK for DTD, OK for rest From e295399f7096f92592ea7aa4d1286619bf39f8d0 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 3 Mar 2022 14:43:37 +0100 Subject: [PATCH 46/79] Python: Properly handle `huge_tree` in lxml --- 
python/ql/src/experimental/semmle/python/frameworks/Xml.qll | 5 +---- .../query-tests/Security/CWE-611/XmlEntityInjection.expected | 1 + 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll index 4ecd2d8a99e9..58b7edc327be 100644 --- a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll +++ b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll @@ -226,10 +226,7 @@ private module Xml { ) or (kind.isBillionLaughs() or kind.isQuadraticBlowup()) and - ( - this.getArgByName("huge_tree").getALocalSource().asExpr() = any(True t) and - not this.getArgByName("resolve_entities").getALocalSource().asExpr() = any(False f) - ) + this.getArgByName("huge_tree").getALocalSource().asExpr() = any(True t) } } diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected b/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected index b29a6d049db1..fc6f8c9ad899 100644 --- a/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected +++ b/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected @@ -235,6 +235,7 @@ subpaths | lxml_etree.py:40:34:40:44 | ControlFlowNode for xml_content | lxml_etree.py:37:19:37:25 | ControlFlowNode for request | lxml_etree.py:40:34:40:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: XXE. | lxml_etree.py:40:34:40:44 | ControlFlowNode for xml_content | This | lxml_etree.py:37:19:37:25 | ControlFlowNode for request | user-provided value | | lxml_etree.py:47:34:47:44 | ControlFlowNode for xml_content | lxml_etree.py:44:19:44:25 | ControlFlowNode for request | lxml_etree.py:47:34:47:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: XXE. 
| lxml_etree.py:47:34:47:44 | ControlFlowNode for xml_content | This | lxml_etree.py:44:19:44:25 | ControlFlowNode for request | user-provided value | | lxml_etree.py:65:34:65:44 | ControlFlowNode for xml_content | lxml_etree.py:62:19:62:25 | ControlFlowNode for request | lxml_etree.py:65:34:65:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: XXE. | lxml_etree.py:65:34:65:44 | ControlFlowNode for xml_content | This | lxml_etree.py:62:19:62:25 | ControlFlowNode for request | user-provided value | +| lxml_etree.py:74:34:74:44 | ControlFlowNode for xml_content | lxml_etree.py:71:19:71:25 | ControlFlowNode for request | lxml_etree.py:74:34:74:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | lxml_etree.py:74:34:74:44 | ControlFlowNode for xml_content | This | lxml_etree.py:71:19:71:25 | ControlFlowNode for request | user-provided value | | lxml_etree.py:81:34:81:44 | ControlFlowNode for xml_content | lxml_etree.py:78:19:78:25 | ControlFlowNode for request | lxml_etree.py:81:34:81:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup, XXE. | lxml_etree.py:81:34:81:44 | ControlFlowNode for xml_content | This | lxml_etree.py:78:19:78:25 | ControlFlowNode for request | user-provided value | | xml_dom.py:15:34:15:54 | ControlFlowNode for StringIO() | xml_dom.py:13:19:13:25 | ControlFlowNode for request | xml_dom.py:15:34:15:54 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. 
| xml_dom.py:15:34:15:54 | ControlFlowNode for StringIO() | This | xml_dom.py:13:19:13:25 | ControlFlowNode for request | user-provided value | | xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | xml_dom.py:19:19:19:25 | ControlFlowNode for request | xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | This | xml_dom.py:19:19:19:25 | ControlFlowNode for request | user-provided value | From 703e3e8a0f9f81df20a924b25412baf4b9189086 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 3 Mar 2022 14:46:48 +0100 Subject: [PATCH 47/79] Python: Handle DTD retrieval vuln in lxml --- python/ql/src/experimental/semmle/python/frameworks/Xml.qll | 4 ++++ .../query-tests/Security/CWE-611/XmlEntityInjection.expected | 1 + 2 files changed, 5 insertions(+) diff --git a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll index 58b7edc327be..315199e748c1 100644 --- a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll +++ b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll @@ -227,6 +227,10 @@ private module Xml { or (kind.isBillionLaughs() or kind.isQuadraticBlowup()) and this.getArgByName("huge_tree").getALocalSource().asExpr() = any(True t) + or + kind.isDtdRetrieval() and + this.getArgByName("load_dtd").getALocalSource().asExpr() = any(True t) and + this.getArgByName("no_network").getALocalSource().asExpr() = any(False t) } } diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected b/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected index fc6f8c9ad899..3c5ad70b23b9 100644 --- a/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected +++ 
b/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected @@ -237,6 +237,7 @@ subpaths | lxml_etree.py:65:34:65:44 | ControlFlowNode for xml_content | lxml_etree.py:62:19:62:25 | ControlFlowNode for request | lxml_etree.py:65:34:65:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: XXE. | lxml_etree.py:65:34:65:44 | ControlFlowNode for xml_content | This | lxml_etree.py:62:19:62:25 | ControlFlowNode for request | user-provided value | | lxml_etree.py:74:34:74:44 | ControlFlowNode for xml_content | lxml_etree.py:71:19:71:25 | ControlFlowNode for request | lxml_etree.py:74:34:74:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | lxml_etree.py:74:34:74:44 | ControlFlowNode for xml_content | This | lxml_etree.py:71:19:71:25 | ControlFlowNode for request | user-provided value | | lxml_etree.py:81:34:81:44 | ControlFlowNode for xml_content | lxml_etree.py:78:19:78:25 | ControlFlowNode for request | lxml_etree.py:81:34:81:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup, XXE. | lxml_etree.py:81:34:81:44 | ControlFlowNode for xml_content | This | lxml_etree.py:78:19:78:25 | ControlFlowNode for request | user-provided value | +| lxml_etree.py:90:34:90:44 | ControlFlowNode for xml_content | lxml_etree.py:87:19:87:25 | ControlFlowNode for request | lxml_etree.py:90:34:90:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: DTD retrieval. 
| lxml_etree.py:90:34:90:44 | ControlFlowNode for xml_content | This | lxml_etree.py:87:19:87:25 | ControlFlowNode for request | user-provided value | | xml_dom.py:15:34:15:54 | ControlFlowNode for StringIO() | xml_dom.py:13:19:13:25 | ControlFlowNode for request | xml_dom.py:15:34:15:54 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_dom.py:15:34:15:54 | ControlFlowNode for StringIO() | This | xml_dom.py:13:19:13:25 | ControlFlowNode for request | user-provided value | | xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | xml_dom.py:19:19:19:25 | ControlFlowNode for request | xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | This | xml_dom.py:19:19:19:25 | ControlFlowNode for request | user-provided value | | xml_dom.py:27:34:27:54 | ControlFlowNode for StringIO() | xml_dom.py:25:19:25:25 | ControlFlowNode for request | xml_dom.py:27:34:27:54 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. 
| xml_dom.py:27:34:27:54 | ControlFlowNode for StringIO() | This | xml_dom.py:25:19:25:25 | ControlFlowNode for request | user-provided value | From 61291936bfcb2667647f330ca0a512b33c80e82c Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 3 Mar 2022 15:06:55 +0100 Subject: [PATCH 48/79] Python: Properly model `xml.etree` --- .../ql/src/experimental/semmle/python/frameworks/Xml.qll | 7 ++++++- .../Security/CWE-611/XmlEntityInjection.expected | 5 +++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll index 315199e748c1..5140915e0792 100644 --- a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll +++ b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll @@ -24,7 +24,9 @@ private module Xml { override DataFlow::Node getAnInput() { none() } - override predicate vulnerable(XML::XMLVulnerabilityKind kind) { none() } + override predicate vulnerable(XML::XMLVulnerabilityKind kind) { + kind.isBillionLaughs() or kind.isQuadraticBlowup() + } } /** @@ -58,6 +60,9 @@ private module Xml { override DataFlow::Node getAnInput() { result = this.getArg(0) } override predicate vulnerable(XML::XMLVulnerabilityKind kind) { + not exists(this.getArgByName("parser")) and + (kind.isBillionLaughs() or kind.isQuadraticBlowup()) + or exists(XML::XMLParser xmlParser | xmlParser = this.getArgByName("parser").getALocalSource() and xmlParser.vulnerable(kind) ) diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected b/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected index 3c5ad70b23b9..bf43d01cec1e 100644 --- a/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected +++ b/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected @@ -242,6 +242,11 @@ subpaths | xml_dom.py:21:40:21:50 | ControlFlowNode for 
xml_content | xml_dom.py:19:19:19:25 | ControlFlowNode for request | xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | This | xml_dom.py:19:19:19:25 | ControlFlowNode for request | user-provided value | | xml_dom.py:27:34:27:54 | ControlFlowNode for StringIO() | xml_dom.py:25:19:25:25 | ControlFlowNode for request | xml_dom.py:27:34:27:54 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_dom.py:27:34:27:54 | ControlFlowNode for StringIO() | This | xml_dom.py:25:19:25:25 | ControlFlowNode for request | user-provided value | | xml_dom.py:33:40:33:50 | ControlFlowNode for xml_content | xml_dom.py:31:19:31:25 | ControlFlowNode for request | xml_dom.py:33:40:33:50 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_dom.py:33:40:33:50 | ControlFlowNode for xml_content | This | xml_dom.py:31:19:31:25 | ControlFlowNode for request | user-provided value | +| xml_etree.py:15:45:15:55 | ControlFlowNode for xml_content | xml_etree.py:13:19:13:25 | ControlFlowNode for request | xml_etree.py:15:45:15:55 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_etree.py:15:45:15:55 | ControlFlowNode for xml_content | This | xml_etree.py:13:19:13:25 | ControlFlowNode for request | user-provided value | +| xml_etree.py:21:49:21:59 | ControlFlowNode for xml_content | xml_etree.py:19:19:19:25 | ControlFlowNode for request | xml_etree.py:21:49:21:59 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. 
| xml_etree.py:21:49:21:59 | ControlFlowNode for xml_content | This | xml_etree.py:19:19:19:25 | ControlFlowNode for request | user-provided value | +| xml_etree.py:27:38:27:48 | ControlFlowNode for xml_content | xml_etree.py:25:19:25:25 | ControlFlowNode for request | xml_etree.py:27:38:27:48 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_etree.py:27:38:27:48 | ControlFlowNode for xml_content | This | xml_etree.py:25:19:25:25 | ControlFlowNode for request | user-provided value | +| xml_etree.py:33:40:33:60 | ControlFlowNode for StringIO() | xml_etree.py:31:19:31:25 | ControlFlowNode for request | xml_etree.py:33:40:33:60 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_etree.py:33:40:33:60 | ControlFlowNode for StringIO() | This | xml_etree.py:31:19:31:25 | ControlFlowNode for request | user-provided value | +| xml_etree.py:42:45:42:55 | ControlFlowNode for xml_content | xml_etree.py:39:19:39:25 | ControlFlowNode for request | xml_etree.py:42:45:42:55 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_etree.py:42:45:42:55 | ControlFlowNode for xml_content | This | xml_etree.py:39:19:39:25 | ControlFlowNode for request | user-provided value | | xml_etree.py:49:45:49:55 | ControlFlowNode for xml_content | xml_etree.py:46:19:46:25 | ControlFlowNode for request | xml_etree.py:49:45:49:55 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: XXE. 
| xml_etree.py:49:45:49:55 | ControlFlowNode for xml_content | This | xml_etree.py:46:19:46:25 | ControlFlowNode for request | user-provided value | | xml_etree.py:56:45:56:55 | ControlFlowNode for xml_content | xml_etree.py:53:19:53:25 | ControlFlowNode for request | xml_etree.py:56:45:56:55 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: XXE. | xml_etree.py:56:45:56:55 | ControlFlowNode for xml_content | This | xml_etree.py:53:19:53:25 | ControlFlowNode for request | user-provided value | | xml_sax_make_parser.py:36:18:36:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:31:19:31:25 | ControlFlowNode for request | xml_sax_make_parser.py:36:18:36:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_sax_make_parser.py:36:18:36:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:31:19:31:25 | ControlFlowNode for request | user-provided value | From 3affa6cf3abd3e88fed8722f959b0b5851936809 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 3 Mar 2022 15:08:56 +0100 Subject: [PATCH 49/79] Python: Annotate xmltodict tests --- .../experimental/query-tests/Security/CWE-611/xml_to_dict.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/xml_to_dict.py b/python/ql/test/experimental/query-tests/Security/CWE-611/xml_to_dict.py index 2b91a22e1a22..8f43d2e1cc1f 100644 --- a/python/ql/test/experimental/query-tests/Security/CWE-611/xml_to_dict.py +++ b/python/ql/test/experimental/query-tests/Security/CWE-611/xml_to_dict.py @@ -8,10 +8,10 @@ def xmltodict_parse(): xml_content = request.args['xml_content'] - return xmltodict.parse(xml_content) + return xmltodict.parse(xml_content) # OK @app.route("/xmltodict.parse2") def xmltodict_parse2(): xml_content = request.args['xml_content'] - return xmltodict.parse(xml_content, 
disable_entities=False) \ No newline at end of file + return xmltodict.parse(xml_content, disable_entities=False) # NOT OK for billion laughs/quadratic From c4d08db62aafec4a020f4836a4bcb86329cc517b Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 3 Mar 2022 17:30:16 +0100 Subject: [PATCH 50/79] Python: Expand XML PoC with minidom/pulldom/expat --- .../Security/CWE-611/dont_extract/PoC.py | 201 ++++++++++++++++++ 1 file changed, 201 insertions(+) diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/dont_extract/PoC.py b/python/ql/test/experimental/query-tests/Security/CWE-611/dont_extract/PoC.py index 85301c32bff6..862346de3e8a 100644 --- a/python/ql/test/experimental/query-tests/Security/CWE-611/dont_extract/PoC.py +++ b/python/ql/test/experimental/query-tests/Security/CWE-611/dont_extract/PoC.py @@ -143,6 +143,7 @@ def fast_func(): # ============================================================================== import xml.sax +import xml.sax.handler class SimpleHandler(xml.sax.ContentHandler): def __init__(self): @@ -447,3 +448,203 @@ def test_dtd_not_possible(): d = xmltodict.parse(dtd_retrieval) assert hit_dtd == False + +# ============================================================================== +import xml.dom.minidom + +class TestMinidom: + @staticmethod + @expects_timeout + def test_billion_laughs(): + xml.dom.minidom.parseString(billion_laughs) + + @staticmethod + @expects_timeout + def test_quardratic_blowup(): + xml.dom.minidom.parseString(quadratic_blowup) + + @staticmethod + def test_ok_xml(): + doc = xml.dom.minidom.parseString(ok_xml) + assert doc.documentElement.tagName == "test" + assert doc.documentElement.childNodes[0].data == "hello world" + + @staticmethod + def test_xxe(): + # disabled by default + doc = xml.dom.minidom.parseString(local_xxe) + assert doc.documentElement.tagName == "test" + assert doc.documentElement.childNodes == [] + + # but can be turned on + parser = xml.sax.make_parser() + 
parser.setFeature(xml.sax.handler.feature_external_ges, True) + doc = xml.dom.minidom.parseString(local_xxe, parser=parser) + assert doc.documentElement.tagName == "test" + assert doc.documentElement.childNodes[0].data == "SECRET_FLAG" + + # which also works remotely + global hit_xxe + hit_xxe = False + + parser = xml.sax.make_parser() + parser.setFeature(xml.sax.handler.feature_external_ges, True) + _doc = xml.dom.minidom.parseString(remote_xxe, parser=parser) + assert hit_xxe == True + + @staticmethod + def test_dtd(): + # not possible by default + global hit_dtd + hit_dtd = False + + _doc = xml.dom.minidom.parseString(dtd_retrieval) + assert hit_dtd == False + + # but can be turned on + parser = xml.sax.make_parser() + parser.setFeature(xml.sax.handler.feature_external_ges, True) + _doc = xml.dom.minidom.parseString(dtd_retrieval, parser=parser) + assert hit_dtd == True + +# ============================================================================== +import xml.dom.pulldom + +class TestPulldom: + @staticmethod + @expects_timeout + def test_billion_laughs(): + doc = xml.dom.pulldom.parseString(billion_laughs) + # you NEED to iterate over the items for it to take long + for event, node in doc: + pass + + @staticmethod + @expects_timeout + def test_quardratic_blowup(): + doc = xml.dom.pulldom.parseString(quadratic_blowup) + for event, node in doc: + pass + + @staticmethod + def test_ok_xml(): + doc = xml.dom.pulldom.parseString(ok_xml) + for event, node in doc: + if event == xml.dom.pulldom.START_ELEMENT: + assert node.tagName == "test" + elif event == xml.dom.pulldom.CHARACTERS: + assert node.data == "hello world" + + @staticmethod + def test_xxe(): + # disabled by default + doc = xml.dom.pulldom.parseString(local_xxe) + found_flag = False + for event, node in doc: + if event == xml.dom.pulldom.START_ELEMENT: + assert node.tagName == "test" + elif event == xml.dom.pulldom.CHARACTERS: + if node.data == "SECRET_FLAG": + found_flag = True + assert found_flag == 
False + + # but can be turned on + parser = xml.sax.make_parser() + parser.setFeature(xml.sax.handler.feature_external_ges, True) + doc = xml.dom.pulldom.parseString(local_xxe, parser=parser) + found_flag = False + for event, node in doc: + if event == xml.dom.pulldom.START_ELEMENT: + assert node.tagName == "test" + elif event == xml.dom.pulldom.CHARACTERS: + if node.data == "SECRET_FLAG": + found_flag = True + assert found_flag == True + + # which also works remotely + global hit_xxe + hit_xxe = False + parser = xml.sax.make_parser() + parser.setFeature(xml.sax.handler.feature_external_ges, True) + doc = xml.dom.pulldom.parseString(remote_xxe, parser=parser) + assert hit_xxe == False + for event, node in doc: + pass + assert hit_xxe == True + + @staticmethod + def test_dtd(): + # not possible by default + global hit_dtd + hit_dtd = False + + doc = xml.dom.pulldom.parseString(dtd_retrieval) + for event, node in doc: + pass + assert hit_dtd == False + + # but can be turned on + parser = xml.sax.make_parser() + parser.setFeature(xml.sax.handler.feature_external_ges, True) + doc = xml.dom.pulldom.parseString(dtd_retrieval, parser=parser) + for event, node in doc: + pass + assert hit_dtd == True + +# ============================================================================== +import xml.parsers.expat + +class TestExpat: + # this is the underlying parser implementation used by the rest of the Python + # standard library. But people are probably not using this directly. 
+ + @staticmethod + @expects_timeout + def test_billion_laughs(): + parser = xml.parsers.expat.ParserCreate() + parser.Parse(billion_laughs, True) + + @staticmethod + @expects_timeout + def test_quardratic_blowup(): + parser = xml.parsers.expat.ParserCreate() + parser.Parse(quadratic_blowup, True) + + @staticmethod + def test_ok_xml(): + char_data_recv = [] + def char_data_handler(data): + char_data_recv.append(data) + + parser = xml.parsers.expat.ParserCreate() + parser.CharacterDataHandler = char_data_handler + parser.Parse(ok_xml, True) + + assert char_data_recv == ["hello world"] + + @staticmethod + def test_xxe(): + # not vuln by default + char_data_recv = [] + def char_data_handler(data): + char_data_recv.append(data) + + parser = xml.parsers.expat.ParserCreate() + parser.CharacterDataHandler = char_data_handler + parser.Parse(local_xxe, True) + + assert char_data_recv == [] + + # there might be ways to make it vuln, but I did not investigate further. + + @staticmethod + def test_dtd(): + # not vuln by default + global hit_dtd + hit_dtd = False + + parser = xml.parsers.expat.ParserCreate() + parser.Parse(dtd_retrieval, True) + assert hit_dtd == False + + # there might be ways to make it vuln, but I did not investigate further.
From 5a652480b1f26eba9ed9c82e338c5058330bf51a Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 3 Mar 2022 17:37:25 +0100 Subject: [PATCH 51/79] Python: Annotate xml.dom tests --- .../query-tests/Security/CWE-611/xml_dom.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/xml_dom.py b/python/ql/test/experimental/query-tests/Security/CWE-611/xml_dom.py index 428a2d645a1b..55762ece0be5 100644 --- a/python/ql/test/experimental/query-tests/Security/CWE-611/xml_dom.py +++ b/python/ql/test/experimental/query-tests/Security/CWE-611/xml_dom.py @@ -12,25 +12,25 @@ def xml_minidom_parse(): xml_content = request.args['xml_content'] - return xml.dom.minidom.parse(StringIO(xml_content)).documentElement.childNodes + return xml.dom.minidom.parse(StringIO(xml_content)).documentElement.childNodes # OK for XXE/DTD, NOT OK for billion laughs/quadratic @app.route("/xml_minidom_parseString") def xml_minidom_parseString(): xml_content = request.args['xml_content'] - return xml.dom.minidom.parseString(xml_content).documentElement.childNodes + return xml.dom.minidom.parseString(xml_content).documentElement.childNodes # OK for XXE/DTD, NOT OK for billion laughs/quadratic @app.route("/xml_pulldom_parse") def xml_pulldom_parse(): xml_content = request.args['xml_content'] - return xml.dom.pulldom.parse(StringIO(xml_content))['START_DOCUMENT'][1].documentElement.childNodes + return xml.dom.pulldom.parse(StringIO(xml_content))['START_DOCUMENT'][1].documentElement.childNodes # OK for XXE/DTD, NOT OK for billion laughs/quadratic @app.route("/xml_pulldom_parseString") def xml_pulldom_parseString(): xml_content = request.args['xml_content'] - return xml.dom.pulldom.parseString(xml_content)['START_DOCUMENT'][1].documentElement.childNodes + return xml.dom.pulldom.parseString(xml_content)['START_DOCUMENT'][1].documentElement.childNodes # OK for XXE/DTD, NOT OK for billion laughs/quadratic # With parsers 
@@ -40,5 +40,4 @@ def xml_minidom_parse_xml_sax_make_parser(): parser = xml.sax.make_parser() parser.setFeature(xml.sax.handler.feature_external_ges, True) - return xml.dom.minidom.parse(StringIO(xml_content), parser=parser).documentElement.childNodes - + return xml.dom.minidom.parse(StringIO(xml_content), parser=parser).documentElement.childNodes # NOT OK for XXE/DTD, NOT OK for billion laughs/quadratic From 9406a972cdbf24ab8c0e5608490042ffc12b297f Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 3 Mar 2022 17:52:11 +0100 Subject: [PATCH 52/79] Python: Fix vuln detection for xml.minidom with parser arg --- .../semmle/python/frameworks/Xml.qll | 28 +++++++------------ .../CWE-611/XmlEntityInjection.expected | 2 ++ 2 files changed, 12 insertions(+), 18 deletions(-) diff --git a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll index 5140915e0792..d7e27f35b0d9 100644 --- a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll +++ b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll @@ -302,19 +302,9 @@ private module Xml { } /** - * Gets a call to: - * * `xml.dom.minidom.parse` - * * `xml.dom.pulldom.parse` - * - * Given the following example: + * A call to the `parse` or `parseString` methods from `xml.dom.minidom` or `xml.dom.pulldom`. * - * ```py - * xml.dom.minidom.parse(StringIO(xml_content)).documentElement.childNode - * ``` - * - * * `this` would be `xml.dom.minidom.parse(StringIO(xml_content), parser=parser)`. - * * `getAnInput()`'s result would be `StringIO(xml_content)`. - * * `vulnerable(kind)`'s `kind` would be `Billion Laughs` and `Quadratic Blowup`. + * Both of these modules are based on SAX parsers. 
*/ private class XMLDomParsing extends DataFlow::CallCfgNode, XML::XMLParsing::Range { XMLDomParsing() { @@ -326,15 +316,17 @@ private module Xml { .getACall() } - override DataFlow::Node getAnInput() { result = this.getArg(0) } + override DataFlow::Node getAnInput() { + result in [this.getArg(0), this.getArgByName("string"), this.getArgByName("file")] + } + + DataFlow::Node getParserArg() { result in [this.getArg(1), this.getArgByName("parser")] } override predicate vulnerable(XML::XMLVulnerabilityKind kind) { - exists(XML::XMLParser xmlParser | - xmlParser = this.getArgByName("parser").getALocalSource() and xmlParser.vulnerable(kind) - ) + this.getParserArg() = saxParserWithFeatureExternalGesTurnedOn() and + (kind.isXxe() or kind.isDtdRetrieval()) or - (kind.isBillionLaughs() or kind.isQuadraticBlowup()) and - not exists(this.getArgByName("parser")) + (kind.isBillionLaughs() or kind.isQuadraticBlowup()) } } diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected b/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected index bf43d01cec1e..b08e7dd727e4 100644 --- a/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected +++ b/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected @@ -242,6 +242,7 @@ subpaths | xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | xml_dom.py:19:19:19:25 | ControlFlowNode for request | xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. 
| xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | This | xml_dom.py:19:19:19:25 | ControlFlowNode for request | user-provided value | | xml_dom.py:27:34:27:54 | ControlFlowNode for StringIO() | xml_dom.py:25:19:25:25 | ControlFlowNode for request | xml_dom.py:27:34:27:54 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_dom.py:27:34:27:54 | ControlFlowNode for StringIO() | This | xml_dom.py:25:19:25:25 | ControlFlowNode for request | user-provided value | | xml_dom.py:33:40:33:50 | ControlFlowNode for xml_content | xml_dom.py:31:19:31:25 | ControlFlowNode for request | xml_dom.py:33:40:33:50 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_dom.py:33:40:33:50 | ControlFlowNode for xml_content | This | xml_dom.py:31:19:31:25 | ControlFlowNode for request | user-provided value | +| xml_dom.py:43:34:43:54 | ControlFlowNode for StringIO() | xml_dom.py:39:19:39:25 | ControlFlowNode for request | xml_dom.py:43:34:43:54 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, DTD retrieval, Quadratic Blowup, XXE. | xml_dom.py:43:34:43:54 | ControlFlowNode for StringIO() | This | xml_dom.py:39:19:39:25 | ControlFlowNode for request | user-provided value | | xml_etree.py:15:45:15:55 | ControlFlowNode for xml_content | xml_etree.py:13:19:13:25 | ControlFlowNode for request | xml_etree.py:15:45:15:55 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. 
| xml_etree.py:15:45:15:55 | ControlFlowNode for xml_content | This | xml_etree.py:13:19:13:25 | ControlFlowNode for request | user-provided value | | xml_etree.py:21:49:21:59 | ControlFlowNode for xml_content | xml_etree.py:19:19:19:25 | ControlFlowNode for request | xml_etree.py:21:49:21:59 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_etree.py:21:49:21:59 | ControlFlowNode for xml_content | This | xml_etree.py:19:19:19:25 | ControlFlowNode for request | user-provided value | | xml_etree.py:27:38:27:48 | ControlFlowNode for xml_content | xml_etree.py:25:19:25:25 | ControlFlowNode for request | xml_etree.py:27:38:27:48 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_etree.py:27:38:27:48 | ControlFlowNode for xml_content | This | xml_etree.py:25:19:25:25 | ControlFlowNode for request | user-provided value | @@ -252,6 +253,7 @@ subpaths | xml_sax_make_parser.py:36:18:36:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:31:19:31:25 | ControlFlowNode for request | xml_sax_make_parser.py:36:18:36:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_sax_make_parser.py:36:18:36:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:31:19:31:25 | ControlFlowNode for request | user-provided value | | xml_sax_make_parser.py:49:18:49:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:42:19:42:25 | ControlFlowNode for request | xml_sax_make_parser.py:49:18:49:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. 
| xml_sax_make_parser.py:49:18:49:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:42:19:42:25 | ControlFlowNode for request | user-provided value | | xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:63:19:63:25 | ControlFlowNode for request | xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, DTD retrieval, Quadratic Blowup, XXE. | xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:63:19:63:25 | ControlFlowNode for request | user-provided value | +| xml_sax_make_parser.py:79:33:79:53 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:75:19:75:25 | ControlFlowNode for request | xml_sax_make_parser.py:79:33:79:53 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, DTD retrieval, Quadratic Blowup, XXE. | xml_sax_make_parser.py:79:33:79:53 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:75:19:75:25 | ControlFlowNode for request | user-provided value | | xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, DTD retrieval, Quadratic Blowup, XXE. | xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | user-provided value | | xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. 
| xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | user-provided value | | xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:98:19:98:25 | ControlFlowNode for request | xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:98:19:98:25 | ControlFlowNode for request | user-provided value | From 7cda901da21b814d96c326d8499d6d9b2ca3de9f Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 3 Mar 2022 19:35:33 +0100 Subject: [PATCH 53/79] Python: Add separate query for SimpleXMLRPCServer This was a rough quick-n-dirty query, and should get some qhelp as well at some point. --- .../Security/CWE-611/SimpleXmlRpcServer.ql | 27 +++++++++++++++++ .../semmle/python/frameworks/Xml.qll | 30 ------------------- .../CWE-611/SimpleXmlRpcServer.expected | 1 + .../Security/CWE-611/SimpleXmlRpcServer.qlref | 1 + .../Security/CWE-611/xmlrpc_server.py | 8 +++-- 5 files changed, 34 insertions(+), 33 deletions(-) create mode 100644 python/ql/src/experimental/Security/CWE-611/SimpleXmlRpcServer.ql create mode 100644 python/ql/test/experimental/query-tests/Security/CWE-611/SimpleXmlRpcServer.expected create mode 100644 python/ql/test/experimental/query-tests/Security/CWE-611/SimpleXmlRpcServer.qlref diff --git a/python/ql/src/experimental/Security/CWE-611/SimpleXmlRpcServer.ql b/python/ql/src/experimental/Security/CWE-611/SimpleXmlRpcServer.ql new file mode 100644 index 000000000000..0e3deebf6016 --- /dev/null +++ b/python/ql/src/experimental/Security/CWE-611/SimpleXmlRpcServer.ql @@ -0,0 +1,27 @@ +/** + * @name SimpleXMLRPCServer DoS vulnerability + * @description SimpleXMLRPCServer is vulnerable to DoS attacks from untrusted user 
input + * @kind path-problem + * @problem.severity warning + * @precision high + * @id py/simple-xml-rpc-server + * @tags security + * external/cwe/cwe-776 + */ + +private import python +private import semmle.python.dataflow.new.DataFlow +private import semmle.python.Concepts +private import experimental.semmle.python.Concepts +private import semmle.python.ApiGraphs + +from DataFlow::CallCfgNode call, string kinds +where + call = API::moduleImport("xmlrpc").getMember("server").getMember("SimpleXMLRPCServer").getACall() and + kinds = + strictconcat(XML::XMLVulnerabilityKind kind | + kind.isBillionLaughs() or kind.isQuadraticBlowup() + | + kind, ", " + ) +select call, "SimpleXMLRPCServer is vulnerable to: " + kinds + "." diff --git a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll index d7e27f35b0d9..bf481a1f2a34 100644 --- a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll +++ b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll @@ -329,34 +329,4 @@ private module Xml { (kind.isBillionLaughs() or kind.isQuadraticBlowup()) } } - - /** - * Gets a call to `xmlrpc.server.SimpleXMLRPCServer`. - * - * Given the following example: - * - * ```py - * server = SimpleXMLRPCServer(("127.0.0.1", 8000)) - * server.register_function(foo, "foo") - * server.serve_forever() - * ``` - * - * * `this` would be `SimpleXMLRPCServer(("127.0.0.1", 8000))`. - * * `getAnInput()`'s result would be `foo`. - * * `vulnerable(kind)`'s `kind` would be `Billion Laughs` and `Quadratic Blowup`. 
- */ - private class XMLRPCServer extends DataFlow::CallCfgNode, XML::XMLParser::Range { - XMLRPCServer() { - this = - API::moduleImport("xmlrpc").getMember("server").getMember("SimpleXMLRPCServer").getACall() - } - - override DataFlow::Node getAnInput() { - result = this.getAMethodCall("register_function").getArg(0) - } - - override predicate vulnerable(XML::XMLVulnerabilityKind kind) { - kind.isBillionLaughs() or kind.isQuadraticBlowup() - } - } } diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/SimpleXmlRpcServer.expected b/python/ql/test/experimental/query-tests/Security/CWE-611/SimpleXmlRpcServer.expected new file mode 100644 index 000000000000..4a08d61c47af --- /dev/null +++ b/python/ql/test/experimental/query-tests/Security/CWE-611/SimpleXmlRpcServer.expected @@ -0,0 +1 @@ +| xmlrpc_server.py:7:10:7:48 | ControlFlowNode for SimpleXMLRPCServer() | SimpleXMLRPCServer is vulnerable to: Billion Laughs, Quadratic Blowup. | diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/SimpleXmlRpcServer.qlref b/python/ql/test/experimental/query-tests/Security/CWE-611/SimpleXmlRpcServer.qlref new file mode 100644 index 000000000000..a0b30e6d69b8 --- /dev/null +++ b/python/ql/test/experimental/query-tests/Security/CWE-611/SimpleXmlRpcServer.qlref @@ -0,0 +1 @@ +experimental/Security/CWE-611/SimpleXmlRpcServer.ql diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/xmlrpc_server.py b/python/ql/test/experimental/query-tests/Security/CWE-611/xmlrpc_server.py index baa433c4a8ab..83c18b549b3d 100644 --- a/python/ql/test/experimental/query-tests/Security/CWE-611/xmlrpc_server.py +++ b/python/ql/test/experimental/query-tests/Security/CWE-611/xmlrpc_server.py @@ -1,10 +1,12 @@ from xmlrpc.server import SimpleXMLRPCServer -def foo(n): - return n +def foo(n: str): + print("foo called with arg:", n, type(n)) + return "ok" server = SimpleXMLRPCServer(("127.0.0.1", 8000)) server.register_function(foo, "foo") server.serve_forever() -# 
billion_laughs -> curl 127.0.0.1:8000 --data-raw ']>foo&lol9;' +# normal: curl 127.0.0.1:8000 --data-raw 'foo42' +# billion_laughs: curl 127.0.0.1:8000 --data-raw ']>foo&lol9;' From 4b03f5c72400106027dd034a95079573904c1a12 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 3 Mar 2022 19:38:31 +0100 Subject: [PATCH 54/79] Python: Rename xml.sax test for consistency --- .../CWE-611/XmlEntityInjection.expected | 102 +++++++++--------- .../{xml_sax_make_parser.py => xml_sax.py} | 0 2 files changed, 51 insertions(+), 51 deletions(-) rename python/ql/test/experimental/query-tests/Security/CWE-611/{xml_sax_make_parser.py => xml_sax.py} (100%) diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected b/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected index b08e7dd727e4..f5f85bf178b8 100644 --- a/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected +++ b/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected @@ -71,25 +71,25 @@ edges | xml_etree.py:60:19:60:25 | ControlFlowNode for request | xml_etree.py:60:19:60:30 | ControlFlowNode for Attribute | | xml_etree.py:60:19:60:30 | ControlFlowNode for Attribute | xml_etree.py:60:19:60:45 | ControlFlowNode for Subscript | | xml_etree.py:60:19:60:45 | ControlFlowNode for Subscript | xml_etree.py:64:45:64:55 | ControlFlowNode for xml_content | -| xml_sax_make_parser.py:31:19:31:25 | ControlFlowNode for request | xml_sax_make_parser.py:31:19:31:30 | ControlFlowNode for Attribute | -| xml_sax_make_parser.py:31:19:31:30 | ControlFlowNode for Attribute | xml_sax_make_parser.py:31:19:31:45 | ControlFlowNode for Subscript | -| xml_sax_make_parser.py:31:19:31:45 | ControlFlowNode for Subscript | xml_sax_make_parser.py:36:18:36:38 | ControlFlowNode for StringIO() | -| xml_sax_make_parser.py:42:19:42:25 | ControlFlowNode for request | xml_sax_make_parser.py:42:19:42:30 | ControlFlowNode for 
Attribute | -| xml_sax_make_parser.py:42:19:42:30 | ControlFlowNode for Attribute | xml_sax_make_parser.py:42:19:42:45 | ControlFlowNode for Subscript | -| xml_sax_make_parser.py:42:19:42:45 | ControlFlowNode for Subscript | xml_sax_make_parser.py:49:18:49:38 | ControlFlowNode for StringIO() | -| xml_sax_make_parser.py:63:19:63:25 | ControlFlowNode for request | xml_sax_make_parser.py:63:19:63:30 | ControlFlowNode for Attribute | -| xml_sax_make_parser.py:63:19:63:30 | ControlFlowNode for Attribute | xml_sax_make_parser.py:63:19:63:45 | ControlFlowNode for Subscript | -| xml_sax_make_parser.py:63:19:63:45 | ControlFlowNode for Subscript | xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | -| xml_sax_make_parser.py:75:19:75:25 | ControlFlowNode for request | xml_sax_make_parser.py:75:19:75:30 | ControlFlowNode for Attribute | -| xml_sax_make_parser.py:75:19:75:30 | ControlFlowNode for Attribute | xml_sax_make_parser.py:75:19:75:45 | ControlFlowNode for Subscript | -| xml_sax_make_parser.py:75:19:75:45 | ControlFlowNode for Subscript | xml_sax_make_parser.py:79:33:79:53 | ControlFlowNode for StringIO() | -| xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | xml_sax_make_parser.py:86:19:86:30 | ControlFlowNode for Attribute | -| xml_sax_make_parser.py:86:19:86:30 | ControlFlowNode for Attribute | xml_sax_make_parser.py:86:19:86:45 | ControlFlowNode for Subscript | -| xml_sax_make_parser.py:86:19:86:45 | ControlFlowNode for Subscript | xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | -| xml_sax_make_parser.py:86:19:86:45 | ControlFlowNode for Subscript | xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | -| xml_sax_make_parser.py:98:19:98:25 | ControlFlowNode for request | xml_sax_make_parser.py:98:19:98:30 | ControlFlowNode for Attribute | -| xml_sax_make_parser.py:98:19:98:30 | ControlFlowNode for Attribute | xml_sax_make_parser.py:98:19:98:45 | ControlFlowNode for Subscript | -| 
xml_sax_make_parser.py:98:19:98:45 | ControlFlowNode for Subscript | xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | +| xml_sax.py:31:19:31:25 | ControlFlowNode for request | xml_sax.py:31:19:31:30 | ControlFlowNode for Attribute | +| xml_sax.py:31:19:31:30 | ControlFlowNode for Attribute | xml_sax.py:31:19:31:45 | ControlFlowNode for Subscript | +| xml_sax.py:31:19:31:45 | ControlFlowNode for Subscript | xml_sax.py:36:18:36:38 | ControlFlowNode for StringIO() | +| xml_sax.py:42:19:42:25 | ControlFlowNode for request | xml_sax.py:42:19:42:30 | ControlFlowNode for Attribute | +| xml_sax.py:42:19:42:30 | ControlFlowNode for Attribute | xml_sax.py:42:19:42:45 | ControlFlowNode for Subscript | +| xml_sax.py:42:19:42:45 | ControlFlowNode for Subscript | xml_sax.py:49:18:49:38 | ControlFlowNode for StringIO() | +| xml_sax.py:63:19:63:25 | ControlFlowNode for request | xml_sax.py:63:19:63:30 | ControlFlowNode for Attribute | +| xml_sax.py:63:19:63:30 | ControlFlowNode for Attribute | xml_sax.py:63:19:63:45 | ControlFlowNode for Subscript | +| xml_sax.py:63:19:63:45 | ControlFlowNode for Subscript | xml_sax.py:69:18:69:38 | ControlFlowNode for StringIO() | +| xml_sax.py:75:19:75:25 | ControlFlowNode for request | xml_sax.py:75:19:75:30 | ControlFlowNode for Attribute | +| xml_sax.py:75:19:75:30 | ControlFlowNode for Attribute | xml_sax.py:75:19:75:45 | ControlFlowNode for Subscript | +| xml_sax.py:75:19:75:45 | ControlFlowNode for Subscript | xml_sax.py:79:33:79:53 | ControlFlowNode for StringIO() | +| xml_sax.py:86:19:86:25 | ControlFlowNode for request | xml_sax.py:86:19:86:30 | ControlFlowNode for Attribute | +| xml_sax.py:86:19:86:30 | ControlFlowNode for Attribute | xml_sax.py:86:19:86:45 | ControlFlowNode for Subscript | +| xml_sax.py:86:19:86:45 | ControlFlowNode for Subscript | xml_sax.py:91:22:91:42 | ControlFlowNode for StringIO() | +| xml_sax.py:86:19:86:45 | ControlFlowNode for Subscript | xml_sax.py:93:22:93:42 | ControlFlowNode for 
StringIO() | +| xml_sax.py:98:19:98:25 | ControlFlowNode for request | xml_sax.py:98:19:98:30 | ControlFlowNode for Attribute | +| xml_sax.py:98:19:98:30 | ControlFlowNode for Attribute | xml_sax.py:98:19:98:45 | ControlFlowNode for Subscript | +| xml_sax.py:98:19:98:45 | ControlFlowNode for Subscript | xml_sax.py:103:18:103:38 | ControlFlowNode for StringIO() | | xml_to_dict.py:9:19:9:25 | ControlFlowNode for request | xml_to_dict.py:9:19:9:30 | ControlFlowNode for Attribute | | xml_to_dict.py:9:19:9:30 | ControlFlowNode for Attribute | xml_to_dict.py:9:19:9:45 | ControlFlowNode for Subscript | | xml_to_dict.py:9:19:9:45 | ControlFlowNode for Subscript | xml_to_dict.py:11:28:11:38 | ControlFlowNode for xml_content | @@ -193,31 +193,31 @@ nodes | xml_etree.py:60:19:60:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | | xml_etree.py:60:19:60:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | | xml_etree.py:64:45:64:55 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | -| xml_sax_make_parser.py:31:19:31:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| xml_sax_make_parser.py:31:19:31:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | -| xml_sax_make_parser.py:31:19:31:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | -| xml_sax_make_parser.py:36:18:36:38 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() | -| xml_sax_make_parser.py:42:19:42:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| xml_sax_make_parser.py:42:19:42:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | -| xml_sax_make_parser.py:42:19:42:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | -| xml_sax_make_parser.py:49:18:49:38 | ControlFlowNode for StringIO() | semmle.label | 
ControlFlowNode for StringIO() | -| xml_sax_make_parser.py:63:19:63:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| xml_sax_make_parser.py:63:19:63:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | -| xml_sax_make_parser.py:63:19:63:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | -| xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() | -| xml_sax_make_parser.py:75:19:75:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| xml_sax_make_parser.py:75:19:75:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | -| xml_sax_make_parser.py:75:19:75:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | -| xml_sax_make_parser.py:79:33:79:53 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() | -| xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| xml_sax_make_parser.py:86:19:86:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | -| xml_sax_make_parser.py:86:19:86:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | -| xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() | -| xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() | -| xml_sax_make_parser.py:98:19:98:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| xml_sax_make_parser.py:98:19:98:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | -| xml_sax_make_parser.py:98:19:98:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | -| xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for 
StringIO() | semmle.label | ControlFlowNode for StringIO() | +| xml_sax.py:31:19:31:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | +| xml_sax.py:31:19:31:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | +| xml_sax.py:31:19:31:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | +| xml_sax.py:36:18:36:38 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() | +| xml_sax.py:42:19:42:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | +| xml_sax.py:42:19:42:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | +| xml_sax.py:42:19:42:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | +| xml_sax.py:49:18:49:38 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() | +| xml_sax.py:63:19:63:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | +| xml_sax.py:63:19:63:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | +| xml_sax.py:63:19:63:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | +| xml_sax.py:69:18:69:38 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() | +| xml_sax.py:75:19:75:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | +| xml_sax.py:75:19:75:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | +| xml_sax.py:75:19:75:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | +| xml_sax.py:79:33:79:53 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() | +| xml_sax.py:86:19:86:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | +| xml_sax.py:86:19:86:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | +| 
xml_sax.py:86:19:86:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | +| xml_sax.py:91:22:91:42 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() | +| xml_sax.py:93:22:93:42 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() | +| xml_sax.py:98:19:98:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | +| xml_sax.py:98:19:98:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | +| xml_sax.py:98:19:98:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | +| xml_sax.py:103:18:103:38 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() | | xml_to_dict.py:9:19:9:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | | xml_to_dict.py:9:19:9:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | | xml_to_dict.py:9:19:9:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | @@ -250,11 +250,11 @@ subpaths | xml_etree.py:42:45:42:55 | ControlFlowNode for xml_content | xml_etree.py:39:19:39:25 | ControlFlowNode for request | xml_etree.py:42:45:42:55 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_etree.py:42:45:42:55 | ControlFlowNode for xml_content | This | xml_etree.py:39:19:39:25 | ControlFlowNode for request | user-provided value | | xml_etree.py:49:45:49:55 | ControlFlowNode for xml_content | xml_etree.py:46:19:46:25 | ControlFlowNode for request | xml_etree.py:49:45:49:55 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: XXE. 
| xml_etree.py:49:45:49:55 | ControlFlowNode for xml_content | This | xml_etree.py:46:19:46:25 | ControlFlowNode for request | user-provided value | | xml_etree.py:56:45:56:55 | ControlFlowNode for xml_content | xml_etree.py:53:19:53:25 | ControlFlowNode for request | xml_etree.py:56:45:56:55 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: XXE. | xml_etree.py:56:45:56:55 | ControlFlowNode for xml_content | This | xml_etree.py:53:19:53:25 | ControlFlowNode for request | user-provided value | -| xml_sax_make_parser.py:36:18:36:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:31:19:31:25 | ControlFlowNode for request | xml_sax_make_parser.py:36:18:36:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_sax_make_parser.py:36:18:36:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:31:19:31:25 | ControlFlowNode for request | user-provided value | -| xml_sax_make_parser.py:49:18:49:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:42:19:42:25 | ControlFlowNode for request | xml_sax_make_parser.py:49:18:49:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_sax_make_parser.py:49:18:49:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:42:19:42:25 | ControlFlowNode for request | user-provided value | -| xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:63:19:63:25 | ControlFlowNode for request | xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, DTD retrieval, Quadratic Blowup, XXE. 
| xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:63:19:63:25 | ControlFlowNode for request | user-provided value | -| xml_sax_make_parser.py:79:33:79:53 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:75:19:75:25 | ControlFlowNode for request | xml_sax_make_parser.py:79:33:79:53 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, DTD retrieval, Quadratic Blowup, XXE. | xml_sax_make_parser.py:79:33:79:53 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:75:19:75:25 | ControlFlowNode for request | user-provided value | -| xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, DTD retrieval, Quadratic Blowup, XXE. | xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | user-provided value | -| xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | user-provided value | -| xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:98:19:98:25 | ControlFlowNode for request | xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. 
| xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:98:19:98:25 | ControlFlowNode for request | user-provided value | +| xml_sax.py:36:18:36:38 | ControlFlowNode for StringIO() | xml_sax.py:31:19:31:25 | ControlFlowNode for request | xml_sax.py:36:18:36:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_sax.py:36:18:36:38 | ControlFlowNode for StringIO() | This | xml_sax.py:31:19:31:25 | ControlFlowNode for request | user-provided value | +| xml_sax.py:49:18:49:38 | ControlFlowNode for StringIO() | xml_sax.py:42:19:42:25 | ControlFlowNode for request | xml_sax.py:49:18:49:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_sax.py:49:18:49:38 | ControlFlowNode for StringIO() | This | xml_sax.py:42:19:42:25 | ControlFlowNode for request | user-provided value | +| xml_sax.py:69:18:69:38 | ControlFlowNode for StringIO() | xml_sax.py:63:19:63:25 | ControlFlowNode for request | xml_sax.py:69:18:69:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, DTD retrieval, Quadratic Blowup, XXE. | xml_sax.py:69:18:69:38 | ControlFlowNode for StringIO() | This | xml_sax.py:63:19:63:25 | ControlFlowNode for request | user-provided value | +| xml_sax.py:79:33:79:53 | ControlFlowNode for StringIO() | xml_sax.py:75:19:75:25 | ControlFlowNode for request | xml_sax.py:79:33:79:53 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, DTD retrieval, Quadratic Blowup, XXE. 
| xml_sax.py:79:33:79:53 | ControlFlowNode for StringIO() | This | xml_sax.py:75:19:75:25 | ControlFlowNode for request | user-provided value | +| xml_sax.py:91:22:91:42 | ControlFlowNode for StringIO() | xml_sax.py:86:19:86:25 | ControlFlowNode for request | xml_sax.py:91:22:91:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, DTD retrieval, Quadratic Blowup, XXE. | xml_sax.py:91:22:91:42 | ControlFlowNode for StringIO() | This | xml_sax.py:86:19:86:25 | ControlFlowNode for request | user-provided value | +| xml_sax.py:93:22:93:42 | ControlFlowNode for StringIO() | xml_sax.py:86:19:86:25 | ControlFlowNode for request | xml_sax.py:93:22:93:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_sax.py:93:22:93:42 | ControlFlowNode for StringIO() | This | xml_sax.py:86:19:86:25 | ControlFlowNode for request | user-provided value | +| xml_sax.py:103:18:103:38 | ControlFlowNode for StringIO() | xml_sax.py:98:19:98:25 | ControlFlowNode for request | xml_sax.py:103:18:103:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_sax.py:103:18:103:38 | ControlFlowNode for StringIO() | This | xml_sax.py:98:19:98:25 | ControlFlowNode for request | user-provided value | | xml_to_dict.py:17:28:17:38 | ControlFlowNode for xml_content | xml_to_dict.py:15:19:15:25 | ControlFlowNode for request | xml_to_dict.py:17:28:17:38 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. 
| xml_to_dict.py:17:28:17:38 | ControlFlowNode for xml_content | This | xml_to_dict.py:15:19:15:25 | ControlFlowNode for request | user-provided value | diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/xml_sax_make_parser.py b/python/ql/test/experimental/query-tests/Security/CWE-611/xml_sax.py similarity index 100% rename from python/ql/test/experimental/query-tests/Security/CWE-611/xml_sax_make_parser.py rename to python/ql/test/experimental/query-tests/Security/CWE-611/xml_sax.py From faebaee141c10ace600153d84a2d8d1952beb73a Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 3 Mar 2022 20:36:16 +0100 Subject: [PATCH 55/79] Python: Use concept tests for XML Parsing I was loosing my mind from looking through those .expected files Just going to take it one file at time, to make reviewing easier --- .../XML/ExperimentalXmlConceptsTests.expected | 0 .../XML/ExperimentalXmlConceptsTests.ql | 33 +++++++ .../frameworks/XML/lxml_etree.py | 40 +++++++++ .../Security/CWE-611/lxml_etree.py | 90 ------------------- 4 files changed, 73 insertions(+), 90 deletions(-) create mode 100644 python/ql/test/experimental/library-tests/frameworks/XML/ExperimentalXmlConceptsTests.expected create mode 100644 python/ql/test/experimental/library-tests/frameworks/XML/ExperimentalXmlConceptsTests.ql create mode 100644 python/ql/test/experimental/library-tests/frameworks/XML/lxml_etree.py delete mode 100644 python/ql/test/experimental/query-tests/Security/CWE-611/lxml_etree.py diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/ExperimentalXmlConceptsTests.expected b/python/ql/test/experimental/library-tests/frameworks/XML/ExperimentalXmlConceptsTests.expected new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/ExperimentalXmlConceptsTests.ql b/python/ql/test/experimental/library-tests/frameworks/XML/ExperimentalXmlConceptsTests.ql new file mode 100644 index 
000000000000..8ca33765d64f --- /dev/null +++ b/python/ql/test/experimental/library-tests/frameworks/XML/ExperimentalXmlConceptsTests.ql @@ -0,0 +1,33 @@ +import python +import experimental.semmle.python.Concepts +import experimental.semmle.python.frameworks.Xml +import semmle.python.dataflow.new.DataFlow +import TestUtilities.InlineExpectationsTest +private import semmle.python.dataflow.new.internal.PrintNode + +class XmlParsingTest extends InlineExpectationsTest { + XmlParsingTest() { this = "XmlParsingTest" } + + override string getARelevantTag() { result in ["input", "vuln"] } + + override predicate hasActualResult(Location location, string element, string tag, string value) { + exists(location.getFile().getRelativePath()) and + exists(XML::XMLParsing parsing | + exists(DataFlow::Node input | + input = parsing.getAnInput() and + location = input.getLocation() and + element = input.toString() and + value = prettyNodeForInlineTest(input) and + tag = "input" + ) + or + exists(XML::XMLVulnerabilityKind kind | + parsing.vulnerable(kind) and + location = parsing.getLocation() and + element = parsing.toString() and + value = "'" + kind + "'" and + tag = "vuln" + ) + ) + } +} diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/lxml_etree.py b/python/ql/test/experimental/library-tests/frameworks/XML/lxml_etree.py new file mode 100644 index 000000000000..64fa51175837 --- /dev/null +++ b/python/ql/test/experimental/library-tests/frameworks/XML/lxml_etree.py @@ -0,0 +1,40 @@ +from io import StringIO +import lxml.etree + +x = "some xml" + +# different parsing methods +lxml.etree.fromstring(x) # $ input=x vuln='XXE' + +lxml.etree.fromstringlist([x]) # $ input=List vuln='XXE' + +lxml.etree.XML(x) # $ input=x vuln='XXE' + +lxml.etree.parse(StringIO(x)).getroot() # $ input=StringIO(..) 
vuln='XXE' + +# With default parsers (nothing changed) +parser = lxml.etree.XMLParser() +lxml.etree.fromstring(x, parser=parser) # $ input=x vuln='XXE' + +parser = lxml.etree.get_default_parser() +lxml.etree.fromstring(x, parser=parser) # $ input=x vuln='XXE' + +# XXE-safe +parser = lxml.etree.XMLParser(resolve_entities=False) +lxml.etree.fromstring(x, parser=parser) # $ input=x + +# XXE-vuln +parser = lxml.etree.XMLParser(resolve_entities=True) +lxml.etree.fromstring(x, parser=parser) # $ input=x vuln='XXE' + +# Billion laughs vuln (also XXE) +parser = lxml.etree.XMLParser(huge_tree=True) +lxml.etree.fromstring(x, parser=parser) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' vuln='XXE' + +# Billion laughs, but not XXE +parser = lxml.etree.XMLParser(resolve_entities=False, huge_tree=True) +lxml.etree.fromstring(x, parser=parser) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' + +# DTD retrival vuln (also XXE) +parser = lxml.etree.XMLParser(load_dtd=True, no_network=False) +lxml.etree.fromstring(x, parser=parser) # $ input=x vuln='DTD retrieval' vuln='XXE' diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/lxml_etree.py b/python/ql/test/experimental/query-tests/Security/CWE-611/lxml_etree.py deleted file mode 100644 index 2c2712098503..000000000000 --- a/python/ql/test/experimental/query-tests/Security/CWE-611/lxml_etree.py +++ /dev/null @@ -1,90 +0,0 @@ -from flask import request, Flask -from io import StringIO, BytesIO -import lxml.etree - -app = Flask(__name__) - -# Parsing - -@app.route("/lxml_etree_fromstring") -def lxml_etree_fromstring(): - xml_content = request.args['xml_content'] - - return lxml.etree.fromstring(xml_content).text # NOT OK for XXE - -@app.route("/lxml_etree_fromstringlist") -def lxml_etree_fromstringlist(): - xml_content = request.args['xml_content'] - - return lxml.etree.fromstringlist([xml_content]).text # NOT OK for XXE - -@app.route("/lxml_etree_XML") -def lxml_etree_XML(): - xml_content = 
request.args['xml_content'] - - return lxml.etree.XML(xml_content).text # NOT OK for XXE - -@app.route("/lxml_etree_parse") -def lxml_etree_parse(): - xml_content = request.args['xml_content'] - - return lxml.etree.parse(StringIO(xml_content)).getroot().text # NOT OK for XXE - -# With parsers - Default - -@app.route("/lxml_etree_fromstring-lxml.etree.XMLParser") -def lxml_parser(): - xml_content = request.args['xml_content'] - - parser = lxml.etree.XMLParser() - return lxml.etree.fromstring(xml_content, parser=parser).text # NOT OK for XXE - -@app.route("/lxml_etree_fromstring-lxml.etree.get_default_parser") -def lxml_parser(): - xml_content = request.args['xml_content'] - - parser = lxml.etree.get_default_parser() - return lxml.etree.fromstring(xml_content, parser=parser).text # NOT OK for XXE - -# With parsers - With options - -# XXE-safe -@app.route("/lxml_etree_fromstring-lxml.etree.XMLParser+") -def lxml_parser(): - xml_content = request.args['xml_content'] - - parser = lxml.etree.XMLParser(resolve_entities=False) - return lxml.etree.fromstring(xml_content, parser=parser).text # OK for XXE - -# XXE-vuln -@app.route("/lxml_etree_fromstring-lxml.etree.XMLParser+") -def lxml_parser(): - xml_content = request.args['xml_content'] - - parser = lxml.etree.XMLParser(resolve_entities=True) - return lxml.etree.fromstring(xml_content, parser=parser).text # NOT OK for XXE - -# Billion laughs and quadratic blowup (huge_tree) - -@app.route("/lxml_etree_fromstring-lxml.etree.XMLParser+") -def lxml_parser(): - xml_content = request.args['xml_content'] - - parser = lxml.etree.XMLParser(resolve_entities=False, huge_tree=True) - return lxml.etree.fromstring(xml_content, parser=parser).text # OK for XXE, NOT OK for billion laughs/quadratic - -@app.route("/lxml_etree_fromstring-lxml.etree.XMLParser+") -def lxml_parser(): - xml_content = request.args['xml_content'] - - parser = lxml.etree.XMLParser(huge_tree=True) - return lxml.etree.fromstring(xml_content, parser=parser).text # 
NOT OK for XXE, NOT OK for billion laughs/quadratic - -# DTD retrival - -@app.route("/lxml_etree_fromstring-lxml.etree.XMLParser+") -def lxml_parser(): - xml_content = request.args['xml_content'] - - parser = lxml.etree.XMLParser(resolve_entities=False, load_dtd=True, no_network=False) - return lxml.etree.fromstring(xml_content, parser=parser).text # NOT OK for DTD, OK for rest From a7134cac2eb339c76f3f299c77b927742e5e0320 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 3 Mar 2022 20:39:56 +0100 Subject: [PATCH 56/79] Python: Port xml.dom tests --- .../library-tests/frameworks/XML/xml_dom.py | 19 ++++++++ .../query-tests/Security/CWE-611/xml_dom.py | 43 ------------------- 2 files changed, 19 insertions(+), 43 deletions(-) create mode 100644 python/ql/test/experimental/library-tests/frameworks/XML/xml_dom.py delete mode 100644 python/ql/test/experimental/query-tests/Security/CWE-611/xml_dom.py diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/xml_dom.py b/python/ql/test/experimental/library-tests/frameworks/XML/xml_dom.py new file mode 100644 index 000000000000..ade6ece910d8 --- /dev/null +++ b/python/ql/test/experimental/library-tests/frameworks/XML/xml_dom.py @@ -0,0 +1,19 @@ +from io import StringIO +import xml.dom.minidom +import xml.dom.pulldom +import xml.sax + +x = "some xml" + +# minidom +xml.dom.minidom.parse(StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup' +xml.dom.minidom.parseString(x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' + +# pulldom +xml.dom.pulldom.parse(StringIO(x))['START_DOCUMENT'][1] # $ input=StringIO(..) 
vuln='Billion Laughs' vuln='Quadratic Blowup' +xml.dom.pulldom.parseString(x)['START_DOCUMENT'][1] # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' + +# These are based on SAX parses, and you can specify your own, so you can expose yourself to XXE (yay/) +parser = xml.sax.make_parser() +parser.setFeature(xml.sax.handler.feature_external_ges, True) +xml.dom.minidom.parse(StringIO(x), parser=parser) # $ input=StringIO(..) vuln='Billion Laughs' vuln='DTD retrieval' vuln='Quadratic Blowup' vuln='XXE' diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/xml_dom.py b/python/ql/test/experimental/query-tests/Security/CWE-611/xml_dom.py deleted file mode 100644 index 55762ece0be5..000000000000 --- a/python/ql/test/experimental/query-tests/Security/CWE-611/xml_dom.py +++ /dev/null @@ -1,43 +0,0 @@ -from flask import request, Flask -from io import StringIO, BytesIO -import xml.dom.minidom -import xml.dom.pulldom -import xml.sax - -app = Flask(__name__) - -# Parsing - -@app.route("/xml_minidom_parse") -def xml_minidom_parse(): - xml_content = request.args['xml_content'] - - return xml.dom.minidom.parse(StringIO(xml_content)).documentElement.childNodes # OK for XXE/DTD, NOT OK for billion laughs/quadratic - -@app.route("/xml_minidom_parseString") -def xml_minidom_parseString(): - xml_content = request.args['xml_content'] - - return xml.dom.minidom.parseString(xml_content).documentElement.childNodes # OK for XXE/DTD, NOT OK for billion laughs/quadratic - -@app.route("/xml_pulldom_parse") -def xml_pulldom_parse(): - xml_content = request.args['xml_content'] - - return xml.dom.pulldom.parse(StringIO(xml_content))['START_DOCUMENT'][1].documentElement.childNodes # OK for XXE/DTD, NOT OK for billion laughs/quadratic - -@app.route("/xml_pulldom_parseString") -def xml_pulldom_parseString(): - xml_content = request.args['xml_content'] - - return xml.dom.pulldom.parseString(xml_content)['START_DOCUMENT'][1].documentElement.childNodes # OK for XXE/DTD, NOT OK for 
billion laughs/quadratic - -# With parsers - -@app.route("/xml_minidom_parse_xml_sax_make_parser") -def xml_minidom_parse_xml_sax_make_parser(): - xml_content = request.args['xml_content'] - - parser = xml.sax.make_parser() - parser.setFeature(xml.sax.handler.feature_external_ges, True) - return xml.dom.minidom.parse(StringIO(xml_content), parser=parser).documentElement.childNodes # NOT OK for XXE/DTD, NOT OK for billion laughs/quadratic From 5fb4c4d1524f8a6bae5a8a3ff1c40b35b66f0998 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 3 Mar 2022 20:50:45 +0100 Subject: [PATCH 57/79] Python: Port xml.etree tests --- .../library-tests/frameworks/XML/xml_etree.py | 19 ++++++ .../Security/CWE-611/dont_extract/PoC.py | 17 +++++ .../query-tests/Security/CWE-611/xml_etree.py | 64 ------------------- 3 files changed, 36 insertions(+), 64 deletions(-) create mode 100644 python/ql/test/experimental/library-tests/frameworks/XML/xml_etree.py delete mode 100644 python/ql/test/experimental/query-tests/Security/CWE-611/xml_etree.py diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/xml_etree.py b/python/ql/test/experimental/library-tests/frameworks/XML/xml_etree.py new file mode 100644 index 000000000000..e2b81b3ad529 --- /dev/null +++ b/python/ql/test/experimental/library-tests/frameworks/XML/xml_etree.py @@ -0,0 +1,19 @@ +from io import StringIO +import xml.etree.ElementTree + +x = "some xml" + +# Parsing in different ways +xml.etree.ElementTree.fromstring(x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' +xml.etree.ElementTree.fromstringlist(x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' +xml.etree.ElementTree.XML(x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' +xml.etree.ElementTree.parse(StringIO(x)) # $ input=StringIO(..) 
vuln='Billion Laughs' vuln='Quadratic Blowup' + +# With parsers (no options available to disable/enable security features) +parser = xml.etree.ElementTree.XMLParser() +xml.etree.ElementTree.fromstring(x, parser=parser) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' + +# note: it's technically possible to use the thing wrapper func `fromstring` with an +# `lxml` parser, and thereby change what vulnerabilities you are exposed to.. but it +# seems very unlikely that anyone would do this, so we have intentionally not added any +# tests for this. diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/dont_extract/PoC.py b/python/ql/test/experimental/query-tests/Security/CWE-611/dont_extract/PoC.py index 862346de3e8a..b38ff9889e9d 100644 --- a/python/ql/test/experimental/query-tests/Security/CWE-611/dont_extract/PoC.py +++ b/python/ql/test/experimental/query-tests/Security/CWE-611/dont_extract/PoC.py @@ -250,6 +250,23 @@ def test_ok_xml(): assert root.tag == "test" assert root.text == "hello world" + @staticmethod + def test_ok_xml_sax_parser(): + # you _can_ pass a SAX parser to xml.etree... but it doesn't give you the output :| + parser = xml.sax.make_parser() + root = xml.etree.ElementTree.fromstring(ok_xml, parser=parser) + assert root == None + + @staticmethod + def test_ok_xml_lxml_parser(): + # this is technically possible, since parsers follow the same API, and the + # `fromstring` function is just a thin wrapper... 
seems very unlikely that + # anyone would do this though :| + parser = lxml.etree.XMLParser() + root = xml.etree.ElementTree.fromstring(ok_xml, parser=parser) + assert root.tag == "test" + assert root.text == "hello world" + @staticmethod def test_xxe_not_possible(): parser = xml.etree.ElementTree.XMLParser() diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/xml_etree.py b/python/ql/test/experimental/query-tests/Security/CWE-611/xml_etree.py deleted file mode 100644 index b9c980045e2a..000000000000 --- a/python/ql/test/experimental/query-tests/Security/CWE-611/xml_etree.py +++ /dev/null @@ -1,64 +0,0 @@ -from flask import request, Flask -from io import StringIO, BytesIO -import xml.etree -import xml.etree.ElementTree -import lxml.etree - -app = Flask(__name__) - -# Parsing - -@app.route("/xml_etree_fromstring") -def xml_etree_fromstring(): - xml_content = request.args['xml_content'] - - return xml.etree.ElementTree.fromstring(xml_content).text - -@app.route("/xml_etree_fromstringlist") -def xml_etree_fromstringlist(): - xml_content = request.args['xml_content'] - - return xml.etree.ElementTree.fromstringlist(xml_content).text - -@app.route("/xml_etree_XML") -def xml_etree_XML(): - xml_content = request.args['xml_content'] - - return xml.etree.ElementTree.XML(xml_content).text - -@app.route("/xml_etree_parse") -def xml_etree_parse(): - xml_content = request.args['xml_content'] - - return xml.etree.ElementTree.parse(StringIO(xml_content)).getroot().text - -# With parsers - -@app.route("/xml_etree_fromstring-xml_etree_XMLParser") -def xml_parser_1(): - xml_content = request.args['xml_content'] - - parser = xml.etree.ElementTree.XMLParser() - return xml.etree.ElementTree.fromstring(xml_content, parser=parser).text - -@app.route("/xml_etree_fromstring-lxml_etree_XMLParser") -def xml_parser_2(): - xml_content = request.args['xml_content'] - - parser = lxml.etree.XMLParser() - return xml.etree.ElementTree.fromstring(xml_content, parser=parser).text - 
-@app.route("/xml_etree_fromstring-lxml_get_default_parser") -def xml_parser_3(): - xml_content = request.args['xml_content'] - - parser = lxml.etree.get_default_parser() - return xml.etree.ElementTree.fromstring(xml_content, parser=parser).text - -@app.route("/xml_etree_fromstring-lxml_get_default_parser") -def xml_parser_4(): - xml_content = request.args['xml_content'] - - parser = xml.sax.make_parser() - parser.setFeature(xml.sax.handler.feature_external_ges, True) - return xml.etree.ElementTree.fromstring(xml_content, parser=parser).text \ No newline at end of file From 0b12d918171ee00b8a40f576d75c65b38193ebf0 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 3 Mar 2022 20:57:04 +0100 Subject: [PATCH 58/79] Python: Port xml.sax tests --- .../library-tests/frameworks/XML/xml_sax.py | 47 ++++++++ .../query-tests/Security/CWE-611/xml_sax.py | 104 ------------------ 2 files changed, 47 insertions(+), 104 deletions(-) create mode 100644 python/ql/test/experimental/library-tests/frameworks/XML/xml_sax.py delete mode 100644 python/ql/test/experimental/query-tests/Security/CWE-611/xml_sax.py diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/xml_sax.py b/python/ql/test/experimental/library-tests/frameworks/XML/xml_sax.py new file mode 100644 index 000000000000..47f6600b153e --- /dev/null +++ b/python/ql/test/experimental/library-tests/frameworks/XML/xml_sax.py @@ -0,0 +1,47 @@ +from io import StringIO +import xml.sax + +x = "some xml" + +class MainHandler(xml.sax.ContentHandler): + def __init__(self): + self._result = [] + + def characters(self, data): + self._result.append(data) + + def parse(self, f): + xml.sax.parse(f, self) # $ MISSING: input=f vuln='Billion Laughs' vuln='Quadratic Blowup' + self._result + +MainHandler().parse(StringIO(x)) + +parser = xml.sax.make_parser() +parser.parse(StringIO(x)) # $ input=StringIO(..) 
vuln='Billion Laughs' vuln='Quadratic Blowup' + +# You can make it vuln to both XXE and DTD retrieval by setting this flag +# see https://docs.python.org/3/library/xml.sax.handler.html#xml.sax.handler.feature_external_ges +parser = xml.sax.make_parser() +parser.setFeature(xml.sax.handler.feature_external_ges, True) +parser.parse(StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='DTD retrieval' vuln='Quadratic Blowup' vuln='XXE' + +parser = xml.sax.make_parser() +parser.setFeature(xml.sax.handler.feature_external_ges, False) +parser.parse(StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup' + +# Forward Type Tracking test + +def func(cond): + parser = xml.sax.make_parser() + if cond: + parser.setFeature(xml.sax.handler.feature_external_ges, True) + parser.parse(StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='DTD retrieval' vuln='Quadratic Blowup' vuln='XXE' + else: + parser.parse(StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup' + +# make it vuln, then making it safe +# a bit of an edge-case, but is nice to be able to handle. +parser = xml.sax.make_parser() +parser.setFeature(xml.sax.handler.feature_external_ges, True) +parser.setFeature(xml.sax.handler.feature_external_ges, False) +parser.parse(StringIO(x)) # $ input=StringIO(..) 
vuln='Billion Laughs' vuln='Quadratic Blowup' diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/xml_sax.py b/python/ql/test/experimental/query-tests/Security/CWE-611/xml_sax.py deleted file mode 100644 index e95abf753796..000000000000 --- a/python/ql/test/experimental/query-tests/Security/CWE-611/xml_sax.py +++ /dev/null @@ -1,104 +0,0 @@ -from flask import request, Flask -from io import StringIO -import xml.sax - -app = Flask(__name__) - - -class MainHandler(xml.sax.ContentHandler): - def __init__(self): - self._result = [] - - def characters(self, data): - self._result.append(data) - - def parse(self, f): - xml.sax.parse(f, self) # OK for XXE/DTD, NOT OK for billion laughs/quadratic - return self._result - -# GOOD - - -@app.route("/MainHandler") -def mainHandler(): - xml_content = request.args['xml_content'] - - return MainHandler().parse(StringIO(xml_content)) - - -@app.route("/xml.sax.make_parser()+MainHandler") -def xml_makeparser_MainHandler(): - xml_content = request.args['xml_content'] - - GoodHandler = MainHandler() - parser = xml.sax.make_parser() - parser.setContentHandler(GoodHandler) - parser.parse(StringIO(xml_content)) # OK for XXE/DTD, NOT OK for billion laughs/quadratic - return GoodHandler._result - - -@app.route("/xml.sax.make_parser()+MainHandler-xml.sax.handler.feature_external_ges_False") -def xml_makeparser_MainHandler_entitiesFalse(): - xml_content = request.args['xml_content'] - - GoodHandler = MainHandler() - parser = xml.sax.make_parser() - parser.setContentHandler(GoodHandler) - # https://docs.python.org/3/library/xml.sax.handler.html#xml.sax.handler.feature_external_ges - parser.setFeature(xml.sax.handler.feature_external_ges, False) - parser.parse(StringIO(xml_content)) # # OK for XXE/DTD, NOT OK for billion laughs/quadratic - return GoodHandler._result - -@app.route("not-user-controlled") -def not_user_controlled(): - parser = xml.sax.make_parser() - parser.setFeature(xml.sax.handler.feature_external_ges, True) - 
parser.parse("/not-user-controlled/default_config.xml") # OK - return - -# BAD - -@app.route("/xml.sax.make_parser()+MainHandler-xml.sax.handler.feature_external_ges_True") -def xml_makeparser_MainHandler_entitiesTrue(): - xml_content = request.args['xml_content'] - - BadHandler = MainHandler() - parser = xml.sax.make_parser() - parser.setContentHandler(BadHandler) - parser.setFeature(xml.sax.handler.feature_external_ges, True) - parser.parse(StringIO(xml_content)) # NOT OK for XXE/DTD, NOT OK for billion laughs/quadratic - return BadHandler._result - - -@app.route("/xml.sax.make_parser()+xml.dom.minidom.parse-xml.sax.handler.feature_external_ges_True") -def xml_makeparser_minidom_entitiesTrue(): - xml_content = request.args['xml_content'] - - parser = xml.sax.make_parser() - parser.setFeature(xml.sax.handler.feature_external_ges, True) - doc = xml.dom.minidom.parse(StringIO(xml_content), parser=parser) # NOT OK for XXE/DTD, NOT OK for billion laughs/quadratic - return doc.documentElement.childNodes - -# Forward Type Tracking test - -@app.route("forward_tracking1") -def forward_tracking1(action): - xml_content = request.args['xml_content'] - - parser = xml.sax.make_parser() - if action == 'load-config': - parser.setFeature(xml.sax.handler.feature_external_ges, True) - parser.parse(StringIO(xml_content)) # NOT OK for XXE/DTD, NOT OK for billion laughs/quadratic - else: - parser.parse(StringIO(xml_content)) # OK for XXE/DTD, NOT OK for billion laughs/quadratic - return - -@app.route("forward_tracking2") -def forward_tracking2(action): - xml_content = request.args['xml_content'] - - parser = xml.sax.make_parser() - parser.setFeature(xml.sax.handler.feature_external_ges, True) - parser.setFeature(xml.sax.handler.feature_external_ges, False) - parser.parse(StringIO(xml_content)) # OK for XXE/DTD, NOT OK for billion laughs/quadratic - return From c739ae40b60ef5644d0c0e9c1a8238742f2207e2 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 3 Mar 2022 20:59:00 
+0100 Subject: [PATCH 59/79] Python: Port `xmltodict` tests --- .../library-tests/frameworks/XML/xmltodict.py | 6 ++++++ .../query-tests/Security/CWE-611/xml_to_dict.py | 17 ----------------- 2 files changed, 6 insertions(+), 17 deletions(-) create mode 100644 python/ql/test/experimental/library-tests/frameworks/XML/xmltodict.py delete mode 100644 python/ql/test/experimental/query-tests/Security/CWE-611/xml_to_dict.py diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/xmltodict.py b/python/ql/test/experimental/library-tests/frameworks/XML/xmltodict.py new file mode 100644 index 000000000000..ee0b38719125 --- /dev/null +++ b/python/ql/test/experimental/library-tests/frameworks/XML/xmltodict.py @@ -0,0 +1,6 @@ +import xmltodict + +x = "some xml" + +xmltodict.parse(x) # $ input=x +xmltodict.parse(x, disable_entities=False) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/xml_to_dict.py b/python/ql/test/experimental/query-tests/Security/CWE-611/xml_to_dict.py deleted file mode 100644 index 8f43d2e1cc1f..000000000000 --- a/python/ql/test/experimental/query-tests/Security/CWE-611/xml_to_dict.py +++ /dev/null @@ -1,17 +0,0 @@ -from flask import request, Flask -from io import StringIO, BytesIO -import xmltodict - -app = Flask(__name__) - -@app.route("/xmltodict.parse") -def xmltodict_parse(): - xml_content = request.args['xml_content'] - - return xmltodict.parse(xml_content) # OK - -@app.route("/xmltodict.parse2") -def xmltodict_parse2(): - xml_content = request.args['xml_content'] - - return xmltodict.parse(xml_content, disable_entities=False) # NOT OK for billion laughs/quadratic From 2451123c6712e566fe0256f9349952e8ef738cd2 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 3 Mar 2022 20:59:45 +0100 Subject: [PATCH 60/79] Python: Move XML PoC to new test dir --- .../dont_extract => library-tests/frameworks/XML/poc}/PoC.py | 0 .../dont_extract => 
library-tests/frameworks/XML/poc}/flag | 0 .../library-tests/frameworks/XML/poc/this-dir-is-not-extracted | 1 + 3 files changed, 1 insertion(+) rename python/ql/test/experimental/{query-tests/Security/CWE-611/dont_extract => library-tests/frameworks/XML/poc}/PoC.py (100%) rename python/ql/test/experimental/{query-tests/Security/CWE-611/dont_extract => library-tests/frameworks/XML/poc}/flag (100%) create mode 100644 python/ql/test/experimental/library-tests/frameworks/XML/poc/this-dir-is-not-extracted diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/dont_extract/PoC.py b/python/ql/test/experimental/library-tests/frameworks/XML/poc/PoC.py similarity index 100% rename from python/ql/test/experimental/query-tests/Security/CWE-611/dont_extract/PoC.py rename to python/ql/test/experimental/library-tests/frameworks/XML/poc/PoC.py diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/dont_extract/flag b/python/ql/test/experimental/library-tests/frameworks/XML/poc/flag similarity index 100% rename from python/ql/test/experimental/query-tests/Security/CWE-611/dont_extract/flag rename to python/ql/test/experimental/library-tests/frameworks/XML/poc/flag diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/poc/this-dir-is-not-extracted b/python/ql/test/experimental/library-tests/frameworks/XML/poc/this-dir-is-not-extracted new file mode 100644 index 000000000000..b1925ade1d3a --- /dev/null +++ b/python/ql/test/experimental/library-tests/frameworks/XML/poc/this-dir-is-not-extracted @@ -0,0 +1 @@ +just FYI From 32787939721e9478e4075b6c7d2f10a96b2a2cb1 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 3 Mar 2022 21:05:44 +0100 Subject: [PATCH 61/79] Python: Handle more functions and kw-args --- .../semmle/python/frameworks/Xml.qll | 81 ++++++++++++++++--- .../frameworks/XML/lxml_etree.py | 9 ++- .../library-tests/frameworks/XML/xml_dom.py | 12 +++ .../library-tests/frameworks/XML/xml_etree.py | 16 +++- 
.../library-tests/frameworks/XML/xml_sax.py | 10 +-- .../library-tests/frameworks/XML/xmltodict.py | 2 + 6 files changed, 114 insertions(+), 16 deletions(-) diff --git a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll index bf481a1f2a34..b0e7592c3936 100644 --- a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll +++ b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll @@ -53,11 +53,21 @@ private module Xml { API::moduleImport("xml") .getMember("etree") .getMember("ElementTree") - .getMember(["fromstring", "fromstringlist", "XML", "parse"]) + .getMember(["fromstring", "fromstringlist", "XML", "XMLID", "parse", "iterparse"]) .getACall() } - override DataFlow::Node getAnInput() { result = this.getArg(0) } + override DataFlow::Node getAnInput() { + result in [ + this.getArg(0), + // fromstring / XML / XMLID + this.getArgByName("text"), + // fromstringlist + this.getArgByName("sequence"), + // parse / iterparse + this.getArgByName("source"), + ] + } override predicate vulnerable(XML::XMLVulnerabilityKind kind) { not exists(this.getArgByName("parser")) and @@ -163,8 +173,8 @@ private module Xml { * parsed_xml = BadHandler._result * ``` */ - private class XMLSaxParsing extends DataFlow::MethodCallNode, XML::XMLParsing::Range { - XMLSaxParsing() { + private class XMLSaxInstanceParsing extends DataFlow::MethodCallNode, XML::XMLParsing::Range { + XMLSaxInstanceParsing() { this = API::moduleImport("xml") .getMember("sax") @@ -174,7 +184,40 @@ private module Xml { .getACall() } - override DataFlow::Node getAnInput() { result = this.getArg(0) } + override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("source")] } + + override predicate vulnerable(XML::XMLVulnerabilityKind kind) { + // always vuln to these + (kind.isBillionLaughs() or kind.isQuadraticBlowup()) + or + // can be vuln to other things if features has been turned on + this.getObject() = 
saxParserWithFeatureExternalGesTurnedOn() and + (kind.isXxe() or kind.isDtdRetrieval()) + } + } + + /** + * A call to either `parse` or `parseString` from `xml.sax` module. + * + * See: + * - https://docs.python.org/3.10/library/xml.sax.html#xml.sax.parse + * - https://docs.python.org/3.10/library/xml.sax.html#xml.sax.parseString + */ + private class XMLSaxParsing extends DataFlow::MethodCallNode, XML::XMLParsing::Range { + XMLSaxParsing() { + this = + API::moduleImport("xml").getMember("sax").getMember(["parse", "parseString"]).getACall() + } + + override DataFlow::Node getAnInput() { + result in [ + this.getArg(0), + // parseString + this.getArgByName("string"), + // parse + this.getArgByName("source"), + ] + } override predicate vulnerable(XML::XMLVulnerabilityKind kind) { // always vuln to these @@ -262,11 +305,21 @@ private module Xml { this = API::moduleImport("lxml") .getMember("etree") - .getMember(["fromstring", "fromstringlist", "XML", "parse"]) + .getMember(["fromstring", "fromstringlist", "XML", "parse", "parseid"]) .getACall() } - override DataFlow::Node getAnInput() { result = this.getArg(0) } + override DataFlow::Node getAnInput() { + result in [ + this.getArg(0), + // fromstring / XML + this.getArgByName("text"), + // fromstringlist + this.getArgByName("strings"), + // parse / parseid + this.getArgByName("source"), + ] + } override predicate vulnerable(XML::XMLVulnerabilityKind kind) { exists(XML::XMLParser xmlParser | @@ -293,7 +346,9 @@ private module Xml { private class XMLtoDictParsing extends DataFlow::CallCfgNode, XML::XMLParsing::Range { XMLtoDictParsing() { this = API::moduleImport("xmltodict").getMember("parse").getACall() } - override DataFlow::Node getAnInput() { result = this.getArg(0) } + override DataFlow::Node getAnInput() { + result in [this.getArg(0), this.getArgByName("xml_input")] + } override predicate vulnerable(XML::XMLVulnerabilityKind kind) { (kind.isBillionLaughs() or kind.isQuadraticBlowup()) and @@ -317,7 +372,15 @@ 
private module Xml { } override DataFlow::Node getAnInput() { - result in [this.getArg(0), this.getArgByName("string"), this.getArgByName("file")] + result in [ + this.getArg(0), + // parseString + this.getArgByName("string"), + // minidom.parse + this.getArgByName("file"), + // pulldom.parse + this.getArgByName("stream_or_string"), + ] } DataFlow::Node getParserArg() { result in [this.getArg(1), this.getArgByName("parser")] } diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/lxml_etree.py b/python/ql/test/experimental/library-tests/frameworks/XML/lxml_etree.py index 64fa51175837..3e6e6fb08e7f 100644 --- a/python/ql/test/experimental/library-tests/frameworks/XML/lxml_etree.py +++ b/python/ql/test/experimental/library-tests/frameworks/XML/lxml_etree.py @@ -5,12 +5,19 @@ # different parsing methods lxml.etree.fromstring(x) # $ input=x vuln='XXE' +lxml.etree.fromstring(text=x) # $ input=x vuln='XXE' lxml.etree.fromstringlist([x]) # $ input=List vuln='XXE' +lxml.etree.fromstringlist(strings=[x]) # $ input=List vuln='XXE' lxml.etree.XML(x) # $ input=x vuln='XXE' +lxml.etree.XML(text=x) # $ input=x vuln='XXE' -lxml.etree.parse(StringIO(x)).getroot() # $ input=StringIO(..) vuln='XXE' +lxml.etree.parse(StringIO(x)) # $ input=StringIO(..) vuln='XXE' +lxml.etree.parse(source=StringIO(x)) # $ input=StringIO(..) vuln='XXE' + +lxml.etree.parseid(StringIO(x)) # $ input=StringIO(..) vuln='XXE' +lxml.etree.parseid(source=StringIO(x)) # $ input=StringIO(..) vuln='XXE' # With default parsers (nothing changed) parser = lxml.etree.XMLParser() diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/xml_dom.py b/python/ql/test/experimental/library-tests/frameworks/XML/xml_dom.py index ade6ece910d8..7dce29fc7b9d 100644 --- a/python/ql/test/experimental/library-tests/frameworks/XML/xml_dom.py +++ b/python/ql/test/experimental/library-tests/frameworks/XML/xml_dom.py @@ -7,13 +7,25 @@ # minidom xml.dom.minidom.parse(StringIO(x)) # $ input=StringIO(..) 
vuln='Billion Laughs' vuln='Quadratic Blowup' +xml.dom.minidom.parse(file=StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup' + xml.dom.minidom.parseString(x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' +xml.dom.minidom.parseString(string=x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' + # pulldom xml.dom.pulldom.parse(StringIO(x))['START_DOCUMENT'][1] # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup' +xml.dom.pulldom.parse(stream_or_string=StringIO(x))['START_DOCUMENT'][1] # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup' + xml.dom.pulldom.parseString(x)['START_DOCUMENT'][1] # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' +xml.dom.pulldom.parseString(string=x)['START_DOCUMENT'][1] # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' + # These are based on SAX parses, and you can specify your own, so you can expose yourself to XXE (yay/) parser = xml.sax.make_parser() parser.setFeature(xml.sax.handler.feature_external_ges, True) +xml.dom.minidom.parse(StringIO(x), parser) # $ input=StringIO(..) vuln='Billion Laughs' vuln='DTD retrieval' vuln='Quadratic Blowup' vuln='XXE' xml.dom.minidom.parse(StringIO(x), parser=parser) # $ input=StringIO(..) vuln='Billion Laughs' vuln='DTD retrieval' vuln='Quadratic Blowup' vuln='XXE' + +xml.dom.pulldom.parse(StringIO(x), parser) # $ input=StringIO(..) vuln='Billion Laughs' vuln='DTD retrieval' vuln='Quadratic Blowup' vuln='XXE' +xml.dom.pulldom.parse(StringIO(x), parser=parser) # $ input=StringIO(..) 
vuln='Billion Laughs' vuln='DTD retrieval' vuln='Quadratic Blowup' vuln='XXE' diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/xml_etree.py b/python/ql/test/experimental/library-tests/frameworks/XML/xml_etree.py index e2b81b3ad529..23ac3784cbc3 100644 --- a/python/ql/test/experimental/library-tests/frameworks/XML/xml_etree.py +++ b/python/ql/test/experimental/library-tests/frameworks/XML/xml_etree.py @@ -5,9 +5,23 @@ # Parsing in different ways xml.etree.ElementTree.fromstring(x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' -xml.etree.ElementTree.fromstringlist(x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' +xml.etree.ElementTree.fromstring(text=x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' + +xml.etree.ElementTree.fromstringlist([x]) # $ input=List vuln='Billion Laughs' vuln='Quadratic Blowup' +xml.etree.ElementTree.fromstringlist(sequence=[x]) # $ input=List vuln='Billion Laughs' vuln='Quadratic Blowup' + xml.etree.ElementTree.XML(x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' +xml.etree.ElementTree.XML(text=x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' + +xml.etree.ElementTree.XMLID(x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' +xml.etree.ElementTree.XMLID(text=x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' + xml.etree.ElementTree.parse(StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup' +xml.etree.ElementTree.parse(source=StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup' + +xml.etree.ElementTree.iterparse(StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup' +xml.etree.ElementTree.iterparse(source=StringIO(x)) # $ input=StringIO(..) 
vuln='Billion Laughs' vuln='Quadratic Blowup' + # With parsers (no options available to disable/enable security features) parser = xml.etree.ElementTree.XMLParser() diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/xml_sax.py b/python/ql/test/experimental/library-tests/frameworks/XML/xml_sax.py index 47f6600b153e..89bbec3f1f57 100644 --- a/python/ql/test/experimental/library-tests/frameworks/XML/xml_sax.py +++ b/python/ql/test/experimental/library-tests/frameworks/XML/xml_sax.py @@ -10,14 +10,15 @@ def __init__(self): def characters(self, data): self._result.append(data) - def parse(self, f): - xml.sax.parse(f, self) # $ MISSING: input=f vuln='Billion Laughs' vuln='Quadratic Blowup' - self._result +xml.sax.parse(StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup' +xml.sax.parse(source=StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup' -MainHandler().parse(StringIO(x)) +xml.sax.parseString(x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' +xml.sax.parseString(string=x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' parser = xml.sax.make_parser() parser.parse(StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup' +parser.parse(source=StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup' # You can make it vuln to both XXE and DTD retrieval by setting this flag # see https://docs.python.org/3/library/xml.sax.handler.html#xml.sax.handler.feature_external_ges @@ -30,7 +31,6 @@ def parse(self, f): parser.parse(StringIO(x)) # $ input=StringIO(..) 
vuln='Billion Laughs' vuln='Quadratic Blowup' # Forward Type Tracking test - def func(cond): parser = xml.sax.make_parser() if cond: diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/xmltodict.py b/python/ql/test/experimental/library-tests/frameworks/XML/xmltodict.py index ee0b38719125..473e51c9fe66 100644 --- a/python/ql/test/experimental/library-tests/frameworks/XML/xmltodict.py +++ b/python/ql/test/experimental/library-tests/frameworks/XML/xmltodict.py @@ -3,4 +3,6 @@ x = "some xml" xmltodict.parse(x) # $ input=x +xmltodict.parse(xml_input=x) # $ input=x + xmltodict.parse(x, disable_entities=False) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' From f72f673e7ee82e5fd4156d2d6a5a4e8144d371d7 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 3 Mar 2022 21:09:29 +0100 Subject: [PATCH 62/79] Python: Update `XmlEntityInjection.expected` I had forgotten about this, but better late than never... also added a small representative test --- .../CWE-611/XmlEntityInjection.expected | 279 ++---------------- .../query-tests/Security/CWE-611/test.py | 30 ++ 2 files changed, 53 insertions(+), 256 deletions(-) create mode 100644 python/ql/test/experimental/query-tests/Security/CWE-611/test.py diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected b/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected index f5f85bf178b8..25594b4ddaaf 100644 --- a/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected +++ b/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected @@ -1,260 +1,27 @@ edges -| lxml_etree.py:11:19:11:25 | ControlFlowNode for request | lxml_etree.py:11:19:11:30 | ControlFlowNode for Attribute | -| lxml_etree.py:11:19:11:30 | ControlFlowNode for Attribute | lxml_etree.py:11:19:11:45 | ControlFlowNode for Subscript | -| lxml_etree.py:11:19:11:45 | ControlFlowNode for Subscript | 
lxml_etree.py:13:34:13:44 | ControlFlowNode for xml_content | -| lxml_etree.py:17:19:17:25 | ControlFlowNode for request | lxml_etree.py:17:19:17:30 | ControlFlowNode for Attribute | -| lxml_etree.py:17:19:17:30 | ControlFlowNode for Attribute | lxml_etree.py:17:19:17:45 | ControlFlowNode for Subscript | -| lxml_etree.py:17:19:17:45 | ControlFlowNode for Subscript | lxml_etree.py:19:38:19:50 | ControlFlowNode for List | -| lxml_etree.py:23:19:23:25 | ControlFlowNode for request | lxml_etree.py:23:19:23:30 | ControlFlowNode for Attribute | -| lxml_etree.py:23:19:23:30 | ControlFlowNode for Attribute | lxml_etree.py:23:19:23:45 | ControlFlowNode for Subscript | -| lxml_etree.py:23:19:23:45 | ControlFlowNode for Subscript | lxml_etree.py:25:27:25:37 | ControlFlowNode for xml_content | -| lxml_etree.py:29:19:29:25 | ControlFlowNode for request | lxml_etree.py:29:19:29:30 | ControlFlowNode for Attribute | -| lxml_etree.py:29:19:29:30 | ControlFlowNode for Attribute | lxml_etree.py:29:19:29:45 | ControlFlowNode for Subscript | -| lxml_etree.py:29:19:29:45 | ControlFlowNode for Subscript | lxml_etree.py:31:29:31:49 | ControlFlowNode for StringIO() | -| lxml_etree.py:37:19:37:25 | ControlFlowNode for request | lxml_etree.py:37:19:37:30 | ControlFlowNode for Attribute | -| lxml_etree.py:37:19:37:30 | ControlFlowNode for Attribute | lxml_etree.py:37:19:37:45 | ControlFlowNode for Subscript | -| lxml_etree.py:37:19:37:45 | ControlFlowNode for Subscript | lxml_etree.py:40:34:40:44 | ControlFlowNode for xml_content | -| lxml_etree.py:44:19:44:25 | ControlFlowNode for request | lxml_etree.py:44:19:44:30 | ControlFlowNode for Attribute | -| lxml_etree.py:44:19:44:30 | ControlFlowNode for Attribute | lxml_etree.py:44:19:44:45 | ControlFlowNode for Subscript | -| lxml_etree.py:44:19:44:45 | ControlFlowNode for Subscript | lxml_etree.py:47:34:47:44 | ControlFlowNode for xml_content | -| lxml_etree.py:54:19:54:25 | ControlFlowNode for request | lxml_etree.py:54:19:54:30 | 
ControlFlowNode for Attribute | -| lxml_etree.py:54:19:54:30 | ControlFlowNode for Attribute | lxml_etree.py:54:19:54:45 | ControlFlowNode for Subscript | -| lxml_etree.py:54:19:54:45 | ControlFlowNode for Subscript | lxml_etree.py:57:34:57:44 | ControlFlowNode for xml_content | -| lxml_etree.py:62:19:62:25 | ControlFlowNode for request | lxml_etree.py:62:19:62:30 | ControlFlowNode for Attribute | -| lxml_etree.py:62:19:62:30 | ControlFlowNode for Attribute | lxml_etree.py:62:19:62:45 | ControlFlowNode for Subscript | -| lxml_etree.py:62:19:62:45 | ControlFlowNode for Subscript | lxml_etree.py:65:34:65:44 | ControlFlowNode for xml_content | -| lxml_etree.py:71:19:71:25 | ControlFlowNode for request | lxml_etree.py:71:19:71:30 | ControlFlowNode for Attribute | -| lxml_etree.py:71:19:71:30 | ControlFlowNode for Attribute | lxml_etree.py:71:19:71:45 | ControlFlowNode for Subscript | -| lxml_etree.py:71:19:71:45 | ControlFlowNode for Subscript | lxml_etree.py:74:34:74:44 | ControlFlowNode for xml_content | -| lxml_etree.py:78:19:78:25 | ControlFlowNode for request | lxml_etree.py:78:19:78:30 | ControlFlowNode for Attribute | -| lxml_etree.py:78:19:78:30 | ControlFlowNode for Attribute | lxml_etree.py:78:19:78:45 | ControlFlowNode for Subscript | -| lxml_etree.py:78:19:78:45 | ControlFlowNode for Subscript | lxml_etree.py:81:34:81:44 | ControlFlowNode for xml_content | -| lxml_etree.py:87:19:87:25 | ControlFlowNode for request | lxml_etree.py:87:19:87:30 | ControlFlowNode for Attribute | -| lxml_etree.py:87:19:87:30 | ControlFlowNode for Attribute | lxml_etree.py:87:19:87:45 | ControlFlowNode for Subscript | -| lxml_etree.py:87:19:87:45 | ControlFlowNode for Subscript | lxml_etree.py:90:34:90:44 | ControlFlowNode for xml_content | -| xml_dom.py:13:19:13:25 | ControlFlowNode for request | xml_dom.py:13:19:13:30 | ControlFlowNode for Attribute | -| xml_dom.py:13:19:13:30 | ControlFlowNode for Attribute | xml_dom.py:13:19:13:45 | ControlFlowNode for Subscript | -| 
xml_dom.py:13:19:13:45 | ControlFlowNode for Subscript | xml_dom.py:15:34:15:54 | ControlFlowNode for StringIO() | -| xml_dom.py:19:19:19:25 | ControlFlowNode for request | xml_dom.py:19:19:19:30 | ControlFlowNode for Attribute | -| xml_dom.py:19:19:19:30 | ControlFlowNode for Attribute | xml_dom.py:19:19:19:45 | ControlFlowNode for Subscript | -| xml_dom.py:19:19:19:45 | ControlFlowNode for Subscript | xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | -| xml_dom.py:25:19:25:25 | ControlFlowNode for request | xml_dom.py:25:19:25:30 | ControlFlowNode for Attribute | -| xml_dom.py:25:19:25:30 | ControlFlowNode for Attribute | xml_dom.py:25:19:25:45 | ControlFlowNode for Subscript | -| xml_dom.py:25:19:25:45 | ControlFlowNode for Subscript | xml_dom.py:27:34:27:54 | ControlFlowNode for StringIO() | -| xml_dom.py:31:19:31:25 | ControlFlowNode for request | xml_dom.py:31:19:31:30 | ControlFlowNode for Attribute | -| xml_dom.py:31:19:31:30 | ControlFlowNode for Attribute | xml_dom.py:31:19:31:45 | ControlFlowNode for Subscript | -| xml_dom.py:31:19:31:45 | ControlFlowNode for Subscript | xml_dom.py:33:40:33:50 | ControlFlowNode for xml_content | -| xml_dom.py:39:19:39:25 | ControlFlowNode for request | xml_dom.py:39:19:39:30 | ControlFlowNode for Attribute | -| xml_dom.py:39:19:39:30 | ControlFlowNode for Attribute | xml_dom.py:39:19:39:45 | ControlFlowNode for Subscript | -| xml_dom.py:39:19:39:45 | ControlFlowNode for Subscript | xml_dom.py:43:34:43:54 | ControlFlowNode for StringIO() | -| xml_etree.py:13:19:13:25 | ControlFlowNode for request | xml_etree.py:13:19:13:30 | ControlFlowNode for Attribute | -| xml_etree.py:13:19:13:30 | ControlFlowNode for Attribute | xml_etree.py:13:19:13:45 | ControlFlowNode for Subscript | -| xml_etree.py:13:19:13:45 | ControlFlowNode for Subscript | xml_etree.py:15:45:15:55 | ControlFlowNode for xml_content | -| xml_etree.py:19:19:19:25 | ControlFlowNode for request | xml_etree.py:19:19:19:30 | ControlFlowNode for Attribute | 
-| xml_etree.py:19:19:19:30 | ControlFlowNode for Attribute | xml_etree.py:19:19:19:45 | ControlFlowNode for Subscript | -| xml_etree.py:19:19:19:45 | ControlFlowNode for Subscript | xml_etree.py:21:49:21:59 | ControlFlowNode for xml_content | -| xml_etree.py:25:19:25:25 | ControlFlowNode for request | xml_etree.py:25:19:25:30 | ControlFlowNode for Attribute | -| xml_etree.py:25:19:25:30 | ControlFlowNode for Attribute | xml_etree.py:25:19:25:45 | ControlFlowNode for Subscript | -| xml_etree.py:25:19:25:45 | ControlFlowNode for Subscript | xml_etree.py:27:38:27:48 | ControlFlowNode for xml_content | -| xml_etree.py:31:19:31:25 | ControlFlowNode for request | xml_etree.py:31:19:31:30 | ControlFlowNode for Attribute | -| xml_etree.py:31:19:31:30 | ControlFlowNode for Attribute | xml_etree.py:31:19:31:45 | ControlFlowNode for Subscript | -| xml_etree.py:31:19:31:45 | ControlFlowNode for Subscript | xml_etree.py:33:40:33:60 | ControlFlowNode for StringIO() | -| xml_etree.py:39:19:39:25 | ControlFlowNode for request | xml_etree.py:39:19:39:30 | ControlFlowNode for Attribute | -| xml_etree.py:39:19:39:30 | ControlFlowNode for Attribute | xml_etree.py:39:19:39:45 | ControlFlowNode for Subscript | -| xml_etree.py:39:19:39:45 | ControlFlowNode for Subscript | xml_etree.py:42:45:42:55 | ControlFlowNode for xml_content | -| xml_etree.py:46:19:46:25 | ControlFlowNode for request | xml_etree.py:46:19:46:30 | ControlFlowNode for Attribute | -| xml_etree.py:46:19:46:30 | ControlFlowNode for Attribute | xml_etree.py:46:19:46:45 | ControlFlowNode for Subscript | -| xml_etree.py:46:19:46:45 | ControlFlowNode for Subscript | xml_etree.py:49:45:49:55 | ControlFlowNode for xml_content | -| xml_etree.py:53:19:53:25 | ControlFlowNode for request | xml_etree.py:53:19:53:30 | ControlFlowNode for Attribute | -| xml_etree.py:53:19:53:30 | ControlFlowNode for Attribute | xml_etree.py:53:19:53:45 | ControlFlowNode for Subscript | -| xml_etree.py:53:19:53:45 | ControlFlowNode for Subscript | 
xml_etree.py:56:45:56:55 | ControlFlowNode for xml_content | -| xml_etree.py:60:19:60:25 | ControlFlowNode for request | xml_etree.py:60:19:60:30 | ControlFlowNode for Attribute | -| xml_etree.py:60:19:60:30 | ControlFlowNode for Attribute | xml_etree.py:60:19:60:45 | ControlFlowNode for Subscript | -| xml_etree.py:60:19:60:45 | ControlFlowNode for Subscript | xml_etree.py:64:45:64:55 | ControlFlowNode for xml_content | -| xml_sax.py:31:19:31:25 | ControlFlowNode for request | xml_sax.py:31:19:31:30 | ControlFlowNode for Attribute | -| xml_sax.py:31:19:31:30 | ControlFlowNode for Attribute | xml_sax.py:31:19:31:45 | ControlFlowNode for Subscript | -| xml_sax.py:31:19:31:45 | ControlFlowNode for Subscript | xml_sax.py:36:18:36:38 | ControlFlowNode for StringIO() | -| xml_sax.py:42:19:42:25 | ControlFlowNode for request | xml_sax.py:42:19:42:30 | ControlFlowNode for Attribute | -| xml_sax.py:42:19:42:30 | ControlFlowNode for Attribute | xml_sax.py:42:19:42:45 | ControlFlowNode for Subscript | -| xml_sax.py:42:19:42:45 | ControlFlowNode for Subscript | xml_sax.py:49:18:49:38 | ControlFlowNode for StringIO() | -| xml_sax.py:63:19:63:25 | ControlFlowNode for request | xml_sax.py:63:19:63:30 | ControlFlowNode for Attribute | -| xml_sax.py:63:19:63:30 | ControlFlowNode for Attribute | xml_sax.py:63:19:63:45 | ControlFlowNode for Subscript | -| xml_sax.py:63:19:63:45 | ControlFlowNode for Subscript | xml_sax.py:69:18:69:38 | ControlFlowNode for StringIO() | -| xml_sax.py:75:19:75:25 | ControlFlowNode for request | xml_sax.py:75:19:75:30 | ControlFlowNode for Attribute | -| xml_sax.py:75:19:75:30 | ControlFlowNode for Attribute | xml_sax.py:75:19:75:45 | ControlFlowNode for Subscript | -| xml_sax.py:75:19:75:45 | ControlFlowNode for Subscript | xml_sax.py:79:33:79:53 | ControlFlowNode for StringIO() | -| xml_sax.py:86:19:86:25 | ControlFlowNode for request | xml_sax.py:86:19:86:30 | ControlFlowNode for Attribute | -| xml_sax.py:86:19:86:30 | ControlFlowNode for Attribute | 
xml_sax.py:86:19:86:45 | ControlFlowNode for Subscript | -| xml_sax.py:86:19:86:45 | ControlFlowNode for Subscript | xml_sax.py:91:22:91:42 | ControlFlowNode for StringIO() | -| xml_sax.py:86:19:86:45 | ControlFlowNode for Subscript | xml_sax.py:93:22:93:42 | ControlFlowNode for StringIO() | -| xml_sax.py:98:19:98:25 | ControlFlowNode for request | xml_sax.py:98:19:98:30 | ControlFlowNode for Attribute | -| xml_sax.py:98:19:98:30 | ControlFlowNode for Attribute | xml_sax.py:98:19:98:45 | ControlFlowNode for Subscript | -| xml_sax.py:98:19:98:45 | ControlFlowNode for Subscript | xml_sax.py:103:18:103:38 | ControlFlowNode for StringIO() | -| xml_to_dict.py:9:19:9:25 | ControlFlowNode for request | xml_to_dict.py:9:19:9:30 | ControlFlowNode for Attribute | -| xml_to_dict.py:9:19:9:30 | ControlFlowNode for Attribute | xml_to_dict.py:9:19:9:45 | ControlFlowNode for Subscript | -| xml_to_dict.py:9:19:9:45 | ControlFlowNode for Subscript | xml_to_dict.py:11:28:11:38 | ControlFlowNode for xml_content | -| xml_to_dict.py:15:19:15:25 | ControlFlowNode for request | xml_to_dict.py:15:19:15:30 | ControlFlowNode for Attribute | -| xml_to_dict.py:15:19:15:30 | ControlFlowNode for Attribute | xml_to_dict.py:15:19:15:45 | ControlFlowNode for Subscript | -| xml_to_dict.py:15:19:15:45 | ControlFlowNode for Subscript | xml_to_dict.py:17:28:17:38 | ControlFlowNode for xml_content | +| test.py:8:19:8:25 | ControlFlowNode for request | test.py:8:19:8:30 | ControlFlowNode for Attribute | +| test.py:8:19:8:30 | ControlFlowNode for Attribute | test.py:8:19:8:45 | ControlFlowNode for Subscript | +| test.py:8:19:8:45 | ControlFlowNode for Subscript | test.py:9:34:9:44 | ControlFlowNode for xml_content | +| test.py:13:19:13:25 | ControlFlowNode for request | test.py:13:19:13:30 | ControlFlowNode for Attribute | +| test.py:13:19:13:30 | ControlFlowNode for Attribute | test.py:13:19:13:45 | ControlFlowNode for Subscript | +| test.py:13:19:13:45 | ControlFlowNode for Subscript | 
test.py:15:34:15:44 | ControlFlowNode for xml_content | +| test.py:19:19:19:25 | ControlFlowNode for request | test.py:19:19:19:30 | ControlFlowNode for Attribute | +| test.py:19:19:19:30 | ControlFlowNode for Attribute | test.py:19:19:19:45 | ControlFlowNode for Subscript | +| test.py:19:19:19:45 | ControlFlowNode for Subscript | test.py:30:34:30:44 | ControlFlowNode for xml_content | nodes -| lxml_etree.py:11:19:11:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| lxml_etree.py:11:19:11:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | -| lxml_etree.py:11:19:11:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | -| lxml_etree.py:13:34:13:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | -| lxml_etree.py:17:19:17:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| lxml_etree.py:17:19:17:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | -| lxml_etree.py:17:19:17:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | -| lxml_etree.py:19:38:19:50 | ControlFlowNode for List | semmle.label | ControlFlowNode for List | -| lxml_etree.py:23:19:23:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| lxml_etree.py:23:19:23:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | -| lxml_etree.py:23:19:23:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | -| lxml_etree.py:25:27:25:37 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | -| lxml_etree.py:29:19:29:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| lxml_etree.py:29:19:29:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | -| lxml_etree.py:29:19:29:45 | ControlFlowNode for Subscript | semmle.label | 
ControlFlowNode for Subscript | -| lxml_etree.py:31:29:31:49 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() | -| lxml_etree.py:37:19:37:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| lxml_etree.py:37:19:37:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | -| lxml_etree.py:37:19:37:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | -| lxml_etree.py:40:34:40:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | -| lxml_etree.py:44:19:44:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| lxml_etree.py:44:19:44:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | -| lxml_etree.py:44:19:44:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | -| lxml_etree.py:47:34:47:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | -| lxml_etree.py:54:19:54:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| lxml_etree.py:54:19:54:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | -| lxml_etree.py:54:19:54:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | -| lxml_etree.py:57:34:57:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | -| lxml_etree.py:62:19:62:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| lxml_etree.py:62:19:62:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | -| lxml_etree.py:62:19:62:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | -| lxml_etree.py:65:34:65:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | -| lxml_etree.py:71:19:71:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for 
request | -| lxml_etree.py:71:19:71:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | -| lxml_etree.py:71:19:71:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | -| lxml_etree.py:74:34:74:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | -| lxml_etree.py:78:19:78:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| lxml_etree.py:78:19:78:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | -| lxml_etree.py:78:19:78:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | -| lxml_etree.py:81:34:81:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | -| lxml_etree.py:87:19:87:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| lxml_etree.py:87:19:87:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | -| lxml_etree.py:87:19:87:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | -| lxml_etree.py:90:34:90:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | -| xml_dom.py:13:19:13:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| xml_dom.py:13:19:13:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | -| xml_dom.py:13:19:13:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | -| xml_dom.py:15:34:15:54 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() | -| xml_dom.py:19:19:19:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| xml_dom.py:19:19:19:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | -| xml_dom.py:19:19:19:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | -| xml_dom.py:21:40:21:50 | 
ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | -| xml_dom.py:25:19:25:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| xml_dom.py:25:19:25:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | -| xml_dom.py:25:19:25:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | -| xml_dom.py:27:34:27:54 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() | -| xml_dom.py:31:19:31:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| xml_dom.py:31:19:31:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | -| xml_dom.py:31:19:31:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | -| xml_dom.py:33:40:33:50 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | -| xml_dom.py:39:19:39:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| xml_dom.py:39:19:39:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | -| xml_dom.py:39:19:39:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | -| xml_dom.py:43:34:43:54 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() | -| xml_etree.py:13:19:13:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| xml_etree.py:13:19:13:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | -| xml_etree.py:13:19:13:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | -| xml_etree.py:15:45:15:55 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | -| xml_etree.py:19:19:19:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| xml_etree.py:19:19:19:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode 
for Attribute | -| xml_etree.py:19:19:19:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | -| xml_etree.py:21:49:21:59 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | -| xml_etree.py:25:19:25:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| xml_etree.py:25:19:25:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | -| xml_etree.py:25:19:25:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | -| xml_etree.py:27:38:27:48 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | -| xml_etree.py:31:19:31:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| xml_etree.py:31:19:31:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | -| xml_etree.py:31:19:31:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | -| xml_etree.py:33:40:33:60 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() | -| xml_etree.py:39:19:39:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| xml_etree.py:39:19:39:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | -| xml_etree.py:39:19:39:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | -| xml_etree.py:42:45:42:55 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | -| xml_etree.py:46:19:46:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| xml_etree.py:46:19:46:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | -| xml_etree.py:46:19:46:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | -| xml_etree.py:49:45:49:55 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | -| 
xml_etree.py:53:19:53:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| xml_etree.py:53:19:53:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | -| xml_etree.py:53:19:53:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | -| xml_etree.py:56:45:56:55 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | -| xml_etree.py:60:19:60:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| xml_etree.py:60:19:60:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | -| xml_etree.py:60:19:60:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | -| xml_etree.py:64:45:64:55 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | -| xml_sax.py:31:19:31:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| xml_sax.py:31:19:31:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | -| xml_sax.py:31:19:31:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | -| xml_sax.py:36:18:36:38 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() | -| xml_sax.py:42:19:42:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| xml_sax.py:42:19:42:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | -| xml_sax.py:42:19:42:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | -| xml_sax.py:49:18:49:38 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() | -| xml_sax.py:63:19:63:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| xml_sax.py:63:19:63:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | -| xml_sax.py:63:19:63:45 | ControlFlowNode for Subscript | 
semmle.label | ControlFlowNode for Subscript | -| xml_sax.py:69:18:69:38 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() | -| xml_sax.py:75:19:75:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| xml_sax.py:75:19:75:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | -| xml_sax.py:75:19:75:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | -| xml_sax.py:79:33:79:53 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() | -| xml_sax.py:86:19:86:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| xml_sax.py:86:19:86:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | -| xml_sax.py:86:19:86:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | -| xml_sax.py:91:22:91:42 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() | -| xml_sax.py:93:22:93:42 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() | -| xml_sax.py:98:19:98:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| xml_sax.py:98:19:98:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | -| xml_sax.py:98:19:98:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | -| xml_sax.py:103:18:103:38 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() | -| xml_to_dict.py:9:19:9:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| xml_to_dict.py:9:19:9:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | -| xml_to_dict.py:9:19:9:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | -| xml_to_dict.py:11:28:11:38 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | -| 
xml_to_dict.py:15:19:15:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| xml_to_dict.py:15:19:15:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | -| xml_to_dict.py:15:19:15:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | -| xml_to_dict.py:17:28:17:38 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | +| test.py:8:19:8:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | +| test.py:8:19:8:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | +| test.py:8:19:8:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | +| test.py:9:34:9:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | +| test.py:13:19:13:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | +| test.py:13:19:13:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | +| test.py:13:19:13:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | +| test.py:15:34:15:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | +| test.py:19:19:19:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | +| test.py:19:19:19:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | +| test.py:19:19:19:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | +| test.py:30:34:30:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | subpaths #select -| lxml_etree.py:13:34:13:44 | ControlFlowNode for xml_content | lxml_etree.py:11:19:11:25 | ControlFlowNode for request | lxml_etree.py:13:34:13:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: XXE. 
| lxml_etree.py:13:34:13:44 | ControlFlowNode for xml_content | This | lxml_etree.py:11:19:11:25 | ControlFlowNode for request | user-provided value | -| lxml_etree.py:19:38:19:50 | ControlFlowNode for List | lxml_etree.py:17:19:17:25 | ControlFlowNode for request | lxml_etree.py:19:38:19:50 | ControlFlowNode for List | $@ XML input is constructed from a $@ and is vulnerable to: XXE. | lxml_etree.py:19:38:19:50 | ControlFlowNode for List | This | lxml_etree.py:17:19:17:25 | ControlFlowNode for request | user-provided value | -| lxml_etree.py:25:27:25:37 | ControlFlowNode for xml_content | lxml_etree.py:23:19:23:25 | ControlFlowNode for request | lxml_etree.py:25:27:25:37 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: XXE. | lxml_etree.py:25:27:25:37 | ControlFlowNode for xml_content | This | lxml_etree.py:23:19:23:25 | ControlFlowNode for request | user-provided value | -| lxml_etree.py:31:29:31:49 | ControlFlowNode for StringIO() | lxml_etree.py:29:19:29:25 | ControlFlowNode for request | lxml_etree.py:31:29:31:49 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: XXE. | lxml_etree.py:31:29:31:49 | ControlFlowNode for StringIO() | This | lxml_etree.py:29:19:29:25 | ControlFlowNode for request | user-provided value | -| lxml_etree.py:40:34:40:44 | ControlFlowNode for xml_content | lxml_etree.py:37:19:37:25 | ControlFlowNode for request | lxml_etree.py:40:34:40:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: XXE. | lxml_etree.py:40:34:40:44 | ControlFlowNode for xml_content | This | lxml_etree.py:37:19:37:25 | ControlFlowNode for request | user-provided value | -| lxml_etree.py:47:34:47:44 | ControlFlowNode for xml_content | lxml_etree.py:44:19:44:25 | ControlFlowNode for request | lxml_etree.py:47:34:47:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: XXE. 
| lxml_etree.py:47:34:47:44 | ControlFlowNode for xml_content | This | lxml_etree.py:44:19:44:25 | ControlFlowNode for request | user-provided value | -| lxml_etree.py:65:34:65:44 | ControlFlowNode for xml_content | lxml_etree.py:62:19:62:25 | ControlFlowNode for request | lxml_etree.py:65:34:65:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: XXE. | lxml_etree.py:65:34:65:44 | ControlFlowNode for xml_content | This | lxml_etree.py:62:19:62:25 | ControlFlowNode for request | user-provided value | -| lxml_etree.py:74:34:74:44 | ControlFlowNode for xml_content | lxml_etree.py:71:19:71:25 | ControlFlowNode for request | lxml_etree.py:74:34:74:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | lxml_etree.py:74:34:74:44 | ControlFlowNode for xml_content | This | lxml_etree.py:71:19:71:25 | ControlFlowNode for request | user-provided value | -| lxml_etree.py:81:34:81:44 | ControlFlowNode for xml_content | lxml_etree.py:78:19:78:25 | ControlFlowNode for request | lxml_etree.py:81:34:81:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup, XXE. | lxml_etree.py:81:34:81:44 | ControlFlowNode for xml_content | This | lxml_etree.py:78:19:78:25 | ControlFlowNode for request | user-provided value | -| lxml_etree.py:90:34:90:44 | ControlFlowNode for xml_content | lxml_etree.py:87:19:87:25 | ControlFlowNode for request | lxml_etree.py:90:34:90:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: DTD retrieval. 
| lxml_etree.py:90:34:90:44 | ControlFlowNode for xml_content | This | lxml_etree.py:87:19:87:25 | ControlFlowNode for request | user-provided value | -| xml_dom.py:15:34:15:54 | ControlFlowNode for StringIO() | xml_dom.py:13:19:13:25 | ControlFlowNode for request | xml_dom.py:15:34:15:54 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_dom.py:15:34:15:54 | ControlFlowNode for StringIO() | This | xml_dom.py:13:19:13:25 | ControlFlowNode for request | user-provided value | -| xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | xml_dom.py:19:19:19:25 | ControlFlowNode for request | xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | This | xml_dom.py:19:19:19:25 | ControlFlowNode for request | user-provided value | -| xml_dom.py:27:34:27:54 | ControlFlowNode for StringIO() | xml_dom.py:25:19:25:25 | ControlFlowNode for request | xml_dom.py:27:34:27:54 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_dom.py:27:34:27:54 | ControlFlowNode for StringIO() | This | xml_dom.py:25:19:25:25 | ControlFlowNode for request | user-provided value | -| xml_dom.py:33:40:33:50 | ControlFlowNode for xml_content | xml_dom.py:31:19:31:25 | ControlFlowNode for request | xml_dom.py:33:40:33:50 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. 
| xml_dom.py:33:40:33:50 | ControlFlowNode for xml_content | This | xml_dom.py:31:19:31:25 | ControlFlowNode for request | user-provided value | -| xml_dom.py:43:34:43:54 | ControlFlowNode for StringIO() | xml_dom.py:39:19:39:25 | ControlFlowNode for request | xml_dom.py:43:34:43:54 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, DTD retrieval, Quadratic Blowup, XXE. | xml_dom.py:43:34:43:54 | ControlFlowNode for StringIO() | This | xml_dom.py:39:19:39:25 | ControlFlowNode for request | user-provided value | -| xml_etree.py:15:45:15:55 | ControlFlowNode for xml_content | xml_etree.py:13:19:13:25 | ControlFlowNode for request | xml_etree.py:15:45:15:55 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_etree.py:15:45:15:55 | ControlFlowNode for xml_content | This | xml_etree.py:13:19:13:25 | ControlFlowNode for request | user-provided value | -| xml_etree.py:21:49:21:59 | ControlFlowNode for xml_content | xml_etree.py:19:19:19:25 | ControlFlowNode for request | xml_etree.py:21:49:21:59 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_etree.py:21:49:21:59 | ControlFlowNode for xml_content | This | xml_etree.py:19:19:19:25 | ControlFlowNode for request | user-provided value | -| xml_etree.py:27:38:27:48 | ControlFlowNode for xml_content | xml_etree.py:25:19:25:25 | ControlFlowNode for request | xml_etree.py:27:38:27:48 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. 
| xml_etree.py:27:38:27:48 | ControlFlowNode for xml_content | This | xml_etree.py:25:19:25:25 | ControlFlowNode for request | user-provided value | -| xml_etree.py:33:40:33:60 | ControlFlowNode for StringIO() | xml_etree.py:31:19:31:25 | ControlFlowNode for request | xml_etree.py:33:40:33:60 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_etree.py:33:40:33:60 | ControlFlowNode for StringIO() | This | xml_etree.py:31:19:31:25 | ControlFlowNode for request | user-provided value | -| xml_etree.py:42:45:42:55 | ControlFlowNode for xml_content | xml_etree.py:39:19:39:25 | ControlFlowNode for request | xml_etree.py:42:45:42:55 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_etree.py:42:45:42:55 | ControlFlowNode for xml_content | This | xml_etree.py:39:19:39:25 | ControlFlowNode for request | user-provided value | -| xml_etree.py:49:45:49:55 | ControlFlowNode for xml_content | xml_etree.py:46:19:46:25 | ControlFlowNode for request | xml_etree.py:49:45:49:55 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: XXE. | xml_etree.py:49:45:49:55 | ControlFlowNode for xml_content | This | xml_etree.py:46:19:46:25 | ControlFlowNode for request | user-provided value | -| xml_etree.py:56:45:56:55 | ControlFlowNode for xml_content | xml_etree.py:53:19:53:25 | ControlFlowNode for request | xml_etree.py:56:45:56:55 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: XXE. 
| xml_etree.py:56:45:56:55 | ControlFlowNode for xml_content | This | xml_etree.py:53:19:53:25 | ControlFlowNode for request | user-provided value | -| xml_sax.py:36:18:36:38 | ControlFlowNode for StringIO() | xml_sax.py:31:19:31:25 | ControlFlowNode for request | xml_sax.py:36:18:36:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_sax.py:36:18:36:38 | ControlFlowNode for StringIO() | This | xml_sax.py:31:19:31:25 | ControlFlowNode for request | user-provided value | -| xml_sax.py:49:18:49:38 | ControlFlowNode for StringIO() | xml_sax.py:42:19:42:25 | ControlFlowNode for request | xml_sax.py:49:18:49:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_sax.py:49:18:49:38 | ControlFlowNode for StringIO() | This | xml_sax.py:42:19:42:25 | ControlFlowNode for request | user-provided value | -| xml_sax.py:69:18:69:38 | ControlFlowNode for StringIO() | xml_sax.py:63:19:63:25 | ControlFlowNode for request | xml_sax.py:69:18:69:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, DTD retrieval, Quadratic Blowup, XXE. | xml_sax.py:69:18:69:38 | ControlFlowNode for StringIO() | This | xml_sax.py:63:19:63:25 | ControlFlowNode for request | user-provided value | -| xml_sax.py:79:33:79:53 | ControlFlowNode for StringIO() | xml_sax.py:75:19:75:25 | ControlFlowNode for request | xml_sax.py:79:33:79:53 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, DTD retrieval, Quadratic Blowup, XXE. 
| xml_sax.py:79:33:79:53 | ControlFlowNode for StringIO() | This | xml_sax.py:75:19:75:25 | ControlFlowNode for request | user-provided value | -| xml_sax.py:91:22:91:42 | ControlFlowNode for StringIO() | xml_sax.py:86:19:86:25 | ControlFlowNode for request | xml_sax.py:91:22:91:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, DTD retrieval, Quadratic Blowup, XXE. | xml_sax.py:91:22:91:42 | ControlFlowNode for StringIO() | This | xml_sax.py:86:19:86:25 | ControlFlowNode for request | user-provided value | -| xml_sax.py:93:22:93:42 | ControlFlowNode for StringIO() | xml_sax.py:86:19:86:25 | ControlFlowNode for request | xml_sax.py:93:22:93:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_sax.py:93:22:93:42 | ControlFlowNode for StringIO() | This | xml_sax.py:86:19:86:25 | ControlFlowNode for request | user-provided value | -| xml_sax.py:103:18:103:38 | ControlFlowNode for StringIO() | xml_sax.py:98:19:98:25 | ControlFlowNode for request | xml_sax.py:103:18:103:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_sax.py:103:18:103:38 | ControlFlowNode for StringIO() | This | xml_sax.py:98:19:98:25 | ControlFlowNode for request | user-provided value | -| xml_to_dict.py:17:28:17:38 | ControlFlowNode for xml_content | xml_to_dict.py:15:19:15:25 | ControlFlowNode for request | xml_to_dict.py:17:28:17:38 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. 
| xml_to_dict.py:17:28:17:38 | ControlFlowNode for xml_content | This | xml_to_dict.py:15:19:15:25 | ControlFlowNode for request | user-provided value | +| test.py:9:34:9:44 | ControlFlowNode for xml_content | test.py:8:19:8:25 | ControlFlowNode for request | test.py:9:34:9:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: XXE. | test.py:9:34:9:44 | ControlFlowNode for xml_content | This | test.py:8:19:8:25 | ControlFlowNode for request | user-provided value | +| test.py:30:34:30:44 | ControlFlowNode for xml_content | test.py:19:19:19:25 | ControlFlowNode for request | test.py:30:34:30:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, DTD retrieval, Quadratic Blowup, XXE. | test.py:30:34:30:44 | ControlFlowNode for xml_content | This | test.py:19:19:19:25 | ControlFlowNode for request | user-provided value | diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/test.py b/python/ql/test/experimental/query-tests/Security/CWE-611/test.py new file mode 100644 index 000000000000..d9181c4cf346 --- /dev/null +++ b/python/ql/test/experimental/query-tests/Security/CWE-611/test.py @@ -0,0 +1,30 @@ +from flask import Flask, request +import lxml.etree + +app = Flask(__name__) + +@app.route("/vuln-handler") +def vuln_handler(): + xml_content = request.args['xml_content'] + return lxml.etree.fromstring(xml_content).text + +@app.route("/safe-handler") +def safe_handler(): + xml_content = request.args['xml_content'] + parser = lxml.etree.XMLParser(resolve_entities=False) + return lxml.etree.fromstring(xml_content, parser=parser).text + +@app.route("/super-vuln-handler") +def super_vuln_handler(): + xml_content = request.args['xml_content'] + parser = lxml.etree.XMLParser( + # allows XXE + resolve_entities=True, + # allows remote XXE + no_network=False, + # together with `no_network=False`, allows DTD-retrival + load_dtd=True, + # allows DoS attacks + 
huge_tree=True, + ) + return lxml.etree.fromstring(xml_content, parser=parser).text From 33ebcdf43715f10d1deafc493e9fd568f7b78bea Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 3 Mar 2022 21:26:06 +0100 Subject: [PATCH 63/79] Python: Support feed method of lxml/xml.etree Parsers --- .../semmle/python/frameworks/Xml.qll | 50 +++++++++++++++++++ .../frameworks/XML/lxml_etree.py | 6 +++ .../library-tests/frameworks/XML/xml_etree.py | 6 +++ 3 files changed, 62 insertions(+) diff --git a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll index b0e7592c3936..caf5a3b434ab 100644 --- a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll +++ b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll @@ -79,6 +79,28 @@ private module Xml { } } + /** + * A call to the `feed` method of an `xml.etree` parser. + */ + private class XMLEtreeParserFeedCall extends DataFlow::CallCfgNode, XML::XMLParsing::Range { + XMLEtreeParserFeedCall() { + this = + API::moduleImport("xml") + .getMember("etree") + .getMember("ElementTree") + .getMember("XMLParser") + .getReturn() + .getMember("feed") + .getACall() + } + + override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("data")] } + + override predicate vulnerable(XML::XMLVulnerabilityKind kind) { + kind.isBillionLaughs() or kind.isQuadraticBlowup() + } + } + /** * A call to the `setFeature` method on a XML sax parser. * @@ -322,6 +344,7 @@ private module Xml { } override predicate vulnerable(XML::XMLVulnerabilityKind kind) { + // TODO: This should be done with type-tracking exists(XML::XMLParser xmlParser | xmlParser = this.getArgByName("parser").getALocalSource() and xmlParser.vulnerable(kind) ) @@ -330,6 +353,33 @@ private module Xml { } } + /** + * A call to the `feed` method of an `lxml.etree` parser. 
+ */ + private class LXMLEtreeParserFeedCall extends DataFlow::MethodCallNode, XML::XMLParsing::Range { + LXMLEtreeParserFeedCall() { + exists(API::Node parserInstance | + parserInstance = + API::moduleImport("lxml").getMember("etree").getMember("XMLParser").getReturn() + or + parserInstance = + API::moduleImport("lxml").getMember("etree").getMember("get_default_parser").getReturn() + | + this = parserInstance.getMember("feed").getACall() + ) + } + + override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("data")] } + + override predicate vulnerable(XML::XMLVulnerabilityKind kind) { + // TODO: This should be done with type-tracking + exists(XML::XMLParser xmlParser | + xmlParser = this.getObject().getALocalSource() and + xmlParser.vulnerable(kind) + ) + } + } + /** * Gets a call to `xmltodict.parse`. * diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/lxml_etree.py b/python/ql/test/experimental/library-tests/frameworks/XML/lxml_etree.py index 3e6e6fb08e7f..db8b667e70a5 100644 --- a/python/ql/test/experimental/library-tests/frameworks/XML/lxml_etree.py +++ b/python/ql/test/experimental/library-tests/frameworks/XML/lxml_etree.py @@ -26,6 +26,12 @@ parser = lxml.etree.get_default_parser() lxml.etree.fromstring(x, parser=parser) # $ input=x vuln='XXE' +# manual use of feed method +parser = lxml.etree.XMLParser() +parser.feed(x) # $ input=x vuln='XXE' +parser.feed(data=x) # $ input=x vuln='XXE' +parser.close() + # XXE-safe parser = lxml.etree.XMLParser(resolve_entities=False) lxml.etree.fromstring(x, parser=parser) # $ input=x diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/xml_etree.py b/python/ql/test/experimental/library-tests/frameworks/XML/xml_etree.py index 23ac3784cbc3..3220d95c624c 100644 --- a/python/ql/test/experimental/library-tests/frameworks/XML/xml_etree.py +++ b/python/ql/test/experimental/library-tests/frameworks/XML/xml_etree.py @@ -27,6 +27,12 @@ parser = 
xml.etree.ElementTree.XMLParser() xml.etree.ElementTree.fromstring(x, parser=parser) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' +# manual use of feed method +parser = xml.etree.ElementTree.XMLParser() +parser.feed(x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' +parser.feed(data=x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' +parser.close() + # note: it's technically possible to use the thing wrapper func `fromstring` with an # `lxml` parser, and thereby change what vulnerabilities you are exposed to.. but it # seems very unlikely that anyone would do this, so we have intentionally not added any From 46238d5ea049e5b51f99f4b66366957852a649c8 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 3 Mar 2022 21:27:52 +0100 Subject: [PATCH 64/79] Python: Add test for XMLPullParser But handling this in a nice way will require some restructuring --- .../experimental/library-tests/frameworks/XML/xml_etree.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/xml_etree.py b/python/ql/test/experimental/library-tests/frameworks/XML/xml_etree.py index 3220d95c624c..ee452c11853d 100644 --- a/python/ql/test/experimental/library-tests/frameworks/XML/xml_etree.py +++ b/python/ql/test/experimental/library-tests/frameworks/XML/xml_etree.py @@ -33,6 +33,12 @@ parser.feed(data=x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' parser.close() +# manual use of feed method on XMLPullParser +parser = xml.etree.ElementTree.XMLPullParser() +parser.feed(x) # $ MISSING: input=x vuln='Billion Laughs' vuln='Quadratic Blowup' +parser.feed(data=x) # $ MISSING: input=x vuln='Billion Laughs' vuln='Quadratic Blowup' +parser.close() + # note: it's technically possible to use the thing wrapper func `fromstring` with an # `lxml` parser, and thereby change what vulnerabilities you are exposed to.. 
but it # seems very unlikely that anyone would do this, so we have intentionally not added any From de0e67f327de078af5c6574445e82f7574f52984 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 3 Mar 2022 21:31:15 +0100 Subject: [PATCH 65/79] Python: Restructure overall XML modeling --- .../semmle/python/frameworks/Xml.qll | 82 ++++++++++--------- 1 file changed, 44 insertions(+), 38 deletions(-) diff --git a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll index caf5a3b434ab..55f45df99ca7 100644 --- a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll +++ b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll @@ -8,7 +8,7 @@ private import semmle.python.dataflow.new.DataFlow private import experimental.semmle.python.Concepts private import semmle.python.ApiGraphs -private module Xml { +private module XmlEtree { /** * Gets a call to `xml.etree.ElementTree.XMLParser`. */ @@ -100,7 +100,9 @@ private module Xml { kind.isBillionLaughs() or kind.isQuadraticBlowup() } } +} +private module SaxBasedParsing { /** * A call to the `setFeature` method on a XML sax parser. * @@ -251,6 +253,45 @@ private module Xml { } } + /** + * A call to the `parse` or `parseString` methods from `xml.dom.minidom` or `xml.dom.pulldom`. + * + * Both of these modules are based on SAX parsers. 
+ */ + private class XMLDomParsing extends DataFlow::CallCfgNode, XML::XMLParsing::Range { + XMLDomParsing() { + this = + API::moduleImport("xml") + .getMember("dom") + .getMember(["minidom", "pulldom"]) + .getMember(["parse", "parseString"]) + .getACall() + } + + override DataFlow::Node getAnInput() { + result in [ + this.getArg(0), + // parseString + this.getArgByName("string"), + // minidom.parse + this.getArgByName("file"), + // pulldom.parse + this.getArgByName("stream_or_string"), + ] + } + + DataFlow::Node getParserArg() { result in [this.getArg(1), this.getArgByName("parser")] } + + override predicate vulnerable(XML::XMLVulnerabilityKind kind) { + this.getParserArg() = saxParserWithFeatureExternalGesTurnedOn() and + (kind.isXxe() or kind.isDtdRetrieval()) + or + (kind.isBillionLaughs() or kind.isQuadraticBlowup()) + } + } +} + +private module Lxml { /** * A call to `lxml.etree.get_default_parser`. * @@ -379,7 +420,9 @@ private module Xml { ) } } +} +private module Xmltodict { /** * Gets a call to `xmltodict.parse`. * @@ -405,41 +448,4 @@ private module Xml { this.getArgByName("disable_entities").getALocalSource().asExpr() = any(False f) } } - - /** - * A call to the `parse` or `parseString` methods from `xml.dom.minidom` or `xml.dom.pulldom`. - * - * Both of these modules are based on SAX parsers. 
- */ - private class XMLDomParsing extends DataFlow::CallCfgNode, XML::XMLParsing::Range { - XMLDomParsing() { - this = - API::moduleImport("xml") - .getMember("dom") - .getMember(["minidom", "pulldom"]) - .getMember(["parse", "parseString"]) - .getACall() - } - - override DataFlow::Node getAnInput() { - result in [ - this.getArg(0), - // parseString - this.getArgByName("string"), - // minidom.parse - this.getArgByName("file"), - // pulldom.parse - this.getArgByName("stream_or_string"), - ] - } - - DataFlow::Node getParserArg() { result in [this.getArg(1), this.getArgByName("parser")] } - - override predicate vulnerable(XML::XMLVulnerabilityKind kind) { - this.getParserArg() = saxParserWithFeatureExternalGesTurnedOn() and - (kind.isXxe() or kind.isDtdRetrieval()) - or - (kind.isBillionLaughs() or kind.isQuadraticBlowup()) - } - } } From a033b71eaf16dce055696ca7f1485c7f079ad2ed Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 3 Mar 2022 21:34:46 +0100 Subject: [PATCH 66/79] Python: Align QLdocs of XML modeling --- .../semmle/python/frameworks/Xml.qll | 72 +++++-------------- 1 file changed, 18 insertions(+), 54 deletions(-) diff --git a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll index 55f45df99ca7..6c3b86f426e3 100644 --- a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll +++ b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll @@ -10,7 +10,7 @@ private import semmle.python.ApiGraphs private module XmlEtree { /** - * Gets a call to `xml.etree.ElementTree.XMLParser`. + * A call to `xml.etree.ElementTree.XMLParser`. 
*/ private class XMLEtreeParser extends DataFlow::CallCfgNode, XML::XMLParser::Range { XMLEtreeParser() { @@ -30,22 +30,13 @@ private module XmlEtree { } /** - * Gets a call to: - * * `xml.etree.ElementTree.fromstring` - * * `xml.etree.ElementTree.fromstringlist` - * * `xml.etree.ElementTree.XML` - * * `xml.etree.ElementTree.parse` - * - * Given the following example: - * - * ```py - * parser = lxml.etree.XMLParser() - * xml.etree.ElementTree.fromstring(xml_content, parser=parser).text - * ``` - * - * * `this` would be `xml.etree.ElementTree.fromstring(xml_content, parser=parser)`. - * * `getAnInput()`'s result would be `xml_content`. - * * `vulnerable(kind)`'s `kind` would be `XXE`. + * A call to either of: + * - `xml.etree.ElementTree.fromstring` + * - `xml.etree.ElementTree.fromstringlist` + * - `xml.etree.ElementTree.XML` + * - `xml.etree.ElementTree.XMLID` + * - `xml.etree.ElementTree.parse` + * - `xml.etree.ElementTree.iterparse` */ private class XMLEtreeParsing extends DataFlow::CallCfgNode, XML::XMLParsing::Range { XMLEtreeParsing() { @@ -186,16 +177,7 @@ private module SaxBasedParsing { } /** - * A XML parsing call with a sax parser. - * - * ```py - * BadHandler = MainHandler() - * parser = xml.sax.make_parser() - * parser.setContentHandler(BadHandler) - * parser.setFeature(xml.sax.handler.feature_external_ges, False) - * parser.parse(StringIO(xml_content)) - * parsed_xml = BadHandler._result - * ``` + * A call to the `parse` method on a SAX XML parser. 
*/ private class XMLSaxInstanceParsing extends DataFlow::MethodCallNode, XML::XMLParsing::Range { XMLSaxInstanceParsing() { @@ -346,22 +328,14 @@ private module Lxml { } /** - * Gets a call to: - * * `lxml.etree.fromstring` - * * `xml.etree.fromstringlist` - * * `xml.etree.XML` - * * `xml.etree.parse` - * - * Given the following example: + * A call to either of: + * - `lxml.etree.fromstring` + * - `lxml.etree.fromstringlist` + * - `lxml.etree.XML` + * - `lxml.etree.parse` + * - `lxml.etree.parseid` * - * ```py - * parser = lxml.etree.XMLParser() - * lxml.etree.fromstring(xml_content, parser=parser).text - * ``` - * - * * `this` would be `lxml.etree.fromstring(xml_content, parser=parser)`. - * * `getAnInput()`'s result would be `xml_content`. - * * `vulnerable(kind)`'s `kind` would be `XXE`. + * See https://lxml.de/apidoc/lxml.etree.html?highlight=parseids#lxml.etree.fromstring */ private class LXMLParsing extends DataFlow::CallCfgNode, XML::XMLParsing::Range { LXMLParsing() { @@ -395,7 +369,7 @@ private module Lxml { } /** - * A call to the `feed` method of an `lxml.etree` parser. + * A call to the `feed` method of an `lxml` parser. */ private class LXMLEtreeParserFeedCall extends DataFlow::MethodCallNode, XML::XMLParsing::Range { LXMLEtreeParserFeedCall() { @@ -424,17 +398,7 @@ private module Lxml { private module Xmltodict { /** - * Gets a call to `xmltodict.parse`. - * - * Given the following example: - * - * ```py - * xmltodict.parse(xml_content, disable_entities=False) - * ``` - * - * * `this` would be `xmltodict.parse(xml_content, disable_entities=False)`. - * * `getAnInput()`'s result would be `xml_content`. - * * `vulnerable(kind)`'s `kind` would be `Billion Laughs` and `Quadratic Blowup`. + * A call to `xmltodict.parse`. 
*/ private class XMLtoDictParsing extends DataFlow::CallCfgNode, XML::XMLParsing::Range { XMLtoDictParsing() { this = API::moduleImport("xmltodict").getMember("parse").getACall() } From c0a2c25f5a712967ea5d067907e7c5be7b71a144 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 3 Mar 2022 21:38:52 +0100 Subject: [PATCH 67/79] Python: Restructure modeling of `xml.etree` parsers --- .../semmle/python/frameworks/Xml.qll | 96 +++++++++++-------- .../library-tests/frameworks/XML/xml_etree.py | 4 +- 2 files changed, 59 insertions(+), 41 deletions(-) diff --git a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll index 6c3b86f426e3..e6a52fc19be6 100644 --- a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll +++ b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll @@ -10,22 +10,65 @@ private import semmle.python.ApiGraphs private module XmlEtree { /** - * A call to `xml.etree.ElementTree.XMLParser`. + * Provides models for `xml.etree` parsers + * + * See + * - https://docs.python.org/3.10/library/xml.etree.elementtree.html#xml.etree.ElementTree.XMLParser + * - https://docs.python.org/3.10/library/xml.etree.elementtree.html#xml.etree.ElementTree.XMLPullParser */ - private class XMLEtreeParser extends DataFlow::CallCfgNode, XML::XMLParser::Range { - XMLEtreeParser() { - this = - API::moduleImport("xml") - .getMember("etree") - .getMember("ElementTree") - .getMember("XMLParser") - .getACall() + module XMLParser { + /** + * A source of instances of `xml.etree` parsers, extend this class to model new instances. + * + * This can include instantiations of the class, return values from function + * calls, or a special parameter that will be set when functions are called by an external + * library. + * + * Use the predicate `XMLParser::instance()` to get references to instances of `xml.etree` parsers. 
+ */ + abstract class InstanceSource extends DataFlow::LocalSourceNode { } + + /** A direct instantiation of `xml.etree` parsers. */ + private class ClassInstantiation extends InstanceSource, DataFlow::CallCfgNode { + ClassInstantiation() { + this = + API::moduleImport("xml") + .getMember("etree") + .getMember("ElementTree") + .getMember("XMLParser") + .getACall() + or + this = + API::moduleImport("xml") + .getMember("etree") + .getMember("ElementTree") + .getMember("XMLPullParser") + .getACall() + } } - override DataFlow::Node getAnInput() { none() } + /** Gets a reference to an `xml.etree` parser instance. */ + private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) { + t.start() and + result instanceof InstanceSource + or + exists(DataFlow::TypeTracker t2 | result = instance(t2).track(t2, t)) + } - override predicate vulnerable(XML::XMLVulnerabilityKind kind) { - kind.isBillionLaughs() or kind.isQuadraticBlowup() + /** Gets a reference to an `xml.etree` parser instance. */ + DataFlow::Node instance() { instance(DataFlow::TypeTracker::end()).flowsTo(result) } + + /** + * A call to the `feed` method of an `xml.etree` parser. + */ + private class XMLEtreeParserFeedCall extends DataFlow::MethodCallNode, XML::XMLParsing::Range { + XMLEtreeParserFeedCall() { this.calls(instance(), "feed") } + + override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("data")] } + + override predicate vulnerable(XML::XMLVulnerabilityKind kind) { + kind.isBillionLaughs() or kind.isQuadraticBlowup() + } } } @@ -61,33 +104,8 @@ private module XmlEtree { } override predicate vulnerable(XML::XMLVulnerabilityKind kind) { - not exists(this.getArgByName("parser")) and - (kind.isBillionLaughs() or kind.isQuadraticBlowup()) - or - exists(XML::XMLParser xmlParser | - xmlParser = this.getArgByName("parser").getALocalSource() and xmlParser.vulnerable(kind) - ) - } - } - - /** - * A call to the `feed` method of an `xml.etree` parser. 
- */ - private class XMLEtreeParserFeedCall extends DataFlow::CallCfgNode, XML::XMLParsing::Range { - XMLEtreeParserFeedCall() { - this = - API::moduleImport("xml") - .getMember("etree") - .getMember("ElementTree") - .getMember("XMLParser") - .getReturn() - .getMember("feed") - .getACall() - } - - override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("data")] } - - override predicate vulnerable(XML::XMLVulnerabilityKind kind) { + // note: it does not matter what `xml.etree` parser you are using, you cannot + // change the security features anyway :| kind.isBillionLaughs() or kind.isQuadraticBlowup() } } diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/xml_etree.py b/python/ql/test/experimental/library-tests/frameworks/XML/xml_etree.py index ee452c11853d..df126e458e2d 100644 --- a/python/ql/test/experimental/library-tests/frameworks/XML/xml_etree.py +++ b/python/ql/test/experimental/library-tests/frameworks/XML/xml_etree.py @@ -35,8 +35,8 @@ # manual use of feed method on XMLPullParser parser = xml.etree.ElementTree.XMLPullParser() -parser.feed(x) # $ MISSING: input=x vuln='Billion Laughs' vuln='Quadratic Blowup' -parser.feed(data=x) # $ MISSING: input=x vuln='Billion Laughs' vuln='Quadratic Blowup' +parser.feed(x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' +parser.feed(data=x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' parser.close() # note: it's technically possible to use the thing wrapper func `fromstring` with an From c0a6f9f3fdcd7d3b52d4da7fb5657ad839686322 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 3 Mar 2022 22:00:55 +0100 Subject: [PATCH 68/79] Python: Restructure lxml modeling and handle parser being passed as positional argument --- .../semmle/python/frameworks/Xml.qll | 164 ++++++++++-------- .../frameworks/XML/lxml_etree.py | 1 + 2 files changed, 94 insertions(+), 71 deletions(-) diff --git a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll 
b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll index e6a52fc19be6..4af068cad317 100644 --- a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll +++ b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll @@ -293,55 +293,104 @@ private module SaxBasedParsing { private module Lxml { /** - * A call to `lxml.etree.get_default_parser`. + * Provides models for `lxml.etree` parsers * - * See https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.get_default_parser + * See https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.XMLParser */ - private class LXMLDefaultParser extends DataFlow::CallCfgNode, XML::XMLParser::Range { - LXMLDefaultParser() { - this = API::moduleImport("lxml").getMember("etree").getMember("get_default_parser").getACall() + module XMLParser { + /** + * A source of instances of `lxml.etree` parsers, extend this class to model new instances. + * + * This can include instantiations of the class, return values from function + * calls, or a special parameter that will be set when functions are called by an external + * library. + * + * Use the predicate `XMLParser::instance()` to get references to instances of `lxml.etree` parsers. + */ + abstract class InstanceSource extends DataFlow::LocalSourceNode { + /** Holds if this instance is vulnerable to `kind`. */ + abstract predicate vulnerable(XML::XMLVulnerabilityKind kind); } - override DataFlow::Node getAnInput() { none() } + /** + * A call to `lxml.etree.XMLParser`. + * + * See https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.XMLParser + */ + private class LXMLParser extends InstanceSource, DataFlow::CallCfgNode { + LXMLParser() { + this = API::moduleImport("lxml").getMember("etree").getMember("XMLParser").getACall() + } - override predicate vulnerable(XML::XMLVulnerabilityKind kind) { - // as highlighted by - // https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.XMLParser - // by default XXE is allow. 
so as long as the default parser has not been - // overridden, the result is also vuln to XXE. - kind.isXxe() - // TODO: take into account that you can override the default parser with `lxml.etree.get_default_parser`. + // NOTE: it's not possible to change settings of a parser after constructing it + override predicate vulnerable(XML::XMLVulnerabilityKind kind) { + kind.isXxe() and + ( + // resolve_entities has default True + not exists(this.getArgByName("resolve_entities")) + or + this.getArgByName("resolve_entities").getALocalSource().asExpr() = any(True t) + ) + or + (kind.isBillionLaughs() or kind.isQuadraticBlowup()) and + this.getArgByName("huge_tree").getALocalSource().asExpr() = any(True t) + or + kind.isDtdRetrieval() and + this.getArgByName("load_dtd").getALocalSource().asExpr() = any(True t) and + this.getArgByName("no_network").getALocalSource().asExpr() = any(False t) + } } - } - /** - * A call to `lxml.etree.XMLParser`. - * - * See https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.XMLParser - */ - private class LXMLParser extends DataFlow::CallCfgNode, XML::XMLParser::Range { - LXMLParser() { - this = API::moduleImport("lxml").getMember("etree").getMember("XMLParser").getACall() - } + /** + * A call to `lxml.etree.get_default_parser`. + * + * See https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.get_default_parser + */ + private class LXMLDefaultParser extends InstanceSource, DataFlow::CallCfgNode { + LXMLDefaultParser() { + this = + API::moduleImport("lxml").getMember("etree").getMember("get_default_parser").getACall() + } - override DataFlow::Node getAnInput() { none() } + override predicate vulnerable(XML::XMLVulnerabilityKind kind) { + // as highlighted by + // https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.XMLParser + // by default XXE is allow. so as long as the default parser has not been + // overridden, the result is also vuln to XXE. 
+ kind.isXxe() + // TODO: take into account that you can override the default parser with `lxml.etree.get_default_parser`. + } + } - // NOTE: it's not possible to change settings of a parser after constructing it - override predicate vulnerable(XML::XMLVulnerabilityKind kind) { - kind.isXxe() and - ( - // resolve_entities has default True - not exists(this.getArgByName("resolve_entities")) - or - this.getArgByName("resolve_entities").getALocalSource().asExpr() = any(True t) - ) - or - (kind.isBillionLaughs() or kind.isQuadraticBlowup()) and - this.getArgByName("huge_tree").getALocalSource().asExpr() = any(True t) + /** Gets a reference to an `lxml.etree` parsers instance, with origin in `origin` */ + private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t, InstanceSource origin) { + t.start() and + result = origin or - kind.isDtdRetrieval() and - this.getArgByName("load_dtd").getALocalSource().asExpr() = any(True t) and - this.getArgByName("no_network").getALocalSource().asExpr() = any(False t) + exists(DataFlow::TypeTracker t2 | result = instance(t2, origin).track(t2, t)) + } + + /** Gets a reference to an `lxml.etree` parsers instance, with origin in `origin` */ + DataFlow::Node instance(InstanceSource origin) { + instance(DataFlow::TypeTracker::end(), origin).flowsTo(result) + } + + /** Gets a reference to an `lxml.etree` parser instance, that is vulnerable to `kind`. */ + DataFlow::Node instanceVulnerableTo(XML::XMLVulnerabilityKind kind) { + exists(InstanceSource origin | result = instance(origin) and origin.vulnerable(kind)) + } + + /** + * A call to the `feed` method of an `lxml` parser. 
+ */ + private class LXMLParserFeedCall extends DataFlow::MethodCallNode, XML::XMLParsing::Range { + LXMLParserFeedCall() { this.calls(instance(_), "feed") } + + override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("data")] } + + override predicate vulnerable(XML::XMLVulnerabilityKind kind) { + this.calls(instanceVulnerableTo(kind), "feed") + } } } @@ -376,40 +425,13 @@ private module Lxml { ] } - override predicate vulnerable(XML::XMLVulnerabilityKind kind) { - // TODO: This should be done with type-tracking - exists(XML::XMLParser xmlParser | - xmlParser = this.getArgByName("parser").getALocalSource() and xmlParser.vulnerable(kind) - ) - or - kind.isXxe() and not exists(this.getArgByName("parser")) - } - } - - /** - * A call to the `feed` method of an `lxml` parser. - */ - private class LXMLEtreeParserFeedCall extends DataFlow::MethodCallNode, XML::XMLParsing::Range { - LXMLEtreeParserFeedCall() { - exists(API::Node parserInstance | - parserInstance = - API::moduleImport("lxml").getMember("etree").getMember("XMLParser").getReturn() - or - parserInstance = - API::moduleImport("lxml").getMember("etree").getMember("get_default_parser").getReturn() - | - this = parserInstance.getMember("feed").getACall() - ) - } - - override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("data")] } + DataFlow::Node getParserArg() { result in [this.getArg(1), this.getArgByName("parser")] } override predicate vulnerable(XML::XMLVulnerabilityKind kind) { - // TODO: This should be done with type-tracking - exists(XML::XMLParser xmlParser | - xmlParser = this.getObject().getALocalSource() and - xmlParser.vulnerable(kind) - ) + this.getParserArg() = XMLParser::instanceVulnerableTo(kind) + or + kind.isXxe() and + not exists(this.getParserArg()) } } } diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/lxml_etree.py b/python/ql/test/experimental/library-tests/frameworks/XML/lxml_etree.py index 
db8b667e70a5..47ade6431221 100644 --- a/python/ql/test/experimental/library-tests/frameworks/XML/lxml_etree.py +++ b/python/ql/test/experimental/library-tests/frameworks/XML/lxml_etree.py @@ -34,6 +34,7 @@ # XXE-safe parser = lxml.etree.XMLParser(resolve_entities=False) +lxml.etree.fromstring(x, parser) # $ input=x lxml.etree.fromstring(x, parser=parser) # $ input=x # XXE-vuln From df8e0fce68c1ea11bacaf789caebfbd7e5391376 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 3 Mar 2022 22:02:48 +0100 Subject: [PATCH 69/79] Python: Minor fixup of qldoc --- .../experimental/semmle/python/frameworks/Xml.qll | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll index 4af068cad317..a3f79967b67c 100644 --- a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll +++ b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll @@ -153,7 +153,11 @@ private module SaxBasedParsing { result = saxParserSetFeatureStateArgBacktracker(DataFlow::TypeBackTracker::end(), arg) } - /** Gets a reference to a XML sax parser that has `feature_external_ges` turned on */ + /** + * Gets a reference to a XML sax parser that has `feature_external_ges` turned on.class + * + * See https://docs.python.org/3/library/xml.sax.handler.html#xml.sax.handler.feature_external_ges + */ private DataFlow::Node saxParserWithFeatureExternalGesTurnedOn(DataFlow::TypeTracker t) { t.start() and exists(SaxParserSetFeatureCall call | @@ -189,7 +193,11 @@ private module SaxBasedParsing { ) } - /** Gets a reference to a XML sax parser that has been made unsafe for `kind`. 
*/ + /** + * Gets a reference to a XML sax parser that has `feature_external_ges` turned on.class + * + * See https://docs.python.org/3/library/xml.sax.handler.html#xml.sax.handler.feature_external_ges + */ DataFlow::Node saxParserWithFeatureExternalGesTurnedOn() { result = saxParserWithFeatureExternalGesTurnedOn(DataFlow::TypeTracker::end()) } From 837daaae3b5f2fba2e405f8cf7900a9c51999769 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 3 Mar 2022 22:04:48 +0100 Subject: [PATCH 70/79] Python: Remove XMLParser concept --- .../experimental/semmle/python/Concepts.qll | 39 ------------------- .../XmlEntityInjectionCustomizations.qll | 13 ------- 2 files changed, 52 deletions(-) diff --git a/python/ql/src/experimental/semmle/python/Concepts.qll b/python/ql/src/experimental/semmle/python/Concepts.qll index 22616c0a5d2b..29ce05501ca9 100644 --- a/python/ql/src/experimental/semmle/python/Concepts.qll +++ b/python/ql/src/experimental/semmle/python/Concepts.qll @@ -76,45 +76,6 @@ module XML { abstract predicate vulnerable(XMLVulnerabilityKind kind); } } - - /** - * A data-flow node that constructs an XML parser. - * - * Extend this class to model new APIs. If you want to refine existing API models, - * extend `XMLParser` instead. - */ - class XMLParser extends DataFlow::Node instanceof XMLParser::Range { - /** - * Gets the argument containing the content to parse. - */ - DataFlow::Node getAnInput() { result = super.getAnInput() } - - /** - * Holds if the parser is vulnerable to `kind`. - */ - predicate vulnerable(XMLVulnerabilityKind kind) { super.vulnerable(kind) } - } - - /** Provides classes for modeling XML parsers. */ - module XMLParser { - /** - * A data-flow node that constructs an XML parser. - * - * Extend this class to model new APIs. If you want to refine existing API models, - * extend `XMLParser` instead. - */ - abstract class Range extends DataFlow::Node { - /** - * Gets the argument containing the content to parse. 
- */ - abstract DataFlow::Node getAnInput(); - - /** - * Holds if the parser is vulnerable to `kind`. - */ - abstract predicate vulnerable(XMLVulnerabilityKind kind); - } - } } /** Provides classes for modeling LDAP query execution-related APIs. */ diff --git a/python/ql/src/experimental/semmle/python/security/dataflow/XmlEntityInjectionCustomizations.qll b/python/ql/src/experimental/semmle/python/security/dataflow/XmlEntityInjectionCustomizations.qll index 7de0c0c4b9c2..44c5da0bcea1 100644 --- a/python/ql/src/experimental/semmle/python/security/dataflow/XmlEntityInjectionCustomizations.qll +++ b/python/ql/src/experimental/semmle/python/security/dataflow/XmlEntityInjectionCustomizations.qll @@ -61,19 +61,6 @@ module XmlEntityInjection { override string getVulnerableKind() { xmlParsing.vulnerable(result) } } - /** - * An input to an XML parser, considered as a flow sink. - * - * See `XML::XMLParser` - */ - class XMLParserInputAsSink extends Sink { - XML::XMLParser xmlParser; - - XMLParserInputAsSink() { this = xmlParser.getAnInput() } - - override string getVulnerableKind() { xmlParser.vulnerable(result) } - } - /** * A source of remote user input, considered as a flow source. */ From 0d69dc854c47df55eddb11d500b8cbe9b04f1d75 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 3 Mar 2022 22:06:26 +0100 Subject: [PATCH 71/79] Python: Minor qldoc improvement --- python/ql/src/experimental/semmle/python/Concepts.qll | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/ql/src/experimental/semmle/python/Concepts.qll b/python/ql/src/experimental/semmle/python/Concepts.qll index 29ce05501ca9..e8837e233ebc 100644 --- a/python/ql/src/experimental/semmle/python/Concepts.qll +++ b/python/ql/src/experimental/semmle/python/Concepts.qll @@ -51,7 +51,7 @@ module XML { DataFlow::Node getAnInput() { result = super.getAnInput() } /** - * Holds if the parsing method or the parser holding it is vulnerable to `kind`. 
+ * Holds if this XML parsing is vulnerable to `kind`. */ predicate vulnerable(XMLVulnerabilityKind kind) { super.vulnerable(kind) } } @@ -71,7 +71,7 @@ module XML { abstract DataFlow::Node getAnInput(); /** - * Holds if the parsing method or the parser holding it is vulnerable to `kind`. + * Holds if this XML parsing is vulnerable to `kind`. */ abstract predicate vulnerable(XMLVulnerabilityKind kind); } From 3f6c55e8aeb3c930d730bb719b778811ffa6dbf1 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 3 Mar 2022 22:09:31 +0100 Subject: [PATCH 72/79] Python: Rename `vulnerable` predicate => `vulnerableTo` --- .../experimental/semmle/python/Concepts.qll | 4 ++-- .../semmle/python/frameworks/Xml.qll | 24 +++++++++---------- .../XmlEntityInjectionCustomizations.qll | 2 +- .../XML/ExperimentalXmlConceptsTests.ql | 2 +- 4 files changed, 16 insertions(+), 16 deletions(-) diff --git a/python/ql/src/experimental/semmle/python/Concepts.qll b/python/ql/src/experimental/semmle/python/Concepts.qll index e8837e233ebc..7ebe90969221 100644 --- a/python/ql/src/experimental/semmle/python/Concepts.qll +++ b/python/ql/src/experimental/semmle/python/Concepts.qll @@ -53,7 +53,7 @@ module XML { /** * Holds if this XML parsing is vulnerable to `kind`. */ - predicate vulnerable(XMLVulnerabilityKind kind) { super.vulnerable(kind) } + predicate vulnerableTo(XMLVulnerabilityKind kind) { super.vulnerableTo(kind) } } /** Provides classes for modeling XML parsing APIs. */ @@ -73,7 +73,7 @@ module XML { /** * Holds if this XML parsing is vulnerable to `kind`. 
*/ - abstract predicate vulnerable(XMLVulnerabilityKind kind); + abstract predicate vulnerableTo(XMLVulnerabilityKind kind); } } } diff --git a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll index a3f79967b67c..1d34e017f031 100644 --- a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll +++ b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll @@ -66,7 +66,7 @@ private module XmlEtree { override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("data")] } - override predicate vulnerable(XML::XMLVulnerabilityKind kind) { + override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) { kind.isBillionLaughs() or kind.isQuadraticBlowup() } } @@ -103,7 +103,7 @@ private module XmlEtree { ] } - override predicate vulnerable(XML::XMLVulnerabilityKind kind) { + override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) { // note: it does not matter what `xml.etree` parser you are using, you cannot // change the security features anyway :| kind.isBillionLaughs() or kind.isQuadraticBlowup() @@ -218,7 +218,7 @@ private module SaxBasedParsing { override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("source")] } - override predicate vulnerable(XML::XMLVulnerabilityKind kind) { + override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) { // always vuln to these (kind.isBillionLaughs() or kind.isQuadraticBlowup()) or @@ -251,7 +251,7 @@ private module SaxBasedParsing { ] } - override predicate vulnerable(XML::XMLVulnerabilityKind kind) { + override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) { // always vuln to these (kind.isBillionLaughs() or kind.isQuadraticBlowup()) or @@ -290,7 +290,7 @@ private module SaxBasedParsing { DataFlow::Node getParserArg() { result in [this.getArg(1), this.getArgByName("parser")] } - override predicate vulnerable(XML::XMLVulnerabilityKind kind) { + override predicate 
vulnerableTo(XML::XMLVulnerabilityKind kind) { this.getParserArg() = saxParserWithFeatureExternalGesTurnedOn() and (kind.isXxe() or kind.isDtdRetrieval()) or @@ -317,7 +317,7 @@ private module Lxml { */ abstract class InstanceSource extends DataFlow::LocalSourceNode { /** Holds if this instance is vulnerable to `kind`. */ - abstract predicate vulnerable(XML::XMLVulnerabilityKind kind); + abstract predicate vulnerableTo(XML::XMLVulnerabilityKind kind); } /** @@ -331,7 +331,7 @@ private module Lxml { } // NOTE: it's not possible to change settings of a parser after constructing it - override predicate vulnerable(XML::XMLVulnerabilityKind kind) { + override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) { kind.isXxe() and ( // resolve_entities has default True @@ -360,7 +360,7 @@ private module Lxml { API::moduleImport("lxml").getMember("etree").getMember("get_default_parser").getACall() } - override predicate vulnerable(XML::XMLVulnerabilityKind kind) { + override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) { // as highlighted by // https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.XMLParser // by default XXE is allow. so as long as the default parser has not been @@ -385,7 +385,7 @@ private module Lxml { /** Gets a reference to an `lxml.etree` parser instance, that is vulnerable to `kind`. 
*/ DataFlow::Node instanceVulnerableTo(XML::XMLVulnerabilityKind kind) { - exists(InstanceSource origin | result = instance(origin) and origin.vulnerable(kind)) + exists(InstanceSource origin | result = instance(origin) and origin.vulnerableTo(kind)) } /** @@ -396,7 +396,7 @@ private module Lxml { override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("data")] } - override predicate vulnerable(XML::XMLVulnerabilityKind kind) { + override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) { this.calls(instanceVulnerableTo(kind), "feed") } } @@ -435,7 +435,7 @@ private module Lxml { DataFlow::Node getParserArg() { result in [this.getArg(1), this.getArgByName("parser")] } - override predicate vulnerable(XML::XMLVulnerabilityKind kind) { + override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) { this.getParserArg() = XMLParser::instanceVulnerableTo(kind) or kind.isXxe() and @@ -455,7 +455,7 @@ private module Xmltodict { result in [this.getArg(0), this.getArgByName("xml_input")] } - override predicate vulnerable(XML::XMLVulnerabilityKind kind) { + override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) { (kind.isBillionLaughs() or kind.isQuadraticBlowup()) and this.getArgByName("disable_entities").getALocalSource().asExpr() = any(False f) } diff --git a/python/ql/src/experimental/semmle/python/security/dataflow/XmlEntityInjectionCustomizations.qll b/python/ql/src/experimental/semmle/python/security/dataflow/XmlEntityInjectionCustomizations.qll index 44c5da0bcea1..745658bbce7b 100644 --- a/python/ql/src/experimental/semmle/python/security/dataflow/XmlEntityInjectionCustomizations.qll +++ b/python/ql/src/experimental/semmle/python/security/dataflow/XmlEntityInjectionCustomizations.qll @@ -58,7 +58,7 @@ module XmlEntityInjection { XMLParsingInputAsSink() { this = xmlParsing.getAnInput() } - override string getVulnerableKind() { xmlParsing.vulnerable(result) } + override string getVulnerableKind() { 
xmlParsing.vulnerableTo(result) } } /** diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/ExperimentalXmlConceptsTests.ql b/python/ql/test/experimental/library-tests/frameworks/XML/ExperimentalXmlConceptsTests.ql index 8ca33765d64f..81bc391d0e55 100644 --- a/python/ql/test/experimental/library-tests/frameworks/XML/ExperimentalXmlConceptsTests.ql +++ b/python/ql/test/experimental/library-tests/frameworks/XML/ExperimentalXmlConceptsTests.ql @@ -22,7 +22,7 @@ class XmlParsingTest extends InlineExpectationsTest { ) or exists(XML::XMLVulnerabilityKind kind | - parsing.vulnerable(kind) and + parsing.vulnerableTo(kind) and location = parsing.getLocation() and element = parsing.toString() and value = "'" + kind + "'" and From 683c2fa8254ebd56ec04a8e0fadb7bdb129c29e7 Mon Sep 17 00:00:00 2001 From: Jorge <46056498+jorgectf@users.noreply.github.com> Date: Fri, 4 Mar 2022 01:02:56 +0100 Subject: [PATCH 73/79] Apply suggestions from code review --- python/ql/src/experimental/semmle/python/frameworks/Xml.qll | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll index 1d34e017f031..86c17374e086 100644 --- a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll +++ b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll @@ -154,7 +154,7 @@ private module SaxBasedParsing { } /** - * Gets a reference to a XML sax parser that has `feature_external_ges` turned on.class + * Gets a reference to a XML sax parser that has `feature_external_ges` turned on. * * See https://docs.python.org/3/library/xml.sax.handler.html#xml.sax.handler.feature_external_ges */ @@ -194,7 +194,7 @@ private module SaxBasedParsing { } /** - * Gets a reference to a XML sax parser that has `feature_external_ges` turned on.class + * Gets a reference to a XML sax parser that has `feature_external_ges` turned on. 
* * See https://docs.python.org/3/library/xml.sax.handler.html#xml.sax.handler.feature_external_ges */ @@ -301,7 +301,7 @@ private module SaxBasedParsing { private module Lxml { /** - * Provides models for `lxml.etree` parsers + * Provides models for `lxml.etree` parsers. * * See https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.XMLParser */ From 3cd165d5b757be7651f6f9ade20bca773b27e582 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Fri, 4 Mar 2022 10:15:50 +0100 Subject: [PATCH 74/79] Python: Apply suggestions from code review Co-authored-by: Jorge <46056498+jorgectf@users.noreply.github.com> --- .../src/experimental/Security/CWE-611/SimpleXmlRpcServer.ql | 6 ++---- python/ql/src/experimental/semmle/python/Concepts.qll | 2 +- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/python/ql/src/experimental/Security/CWE-611/SimpleXmlRpcServer.ql b/python/ql/src/experimental/Security/CWE-611/SimpleXmlRpcServer.ql index 0e3deebf6016..4177daf29c10 100644 --- a/python/ql/src/experimental/Security/CWE-611/SimpleXmlRpcServer.ql +++ b/python/ql/src/experimental/Security/CWE-611/SimpleXmlRpcServer.ql @@ -1,17 +1,15 @@ /** * @name SimpleXMLRPCServer DoS vulnerability * @description SimpleXMLRPCServer is vulnerable to DoS attacks from untrusted user input - * @kind path-problem + * @kind problem * @problem.severity warning * @precision high - * @id py/simple-xml-rpc-server + * @id py/simple-xml-rpc-server-dos * @tags security * external/cwe/cwe-776 */ private import python -private import semmle.python.dataflow.new.DataFlow -private import semmle.python.Concepts private import experimental.semmle.python.Concepts private import semmle.python.ApiGraphs diff --git a/python/ql/src/experimental/semmle/python/Concepts.qll b/python/ql/src/experimental/semmle/python/Concepts.qll index 7ebe90969221..491267d057f2 100644 --- a/python/ql/src/experimental/semmle/python/Concepts.qll +++ b/python/ql/src/experimental/semmle/python/Concepts.qll @@ -22,7 
+22,7 @@ module XML { */ class XMLVulnerabilityKind extends string { XMLVulnerabilityKind() { - this in ["Billion Laughs", "Quadratic Blowup", "XXE", "DTD retrieval",] + this in ["Billion Laughs", "Quadratic Blowup", "XXE", "DTD retrieval"] } /** Holds for Billion Laughs vulnerability kind. */ From d6cbfec43435204bb5e7350d26d9c636073b6652 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Fri, 4 Mar 2022 09:46:49 +0100 Subject: [PATCH 75/79] Python: huge_tree tests were wrong Nice spotted @jorgectf! --- .../frameworks/XML/lxml_etree.py | 4 ++-- .../library-tests/frameworks/XML/poc/PoC.py | 20 ++++++++++++++----- 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/lxml_etree.py b/python/ql/test/experimental/library-tests/frameworks/XML/lxml_etree.py index 47ade6431221..7c62ed1ac6ae 100644 --- a/python/ql/test/experimental/library-tests/frameworks/XML/lxml_etree.py +++ b/python/ql/test/experimental/library-tests/frameworks/XML/lxml_etree.py @@ -45,9 +45,9 @@ parser = lxml.etree.XMLParser(huge_tree=True) lxml.etree.fromstring(x, parser=parser) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' vuln='XXE' -# Billion laughs, but not XXE +# Safe for both Billion laughs and XXE parser = lxml.etree.XMLParser(resolve_entities=False, huge_tree=True) -lxml.etree.fromstring(x, parser=parser) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' +lxml.etree.fromstring(x, parser=parser) # $ input=x SPURIOUS: vuln='Billion Laughs' vuln='Quadratic Blowup' # DTD retrival vuln (also XXE) parser = lxml.etree.XMLParser(load_dtd=True, no_network=False) diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/poc/PoC.py b/python/ql/test/experimental/library-tests/frameworks/XML/poc/PoC.py index b38ff9889e9d..adcace1aa0a6 100644 --- a/python/ql/test/experimental/library-tests/frameworks/XML/poc/PoC.py +++ b/python/ql/test/experimental/library-tests/frameworks/XML/poc/PoC.py @@ -318,11 
+318,21 @@ def test_billion_laughs_manually_enabled(): @expects_timeout def test_quadratic_blowup_manually_enabled(): parser = lxml.etree.XMLParser(huge_tree=True) - try: - _root = lxml.etree.fromstring(quadratic_blowup, parser=parser) - assert False - except lxml.etree.XMLSyntaxError as e: - assert "Detected an entity reference loop" in str(e) + root = lxml.etree.fromstring(quadratic_blowup, parser=parser) + + @staticmethod + def test_billion_laughs_huge_tree_not_enough(): + parser = lxml.etree.XMLParser(huge_tree=True, resolve_entities=False) + root = lxml.etree.fromstring(billion_laughs, parser=parser) + assert root.tag == "lolz" + assert root.text == None + + @staticmethod + def test_quadratic_blowup_huge_tree_not_enough(): + parser = lxml.etree.XMLParser(huge_tree=True, resolve_entities=False) + root = lxml.etree.fromstring(quadratic_blowup, parser=parser) + assert root.tag == "foo" + assert root.text == None @staticmethod def test_ok_xml(): From f0131afc5449459f1562862c557ed537b0ab3a4c Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Fri, 4 Mar 2022 09:49:00 +0100 Subject: [PATCH 76/79] Python: Fix `huge_tree` modeling --- python/ql/src/experimental/semmle/python/frameworks/Xml.qll | 3 ++- .../experimental/library-tests/frameworks/XML/lxml_etree.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll index 86c17374e086..533b97436512 100644 --- a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll +++ b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll @@ -341,7 +341,8 @@ private module Lxml { ) or (kind.isBillionLaughs() or kind.isQuadraticBlowup()) and - this.getArgByName("huge_tree").getALocalSource().asExpr() = any(True t) + this.getArgByName("huge_tree").getALocalSource().asExpr() = any(True t) and + not this.getArgByName("resolve_entities").getALocalSource().asExpr() = any(False t) or 
kind.isDtdRetrieval() and this.getArgByName("load_dtd").getALocalSource().asExpr() = any(True t) and diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/lxml_etree.py b/python/ql/test/experimental/library-tests/frameworks/XML/lxml_etree.py index 7c62ed1ac6ae..22930a58af37 100644 --- a/python/ql/test/experimental/library-tests/frameworks/XML/lxml_etree.py +++ b/python/ql/test/experimental/library-tests/frameworks/XML/lxml_etree.py @@ -47,7 +47,7 @@ # Safe for both Billion laughs and XXE parser = lxml.etree.XMLParser(resolve_entities=False, huge_tree=True) -lxml.etree.fromstring(x, parser=parser) # $ input=x SPURIOUS: vuln='Billion Laughs' vuln='Quadratic Blowup' +lxml.etree.fromstring(x, parser=parser) # $ input=x # DTD retrival vuln (also XXE) parser = lxml.etree.XMLParser(load_dtd=True, no_network=False) From 1a9620a87a4aa9ae406774681431bb2e3274cc88 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Fri, 4 Mar 2022 10:01:02 +0100 Subject: [PATCH 77/79] Python: Add conditional assignment check for sax parser --- .../library-tests/frameworks/XML/xml_sax.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/xml_sax.py b/python/ql/test/experimental/library-tests/frameworks/XML/xml_sax.py index 89bbec3f1f57..158e62ffae6b 100644 --- a/python/ql/test/experimental/library-tests/frameworks/XML/xml_sax.py +++ b/python/ql/test/experimental/library-tests/frameworks/XML/xml_sax.py @@ -45,3 +45,20 @@ def func(cond): parser.setFeature(xml.sax.handler.feature_external_ges, True) parser.setFeature(xml.sax.handler.feature_external_ges, False) parser.parse(StringIO(x)) # $ input=StringIO(..) 
vuln='Billion Laughs' vuln='Quadratic Blowup' + +def check_conditional_assignment(cond): + parser = xml.sax.make_parser() + if cond: + parser.setFeature(xml.sax.handler.feature_external_ges, True) + else: + parser.setFeature(xml.sax.handler.feature_external_ges, False) + parser.parse(StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='DTD retrieval' vuln='Quadratic Blowup' vuln='XXE' + +def check_conditional_assignment2(cond): + parser = xml.sax.make_parser() + if cond: + flag_value = True + else: + flag_value = False + parser.setFeature(xml.sax.handler.feature_external_ges, flag_value) + parser.parse(StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='DTD retrieval' vuln='Quadratic Blowup' vuln='XXE' From ef045a6789cd4c7cbe04fba0e15b40461ba9ea75 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Fri, 4 Mar 2022 10:18:30 +0100 Subject: [PATCH 78/79] Python: Fix typo in set_default_parser --- python/ql/src/experimental/semmle/python/frameworks/Xml.qll | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll index 533b97436512..18ba6c5a572c 100644 --- a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll +++ b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll @@ -367,7 +367,7 @@ private module Lxml { // by default XXE is allow. so as long as the default parser has not been // overridden, the result is also vuln to XXE. kind.isXxe() - // TODO: take into account that you can override the default parser with `lxml.etree.get_default_parser`. + // TODO: take into account that you can override the default parser with `lxml.etree.set_default_parser`. 
+ } } From 0e9da4aadb420f1b327403f991e7891bb962bfb6 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Tue, 8 Mar 2022 11:25:10 +0100 Subject: [PATCH 79/79] Python: Resolve name conflict over `XML` module MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Not the prettiest solution... but it works ¯\_(ツ)_/¯ --- .../src/experimental/Security/CWE-611/SimpleXmlRpcServer.ql | 2 +- python/ql/src/experimental/semmle/python/Concepts.qll | 6 +++++- python/ql/src/experimental/semmle/python/frameworks/Xml.qll | 2 ++ .../security/dataflow/XmlEntityInjectionCustomizations.qll | 2 +- 4 files changed, 9 insertions(+), 3 deletions(-) diff --git a/python/ql/src/experimental/Security/CWE-611/SimpleXmlRpcServer.ql b/python/ql/src/experimental/Security/CWE-611/SimpleXmlRpcServer.ql index 4177daf29c10..cda0633690c5 100644 --- a/python/ql/src/experimental/Security/CWE-611/SimpleXmlRpcServer.ql +++ b/python/ql/src/experimental/Security/CWE-611/SimpleXmlRpcServer.ql @@ -17,7 +17,7 @@ from DataFlow::CallCfgNode call, string kinds where call = API::moduleImport("xmlrpc").getMember("server").getMember("SimpleXMLRPCServer").getACall() and kinds = - strictconcat(XML::XMLVulnerabilityKind kind | + strictconcat(ExperimentalXML::XMLVulnerabilityKind kind | kind.isBillionLaughs() or kind.isQuadraticBlowup() | kind, ", " diff --git a/python/ql/src/experimental/semmle/python/Concepts.qll b/python/ql/src/experimental/semmle/python/Concepts.qll index 491267d057f2..ce5617071845 100644 --- a/python/ql/src/experimental/semmle/python/Concepts.qll +++ b/python/ql/src/experimental/semmle/python/Concepts.qll @@ -14,7 +14,11 @@ private import semmle.python.dataflow.new.RemoteFlowSources private import semmle.python.dataflow.new.TaintTracking private import experimental.semmle.python.Frameworks -module XML { +/** + * Since there is an `XML` module in both the normal and the experimental Concepts, + * we have to rename the experimental module to this.
+ */ +module ExperimentalXML { /** * A kind of XML vulnerability. * diff --git a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll index 18ba6c5a572c..a2f36f66f2e3 100644 --- a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll +++ b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll @@ -8,6 +8,8 @@ private import semmle.python.dataflow.new.DataFlow private import experimental.semmle.python.Concepts private import semmle.python.ApiGraphs +module XML = ExperimentalXML; + private module XmlEtree { /** * Provides models for `xml.etree` parsers diff --git a/python/ql/src/experimental/semmle/python/security/dataflow/XmlEntityInjectionCustomizations.qll b/python/ql/src/experimental/semmle/python/security/dataflow/XmlEntityInjectionCustomizations.qll index 745658bbce7b..e420c738a978 100644 --- a/python/ql/src/experimental/semmle/python/security/dataflow/XmlEntityInjectionCustomizations.qll +++ b/python/ql/src/experimental/semmle/python/security/dataflow/XmlEntityInjectionCustomizations.qll @@ -54,7 +54,7 @@ module XmlEntityInjection { * See `XML::XMLParsing`. */ class XMLParsingInputAsSink extends Sink { - XML::XMLParsing xmlParsing; + ExperimentalXML::XMLParsing xmlParsing; XMLParsingInputAsSink() { this = xmlParsing.getAnInput() }