Date: Wed, 9 Feb 2022 13:28:56 +0100
Subject: [PATCH 33/79] `XmlInjection` -> `XmlEntityInjection`
---
.../{XmlInjection.py => XmlEntityInjection.py} | 0
...njection.qhelp => XmlEntityInjection.qhelp} | 2 +-
.../{XmlInjection.ql => XmlEntityInjection.ql} | 10 +++++-----
...XmlInjection.qll => XmlEntityInjection.qll} | 18 ++++++++++--------
...ll => XmlEntityInjectionCustomizations.qll} | 2 +-
...on.expected => XmlEntityInjection.expected} | 0
.../Security/CWE-611/XmlEntityInjection.qlref | 1 +
.../Security/CWE-611/XmlInjection.qlref | 1 -
8 files changed, 18 insertions(+), 16 deletions(-)
rename python/ql/src/experimental/Security/CWE-611/{XmlInjection.py => XmlEntityInjection.py} (100%)
rename python/ql/src/experimental/Security/CWE-611/{XmlInjection.qhelp => XmlEntityInjection.qhelp} (98%)
rename python/ql/src/experimental/Security/CWE-611/{XmlInjection.ql => XmlEntityInjection.ql} (62%)
rename python/ql/src/experimental/semmle/python/security/dataflow/{XmlInjection.qll => XmlEntityInjection.qll} (65%)
rename python/ql/src/experimental/semmle/python/security/dataflow/{XmlInjectionCustomizations.qll => XmlEntityInjectionCustomizations.qll} (99%)
rename python/ql/test/experimental/query-tests/Security/CWE-611/{XmlInjection.expected => XmlEntityInjection.expected} (100%)
create mode 100644 python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.qlref
delete mode 100644 python/ql/test/experimental/query-tests/Security/CWE-611/XmlInjection.qlref
diff --git a/python/ql/src/experimental/Security/CWE-611/XmlInjection.py b/python/ql/src/experimental/Security/CWE-611/XmlEntityInjection.py
similarity index 100%
rename from python/ql/src/experimental/Security/CWE-611/XmlInjection.py
rename to python/ql/src/experimental/Security/CWE-611/XmlEntityInjection.py
diff --git a/python/ql/src/experimental/Security/CWE-611/XmlInjection.qhelp b/python/ql/src/experimental/Security/CWE-611/XmlEntityInjection.qhelp
similarity index 98%
rename from python/ql/src/experimental/Security/CWE-611/XmlInjection.qhelp
rename to python/ql/src/experimental/Security/CWE-611/XmlEntityInjection.qhelp
index e617835bdef2..6da1bf1d3063 100644
--- a/python/ql/src/experimental/Security/CWE-611/XmlInjection.qhelp
+++ b/python/ql/src/experimental/Security/CWE-611/XmlEntityInjection.qhelp
@@ -26,7 +26,7 @@ to prevent any potentially malicious operation.
The following example calls xml.etree.ElementTree.fromstring using a parser (lxml.etree.XMLParser)
that is not safely configured on untrusted data, and is therefore inherently unsafe.
-
+
Providing an input (xml_content) like the following XML content against /bad, the request response would contain the contents of
/etc/passwd.
diff --git a/python/ql/src/experimental/Security/CWE-611/XmlInjection.ql b/python/ql/src/experimental/Security/CWE-611/XmlEntityInjection.ql
similarity index 62%
rename from python/ql/src/experimental/Security/CWE-611/XmlInjection.ql
rename to python/ql/src/experimental/Security/CWE-611/XmlEntityInjection.ql
index 78213f624eaf..8f22ded4b157 100644
--- a/python/ql/src/experimental/Security/CWE-611/XmlInjection.ql
+++ b/python/ql/src/experimental/Security/CWE-611/XmlEntityInjection.ql
@@ -1,9 +1,9 @@
/**
- * @name XML injection
- * @description User input should not be parsed without security options enabled.
+ * @name XML Entity injection
+ * @description User input should not be parsed in a way that allows the injection of entities.
* @kind path-problem
* @problem.severity error
- * @id py/xml-injection
+ * @id py/xml-entity-injection
* @tags security
* external/cwe/cwe-611
* external/cwe/cwe-776
@@ -12,11 +12,11 @@
// determine precision above
import python
-import experimental.semmle.python.security.dataflow.XmlInjection
+import experimental.semmle.python.security.dataflow.XmlEntityInjection
import DataFlow::PathGraph
from DataFlow::PathNode source, DataFlow::PathNode sink, string kind
-where XmlInjection::xmlInjectionVulnerable(source, sink, kind)
+where XmlEntityInjection::xmlEntityInjectionVulnerable(source, sink, kind)
select sink.getNode(), source, sink,
"$@ XML input is constructed from a $@ and is vulnerable to " + kind + ".", sink.getNode(),
"This", source.getNode(), "user-provided value"
diff --git a/python/ql/src/experimental/semmle/python/security/dataflow/XmlInjection.qll b/python/ql/src/experimental/semmle/python/security/dataflow/XmlEntityInjection.qll
similarity index 65%
rename from python/ql/src/experimental/semmle/python/security/dataflow/XmlInjection.qll
rename to python/ql/src/experimental/semmle/python/security/dataflow/XmlEntityInjection.qll
index 90e2c9bf342c..4669e0e430d4 100644
--- a/python/ql/src/experimental/semmle/python/security/dataflow/XmlInjection.qll
+++ b/python/ql/src/experimental/semmle/python/security/dataflow/XmlEntityInjection.qll
@@ -5,11 +5,11 @@ import semmle.python.dataflow.new.TaintTracking
import semmle.python.dataflow.new.RemoteFlowSources
import semmle.python.dataflow.new.BarrierGuards
-module XmlInjection {
- import XmlInjectionCustomizations::XmlInjection
+module XmlEntityInjection {
+ import XmlEntityInjectionCustomizations::XmlEntityInjection
- class XMLInjectionConfiguration extends TaintTracking::Configuration {
- XMLInjectionConfiguration() { this = "XMLInjectionConfiguration" }
+ class XmlEntityInjectionConfiguration extends TaintTracking::Configuration {
+ XmlEntityInjectionConfiguration() { this = "XmlEntityInjectionConfiguration" }
override predicate isSource(DataFlow::Node source) {
source instanceof RemoteFlowSourceAsSource
@@ -29,13 +29,15 @@ module XmlInjection {
private import DataFlow::PathGraph
/** Holds if there is an XML injection from `source` to `sink` */
- predicate xmlInjection(DataFlow::PathNode source, DataFlow::PathNode sink) {
- any(XMLInjectionConfiguration xmlInjectionConfig).hasFlowPath(source, sink)
+ predicate xmlEntityInjection(DataFlow::PathNode source, DataFlow::PathNode sink) {
+ any(XmlEntityInjectionConfiguration x).hasFlowPath(source, sink)
}
/** Holds if there is an XML injection from `source` to `sink` vulnerable to `kind` */
- predicate xmlInjectionVulnerable(DataFlow::PathNode source, DataFlow::PathNode sink, string kind) {
- xmlInjection(source, sink) and
+ predicate xmlEntityInjectionVulnerable(
+ DataFlow::PathNode source, DataFlow::PathNode sink, string kind
+ ) {
+ xmlEntityInjection(source, sink) and
(
xmlParsingInputAsVulnerableSink(sink.getNode(), kind) or
xmlParserInputAsVulnerableSink(sink.getNode(), kind)
diff --git a/python/ql/src/experimental/semmle/python/security/dataflow/XmlInjectionCustomizations.qll b/python/ql/src/experimental/semmle/python/security/dataflow/XmlEntityInjectionCustomizations.qll
similarity index 99%
rename from python/ql/src/experimental/semmle/python/security/dataflow/XmlInjectionCustomizations.qll
rename to python/ql/src/experimental/semmle/python/security/dataflow/XmlEntityInjectionCustomizations.qll
index 3e9dd22c69c0..177f89799563 100644
--- a/python/ql/src/experimental/semmle/python/security/dataflow/XmlInjectionCustomizations.qll
+++ b/python/ql/src/experimental/semmle/python/security/dataflow/XmlEntityInjectionCustomizations.qll
@@ -15,7 +15,7 @@ private import semmle.python.ApiGraphs
* Provides default sources, sinks and sanitizers for detecting "xml injection"
* vulnerabilities, as well as extension points for adding your own.
*/
-module XmlInjection {
+module XmlEntityInjection {
/**
* A data flow source for "xml injection" vulnerabilities.
*/
diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/XmlInjection.expected b/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected
similarity index 100%
rename from python/ql/test/experimental/query-tests/Security/CWE-611/XmlInjection.expected
rename to python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected
diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.qlref b/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.qlref
new file mode 100644
index 000000000000..36a7c8845fb7
--- /dev/null
+++ b/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.qlref
@@ -0,0 +1 @@
+experimental/Security/CWE-611/XmlEntityInjection.ql
diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/XmlInjection.qlref b/python/ql/test/experimental/query-tests/Security/CWE-611/XmlInjection.qlref
deleted file mode 100644
index 24d483666acc..000000000000
--- a/python/ql/test/experimental/query-tests/Security/CWE-611/XmlInjection.qlref
+++ /dev/null
@@ -1 +0,0 @@
-experimental/Security/CWE-611/XmlInjection.ql
From c5f30d99d5f01f5819e9653d1115a65d82d9b6f0 Mon Sep 17 00:00:00 2001
From: jorgectf
Date: Sun, 20 Feb 2022 17:34:12 +0100
Subject: [PATCH 34/79] Create an extendable `AdditionalTaintStep` class in
customizations
---
.../security/dataflow/XmlEntityInjection.qll | 2 +-
.../XmlEntityInjectionCustomizations.qll | 31 +++++++++++++++----
2 files changed, 26 insertions(+), 7 deletions(-)
diff --git a/python/ql/src/experimental/semmle/python/security/dataflow/XmlEntityInjection.qll b/python/ql/src/experimental/semmle/python/security/dataflow/XmlEntityInjection.qll
index 4669e0e430d4..087c3057640e 100644
--- a/python/ql/src/experimental/semmle/python/security/dataflow/XmlEntityInjection.qll
+++ b/python/ql/src/experimental/semmle/python/security/dataflow/XmlEntityInjection.qll
@@ -22,7 +22,7 @@ module XmlEntityInjection {
}
override predicate isAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
- ioAdditionalTaintStep(nodeFrom, nodeTo)
+ any(AdditionalTaintStep s).step(nodeFrom, nodeTo)
}
}
diff --git a/python/ql/src/experimental/semmle/python/security/dataflow/XmlEntityInjectionCustomizations.qll b/python/ql/src/experimental/semmle/python/security/dataflow/XmlEntityInjectionCustomizations.qll
index 177f89799563..8f8b3ae2c6ab 100644
--- a/python/ql/src/experimental/semmle/python/security/dataflow/XmlEntityInjectionCustomizations.qll
+++ b/python/ql/src/experimental/semmle/python/security/dataflow/XmlEntityInjectionCustomizations.qll
@@ -31,6 +31,20 @@ module XmlEntityInjection {
*/
abstract class SanitizerGuard extends DataFlow::BarrierGuard { }
+ /**
+ * A unit class for adding additional taint steps.
+ *
+ * Extend this class and override `step` to contribute extra taint steps to the
+ * `XmlEntityInjection` taint-tracking configuration.
+ */
+ class AdditionalTaintStep extends Unit {
+ /**
+ * Holds if the step from `nodeFrom` to `nodeTo` should be considered a taint
+ * step for the `XmlEntityInjection` configuration.
+ */
+ abstract predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo);
+ }
+
/**
* A data flow sink for XML parsing libraries.
*
@@ -85,11 +99,16 @@ module XmlEntityInjection {
*/
class StringConstCompareAsSanitizerGuard extends SanitizerGuard, StringConstCompare { }
- predicate ioAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
- exists(DataFlow::CallCfgNode ioCalls |
- ioCalls = API::moduleImport("io").getMember(["StringIO", "BytesIO"]).getACall() and
- nodeFrom = ioCalls.getArg(0) and
- nodeTo = ioCalls
- )
+ /**
+ * A taint step from the first argument of an `io.StringIO`/`io.BytesIO` call to the call's result.
+ */
+ class IoAdditionalTaintStep extends AdditionalTaintStep {
+ override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
+ exists(DataFlow::CallCfgNode ioCalls |
+ ioCalls = API::moduleImport("io").getMember(["StringIO", "BytesIO"]).getACall() and
+ nodeFrom = ioCalls.getArg(0) and
+ nodeTo = ioCalls
+ )
+ }
}
}
From 500e0aced6e9a9e2fd426966e85109ce26ab5f1d Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Tue, 1 Mar 2022 17:14:35 +0100
Subject: [PATCH 35/79] Python: Rewrite sax XML tests
The tests for type-trackers were not that interesting, since they did
not have XML input in both cases, which is the problem we were trying
hard to solve.
I did keep the test case for a non-user-supplied URL alive as well, though
:+1:
I added OK/NOT OK annotations.
Notice that we report all 4 kinds of vulnerabilities on line 93
---
.../CWE-611/XmlEntityInjection.expected | 82 +++++++++++--------
.../Security/CWE-611/xml_sax_make_parser.py | 37 +++++----
2 files changed, 67 insertions(+), 52 deletions(-)
diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected b/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected
index 081a8c6e6af8..2e291875ce81 100644
--- a/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected
+++ b/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected
@@ -71,18 +71,19 @@ edges
| xml_sax_make_parser.py:42:19:42:25 | ControlFlowNode for request | xml_sax_make_parser.py:42:19:42:30 | ControlFlowNode for Attribute |
| xml_sax_make_parser.py:42:19:42:30 | ControlFlowNode for Attribute | xml_sax_make_parser.py:42:19:42:45 | ControlFlowNode for Subscript |
| xml_sax_make_parser.py:42:19:42:45 | ControlFlowNode for Subscript | xml_sax_make_parser.py:49:18:49:38 | ControlFlowNode for StringIO() |
-| xml_sax_make_parser.py:57:19:57:25 | ControlFlowNode for request | xml_sax_make_parser.py:57:19:57:30 | ControlFlowNode for Attribute |
-| xml_sax_make_parser.py:57:19:57:30 | ControlFlowNode for Attribute | xml_sax_make_parser.py:57:19:57:45 | ControlFlowNode for Subscript |
-| xml_sax_make_parser.py:57:19:57:45 | ControlFlowNode for Subscript | xml_sax_make_parser.py:63:18:63:38 | ControlFlowNode for StringIO() |
-| xml_sax_make_parser.py:69:19:69:25 | ControlFlowNode for request | xml_sax_make_parser.py:69:19:69:30 | ControlFlowNode for Attribute |
-| xml_sax_make_parser.py:69:19:69:30 | ControlFlowNode for Attribute | xml_sax_make_parser.py:69:19:69:45 | ControlFlowNode for Subscript |
-| xml_sax_make_parser.py:69:19:69:45 | ControlFlowNode for Subscript | xml_sax_make_parser.py:73:34:73:54 | ControlFlowNode for StringIO() |
-| xml_sax_make_parser.py:79:19:79:25 | ControlFlowNode for request | xml_sax_make_parser.py:79:19:79:30 | ControlFlowNode for Attribute |
-| xml_sax_make_parser.py:79:19:79:30 | ControlFlowNode for Attribute | xml_sax_make_parser.py:79:19:79:45 | ControlFlowNode for Subscript |
-| xml_sax_make_parser.py:79:19:79:45 | ControlFlowNode for Subscript | xml_sax_make_parser.py:86:22:86:42 | ControlFlowNode for StringIO() |
-| xml_sax_make_parser.py:91:19:91:25 | ControlFlowNode for request | xml_sax_make_parser.py:91:19:91:30 | ControlFlowNode for Attribute |
-| xml_sax_make_parser.py:91:19:91:30 | ControlFlowNode for Attribute | xml_sax_make_parser.py:91:19:91:45 | ControlFlowNode for Subscript |
-| xml_sax_make_parser.py:91:19:91:45 | ControlFlowNode for Subscript | xml_sax_make_parser.py:98:22:98:42 | ControlFlowNode for StringIO() |
+| xml_sax_make_parser.py:63:19:63:25 | ControlFlowNode for request | xml_sax_make_parser.py:63:19:63:30 | ControlFlowNode for Attribute |
+| xml_sax_make_parser.py:63:19:63:30 | ControlFlowNode for Attribute | xml_sax_make_parser.py:63:19:63:45 | ControlFlowNode for Subscript |
+| xml_sax_make_parser.py:63:19:63:45 | ControlFlowNode for Subscript | xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() |
+| xml_sax_make_parser.py:75:19:75:25 | ControlFlowNode for request | xml_sax_make_parser.py:75:19:75:30 | ControlFlowNode for Attribute |
+| xml_sax_make_parser.py:75:19:75:30 | ControlFlowNode for Attribute | xml_sax_make_parser.py:75:19:75:45 | ControlFlowNode for Subscript |
+| xml_sax_make_parser.py:75:19:75:45 | ControlFlowNode for Subscript | xml_sax_make_parser.py:79:33:79:53 | ControlFlowNode for StringIO() |
+| xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | xml_sax_make_parser.py:86:19:86:30 | ControlFlowNode for Attribute |
+| xml_sax_make_parser.py:86:19:86:30 | ControlFlowNode for Attribute | xml_sax_make_parser.py:86:19:86:45 | ControlFlowNode for Subscript |
+| xml_sax_make_parser.py:86:19:86:45 | ControlFlowNode for Subscript | xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() |
+| xml_sax_make_parser.py:86:19:86:45 | ControlFlowNode for Subscript | xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() |
+| xml_sax_make_parser.py:98:19:98:25 | ControlFlowNode for request | xml_sax_make_parser.py:98:19:98:30 | ControlFlowNode for Attribute |
+| xml_sax_make_parser.py:98:19:98:30 | ControlFlowNode for Attribute | xml_sax_make_parser.py:98:19:98:45 | ControlFlowNode for Subscript |
+| xml_sax_make_parser.py:98:19:98:45 | ControlFlowNode for Subscript | xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() |
| xml_to_dict.py:9:19:9:25 | ControlFlowNode for request | xml_to_dict.py:9:19:9:30 | ControlFlowNode for Attribute |
| xml_to_dict.py:9:19:9:30 | ControlFlowNode for Attribute | xml_to_dict.py:9:19:9:45 | ControlFlowNode for Subscript |
| xml_to_dict.py:9:19:9:45 | ControlFlowNode for Subscript | xml_to_dict.py:11:28:11:38 | ControlFlowNode for xml_content |
@@ -186,22 +187,23 @@ nodes
| xml_sax_make_parser.py:42:19:42:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
| xml_sax_make_parser.py:42:19:42:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
| xml_sax_make_parser.py:49:18:49:38 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() |
-| xml_sax_make_parser.py:57:19:57:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
-| xml_sax_make_parser.py:57:19:57:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
-| xml_sax_make_parser.py:57:19:57:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
-| xml_sax_make_parser.py:63:18:63:38 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() |
-| xml_sax_make_parser.py:69:19:69:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
-| xml_sax_make_parser.py:69:19:69:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
-| xml_sax_make_parser.py:69:19:69:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
-| xml_sax_make_parser.py:73:34:73:54 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() |
-| xml_sax_make_parser.py:79:19:79:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
-| xml_sax_make_parser.py:79:19:79:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
-| xml_sax_make_parser.py:79:19:79:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
-| xml_sax_make_parser.py:86:22:86:42 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() |
-| xml_sax_make_parser.py:91:19:91:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
-| xml_sax_make_parser.py:91:19:91:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
-| xml_sax_make_parser.py:91:19:91:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
-| xml_sax_make_parser.py:98:22:98:42 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() |
+| xml_sax_make_parser.py:63:19:63:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
+| xml_sax_make_parser.py:63:19:63:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
+| xml_sax_make_parser.py:63:19:63:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
+| xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() |
+| xml_sax_make_parser.py:75:19:75:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
+| xml_sax_make_parser.py:75:19:75:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
+| xml_sax_make_parser.py:75:19:75:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
+| xml_sax_make_parser.py:79:33:79:53 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() |
+| xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
+| xml_sax_make_parser.py:86:19:86:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
+| xml_sax_make_parser.py:86:19:86:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
+| xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() |
+| xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() |
+| xml_sax_make_parser.py:98:19:98:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
+| xml_sax_make_parser.py:98:19:98:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
+| xml_sax_make_parser.py:98:19:98:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
+| xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() |
| xml_to_dict.py:9:19:9:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
| xml_to_dict.py:9:19:9:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
| xml_to_dict.py:9:19:9:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
@@ -231,13 +233,21 @@ subpaths
| xml_dom.py:33:40:33:50 | ControlFlowNode for xml_content | xml_dom.py:31:19:31:25 | ControlFlowNode for request | xml_dom.py:33:40:33:50 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to Quadratic Blowup. | xml_dom.py:33:40:33:50 | ControlFlowNode for xml_content | This | xml_dom.py:31:19:31:25 | ControlFlowNode for request | user-provided value |
| xml_etree.py:49:45:49:55 | ControlFlowNode for xml_content | xml_etree.py:46:19:46:25 | ControlFlowNode for request | xml_etree.py:49:45:49:55 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to XXE. | xml_etree.py:49:45:49:55 | ControlFlowNode for xml_content | This | xml_etree.py:46:19:46:25 | ControlFlowNode for request | user-provided value |
| xml_etree.py:56:45:56:55 | ControlFlowNode for xml_content | xml_etree.py:53:19:53:25 | ControlFlowNode for request | xml_etree.py:56:45:56:55 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to XXE. | xml_etree.py:56:45:56:55 | ControlFlowNode for xml_content | This | xml_etree.py:53:19:53:25 | ControlFlowNode for request | user-provided value |
-| xml_sax_make_parser.py:63:18:63:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:57:19:57:25 | ControlFlowNode for request | xml_sax_make_parser.py:63:18:63:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to Billion Laughs. | xml_sax_make_parser.py:63:18:63:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:57:19:57:25 | ControlFlowNode for request | user-provided value |
-| xml_sax_make_parser.py:63:18:63:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:57:19:57:25 | ControlFlowNode for request | xml_sax_make_parser.py:63:18:63:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to DTD retrieval. | xml_sax_make_parser.py:63:18:63:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:57:19:57:25 | ControlFlowNode for request | user-provided value |
-| xml_sax_make_parser.py:63:18:63:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:57:19:57:25 | ControlFlowNode for request | xml_sax_make_parser.py:63:18:63:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to Quadratic Blowup. | xml_sax_make_parser.py:63:18:63:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:57:19:57:25 | ControlFlowNode for request | user-provided value |
-| xml_sax_make_parser.py:63:18:63:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:57:19:57:25 | ControlFlowNode for request | xml_sax_make_parser.py:63:18:63:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to XXE. | xml_sax_make_parser.py:63:18:63:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:57:19:57:25 | ControlFlowNode for request | user-provided value |
-| xml_sax_make_parser.py:98:22:98:42 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:91:19:91:25 | ControlFlowNode for request | xml_sax_make_parser.py:98:22:98:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to Billion Laughs. | xml_sax_make_parser.py:98:22:98:42 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:91:19:91:25 | ControlFlowNode for request | user-provided value |
-| xml_sax_make_parser.py:98:22:98:42 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:91:19:91:25 | ControlFlowNode for request | xml_sax_make_parser.py:98:22:98:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to DTD retrieval. | xml_sax_make_parser.py:98:22:98:42 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:91:19:91:25 | ControlFlowNode for request | user-provided value |
-| xml_sax_make_parser.py:98:22:98:42 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:91:19:91:25 | ControlFlowNode for request | xml_sax_make_parser.py:98:22:98:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to Quadratic Blowup. | xml_sax_make_parser.py:98:22:98:42 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:91:19:91:25 | ControlFlowNode for request | user-provided value |
-| xml_sax_make_parser.py:98:22:98:42 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:91:19:91:25 | ControlFlowNode for request | xml_sax_make_parser.py:98:22:98:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to XXE. | xml_sax_make_parser.py:98:22:98:42 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:91:19:91:25 | ControlFlowNode for request | user-provided value |
+| xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:63:19:63:25 | ControlFlowNode for request | xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to Billion Laughs. | xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:63:19:63:25 | ControlFlowNode for request | user-provided value |
+| xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:63:19:63:25 | ControlFlowNode for request | xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to DTD retrieval. | xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:63:19:63:25 | ControlFlowNode for request | user-provided value |
+| xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:63:19:63:25 | ControlFlowNode for request | xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to Quadratic Blowup. | xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:63:19:63:25 | ControlFlowNode for request | user-provided value |
+| xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:63:19:63:25 | ControlFlowNode for request | xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to XXE. | xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:63:19:63:25 | ControlFlowNode for request | user-provided value |
+| xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to Billion Laughs. | xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | user-provided value |
+| xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to DTD retrieval. | xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | user-provided value |
+| xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to Quadratic Blowup. | xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | user-provided value |
+| xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to XXE. | xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | user-provided value |
+| xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to Billion Laughs. | xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | user-provided value |
+| xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to DTD retrieval. | xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | user-provided value |
+| xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to Quadratic Blowup. | xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | user-provided value |
+| xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to XXE. | xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | user-provided value |
+| xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:98:19:98:25 | ControlFlowNode for request | xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to Billion Laughs. | xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:98:19:98:25 | ControlFlowNode for request | user-provided value |
+| xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:98:19:98:25 | ControlFlowNode for request | xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to DTD retrieval. | xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:98:19:98:25 | ControlFlowNode for request | user-provided value |
+| xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:98:19:98:25 | ControlFlowNode for request | xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to Quadratic Blowup. | xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:98:19:98:25 | ControlFlowNode for request | user-provided value |
+| xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:98:19:98:25 | ControlFlowNode for request | xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to XXE. | xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:98:19:98:25 | ControlFlowNode for request | user-provided value |
| xml_to_dict.py:17:28:17:38 | ControlFlowNode for xml_content | xml_to_dict.py:15:19:15:25 | ControlFlowNode for request | xml_to_dict.py:17:28:17:38 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to Billion Laughs. | xml_to_dict.py:17:28:17:38 | ControlFlowNode for xml_content | This | xml_to_dict.py:15:19:15:25 | ControlFlowNode for request | user-provided value |
| xml_to_dict.py:17:28:17:38 | ControlFlowNode for xml_content | xml_to_dict.py:15:19:15:25 | ControlFlowNode for request | xml_to_dict.py:17:28:17:38 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to Quadratic Blowup. | xml_to_dict.py:17:28:17:38 | ControlFlowNode for xml_content | This | xml_to_dict.py:15:19:15:25 | ControlFlowNode for request | user-provided value |
diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/xml_sax_make_parser.py b/python/ql/test/experimental/query-tests/Security/CWE-611/xml_sax_make_parser.py
index 9f858d99ddd2..e95abf753796 100644
--- a/python/ql/test/experimental/query-tests/Security/CWE-611/xml_sax_make_parser.py
+++ b/python/ql/test/experimental/query-tests/Security/CWE-611/xml_sax_make_parser.py
@@ -13,7 +13,7 @@ def characters(self, data):
self._result.append(data)
def parse(self, f):
- xml.sax.parse(f, self)
+ xml.sax.parse(f, self) # OK for XXE/DTD, NOT OK for billion laughs/quadratic
return self._result
# GOOD
@@ -33,7 +33,7 @@ def xml_makeparser_MainHandler():
GoodHandler = MainHandler()
parser = xml.sax.make_parser()
parser.setContentHandler(GoodHandler)
- parser.parse(StringIO(xml_content))
+ parser.parse(StringIO(xml_content)) # OK for XXE/DTD, NOT OK for billion laughs/quadratic
return GoodHandler._result
@@ -46,11 +46,17 @@ def xml_makeparser_MainHandler_entitiesFalse():
parser.setContentHandler(GoodHandler)
# https://docs.python.org/3/library/xml.sax.handler.html#xml.sax.handler.feature_external_ges
parser.setFeature(xml.sax.handler.feature_external_ges, False)
- parser.parse(StringIO(xml_content))
+ parser.parse(StringIO(xml_content)) # OK for XXE/DTD, NOT OK for billion laughs/quadratic
return GoodHandler._result
-# BAD
+@app.route("not-user-controlled")
+def not_user_controlled():
+ parser = xml.sax.make_parser()
+ parser.setFeature(xml.sax.handler.feature_external_ges, True)
+ parser.parse("/not-user-controlled/default_config.xml") # OK
+ return
+# BAD
@app.route("/xml.sax.make_parser()+MainHandler-xml.sax.handler.feature_external_ges_True")
def xml_makeparser_MainHandler_entitiesTrue():
@@ -60,7 +66,7 @@ def xml_makeparser_MainHandler_entitiesTrue():
parser = xml.sax.make_parser()
parser.setContentHandler(BadHandler)
parser.setFeature(xml.sax.handler.feature_external_ges, True)
- parser.parse(StringIO(xml_content))
+ parser.parse(StringIO(xml_content)) # NOT OK for XXE/DTD, NOT OK for billion laughs/quadratic
return BadHandler._result
@@ -70,7 +76,8 @@ def xml_makeparser_minidom_entitiesTrue():
parser = xml.sax.make_parser()
parser.setFeature(xml.sax.handler.feature_external_ges, True)
- return xml.dom.minidom.parse(StringIO(xml_content), parser=parser).documentElement.childNodes
+ doc = xml.dom.minidom.parse(StringIO(xml_content), parser=parser) # NOT OK for XXE/DTD, NOT OK for billion laughs/quadratic
+ return doc.documentElement.childNodes
# Forward Type Tracking test
@@ -80,20 +87,18 @@ def forward_tracking1(action):
parser = xml.sax.make_parser()
if action == 'load-config':
- parser.setFeature(xml.sax.handler.feature_external_ges, False)
- parser.parse("/not-user-controlled/default_config.xml")
+ parser.setFeature(xml.sax.handler.feature_external_ges, True)
+ parser.parse(StringIO(xml_content)) # NOT OK for XXE/DTD, NOT OK for billion laughs/quadratic
else:
- parser.parse(StringIO(xml_content))
- return
+ parser.parse(StringIO(xml_content)) # OK for XXE/DTD, NOT OK for billion laughs/quadratic
+ return
@app.route("forward_tracking2")
def forward_tracking2(action):
xml_content = request.args['xml_content']
parser = xml.sax.make_parser()
- if action == 'load-config':
- parser.setFeature(xml.sax.handler.feature_external_ges, True)
- parser.parse("/not-user-controlled/default_config.xml")
- else:
- parser.parse(StringIO(xml_content))
- return
+ parser.setFeature(xml.sax.handler.feature_external_ges, True)
+ parser.setFeature(xml.sax.handler.feature_external_ges, False)
+ parser.parse(StringIO(xml_content)) # OK for XXE/DTD, NOT OK for billion laughs/quadratic
+ return
From ee23c05489deb55626fe0402760ff89535856c84 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Tue, 1 Mar 2022 17:15:19 +0100
Subject: [PATCH 36/79] Python: XML: Expose vuln kind on sink
---
.../Security/CWE-611/XmlEntityInjection.ql | 8 ++-
.../security/dataflow/XmlEntityInjection.qll | 18 -------
.../XmlEntityInjectionCustomizations.qll | 53 +++++++------------
3 files changed, 25 insertions(+), 54 deletions(-)
diff --git a/python/ql/src/experimental/Security/CWE-611/XmlEntityInjection.ql b/python/ql/src/experimental/Security/CWE-611/XmlEntityInjection.ql
index 8f22ded4b157..03f0c7b1c0e9 100644
--- a/python/ql/src/experimental/Security/CWE-611/XmlEntityInjection.ql
+++ b/python/ql/src/experimental/Security/CWE-611/XmlEntityInjection.ql
@@ -15,8 +15,12 @@ import python
import experimental.semmle.python.security.dataflow.XmlEntityInjection
import DataFlow::PathGraph
-from DataFlow::PathNode source, DataFlow::PathNode sink, string kind
-where XmlEntityInjection::xmlEntityInjectionVulnerable(source, sink, kind)
+from
+ XmlEntityInjection::XmlEntityInjectionConfiguration config, DataFlow::PathNode source,
+ DataFlow::PathNode sink, string kind
+where
+ config.hasFlowPath(source, sink) and
+ kind = sink.getNode().(XmlEntityInjection::Sink).getVulnerableKind()
select sink.getNode(), source, sink,
"$@ XML input is constructed from a $@ and is vulnerable to " + kind + ".", sink.getNode(),
"This", source.getNode(), "user-provided value"
diff --git a/python/ql/src/experimental/semmle/python/security/dataflow/XmlEntityInjection.qll b/python/ql/src/experimental/semmle/python/security/dataflow/XmlEntityInjection.qll
index 087c3057640e..35220e153d12 100644
--- a/python/ql/src/experimental/semmle/python/security/dataflow/XmlEntityInjection.qll
+++ b/python/ql/src/experimental/semmle/python/security/dataflow/XmlEntityInjection.qll
@@ -25,22 +25,4 @@ module XmlEntityInjection {
any(AdditionalTaintStep s).step(nodeFrom, nodeTo)
}
}
-
- private import DataFlow::PathGraph
-
- /** Holds if there is an XML injection from `source` to `sink` */
- predicate xmlEntityInjection(DataFlow::PathNode source, DataFlow::PathNode sink) {
- any(XmlEntityInjectionConfiguration x).hasFlowPath(source, sink)
- }
-
- /** Holds if there is an XML injection from `source` to `sink` vulnerable to `kind` */
- predicate xmlEntityInjectionVulnerable(
- DataFlow::PathNode source, DataFlow::PathNode sink, string kind
- ) {
- xmlEntityInjection(source, sink) and
- (
- xmlParsingInputAsVulnerableSink(sink.getNode(), kind) or
- xmlParserInputAsVulnerableSink(sink.getNode(), kind)
- )
- }
}
diff --git a/python/ql/src/experimental/semmle/python/security/dataflow/XmlEntityInjectionCustomizations.qll b/python/ql/src/experimental/semmle/python/security/dataflow/XmlEntityInjectionCustomizations.qll
index 8f8b3ae2c6ab..7de0c0c4b9c2 100644
--- a/python/ql/src/experimental/semmle/python/security/dataflow/XmlEntityInjectionCustomizations.qll
+++ b/python/ql/src/experimental/semmle/python/security/dataflow/XmlEntityInjectionCustomizations.qll
@@ -24,7 +24,10 @@ module XmlEntityInjection {
/**
* A data flow sink for "xml injection" vulnerabilities.
*/
- abstract class Sink extends DataFlow::Node { }
+ abstract class Sink extends DataFlow::Node {
+ /** Gets the kind of XML injection that this sink is vulnerable to. */
+ abstract string getVulnerableKind();
+ }
/**
* A sanitizer guard for "xml injection" vulnerabilities.
@@ -46,53 +49,35 @@ module XmlEntityInjection {
}
/**
- * A data flow sink for XML parsing libraries.
+ * An input to a direct XML parsing function, considered as a flow sink.
*
* See `XML::XMLParsing`.
*/
- abstract class XMLParsingSink extends Sink { }
+ class XMLParsingInputAsSink extends Sink {
+ XML::XMLParsing xmlParsing;
- /**
- * A data flow sink for XML parsers.
- *
- * See `XML::XMLParser`
- */
- abstract class XMLParserSink extends Sink { }
+ XMLParsingInputAsSink() { this = xmlParsing.getAnInput() }
- /**
- * A source of remote user input, considered as a flow source.
- */
- class RemoteFlowSourceAsSource extends Source, RemoteFlowSource { }
-
- /**
- * An xml parsing operation, considered as a flow sink.
- */
- class XMLParsingInputAsSink extends XMLParsingSink {
- XMLParsingInputAsSink() { this = any(XML::XMLParsing xmlParsing).getAnInput() }
+ override string getVulnerableKind() { xmlParsing.vulnerable(result) }
}
/**
- * An xml parsing operation vulnerable to `kind`.
+ * An input to an XML parser, considered as a flow sink.
+ *
+ * See `XML::XMLParser`.
*/
- predicate xmlParsingInputAsVulnerableSink(DataFlow::Node sink, string kind) {
- exists(XML::XMLParsing xmlParsing |
- sink = xmlParsing.getAnInput() and xmlParsing.vulnerable(kind)
- )
- }
+ class XMLParserInputAsSink extends Sink {
+ XML::XMLParser xmlParser;
- /**
- * An xml parser operation, considered as a flow sink.
- */
- class XMLParserInputAsSink extends XMLParserSink {
- XMLParserInputAsSink() { this = any(XML::XMLParser xmlParser).getAnInput() }
+ XMLParserInputAsSink() { this = xmlParser.getAnInput() }
+
+ override string getVulnerableKind() { xmlParser.vulnerable(result) }
}
/**
- * An xml parser operation vulnerable to `kind`.
+ * A source of remote user input, considered as a flow source.
*/
- predicate xmlParserInputAsVulnerableSink(DataFlow::Node sink, string kind) {
- exists(XML::XMLParser xmlParser | sink = xmlParser.getAnInput() and xmlParser.vulnerable(kind))
- }
+ class RemoteFlowSourceAsSource extends Source, RemoteFlowSource { }
/**
* A comparison with a constant string, considered as a sanitizer-guard.
From aaf55b21c46dbd3e6a84204d0a43f39ec32d85fe Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Wed, 2 Mar 2022 10:58:58 +0100
Subject: [PATCH 37/79] Python: Add XMLVulnerabilityKind
This gives some freedom to change the name presented, without having to worry about whether a typo has made everything break :|
---
.../experimental/semmle/python/Concepts.qll | 31 ++++++++++++++--
.../semmle/python/frameworks/Xml.qll | 37 ++++++++++---------
2 files changed, 47 insertions(+), 21 deletions(-)
diff --git a/python/ql/src/experimental/semmle/python/Concepts.qll b/python/ql/src/experimental/semmle/python/Concepts.qll
index 09f8e7897c58..4cdd803c9327 100644
--- a/python/ql/src/experimental/semmle/python/Concepts.qll
+++ b/python/ql/src/experimental/semmle/python/Concepts.qll
@@ -15,6 +15,29 @@ private import semmle.python.dataflow.new.TaintTracking
private import experimental.semmle.python.Frameworks
module XML {
+ /**
+ * A kind of XML vulnerability.
+ *
+ * See https://pypi.org/project/defusedxml/#python-xml-libraries
+ */
+ class XMLVulnerabilityKind extends string {
+ XMLVulnerabilityKind() {
+ this in ["Billion Laughs", "Quadratic Blowup", "XXE", "DTD retrieval",]
+ }
+
+ /** Holds for Billion Laughs vulnerability kind. */
+ predicate isBillionLaughs() { this = "Billion Laughs" }
+
+ /** Holds for Quadratic Blowup vulnerability kind. */
+ predicate isQuadraticBlowup() { this = "Quadratic Blowup" }
+
+ /** Holds for XXE vulnerability kind. */
+ predicate isXxe() { this = "XXE" }
+
+ /** Holds for DTD retrieval vulnerability kind. */
+ predicate isDtdRetrieval() { this = "DTD retrieval" }
+ }
+
/**
* A data-flow node that collects functions parsing XML.
*
@@ -30,7 +53,7 @@ module XML {
/**
* Holds if the parsing method or the parser holding it is vulnerable to `kind`.
*/
- predicate vulnerable(string kind) { super.vulnerable(kind) }
+ predicate vulnerable(XMLVulnerabilityKind kind) { super.vulnerable(kind) }
}
/** Provides classes for modeling XML parsing APIs. */
@@ -50,7 +73,7 @@ module XML {
/**
* Holds if the parsing method or the parser holding it is vulnerable to `kind`.
*/
- abstract predicate vulnerable(string kind);
+ abstract predicate vulnerable(XMLVulnerabilityKind kind);
}
}
@@ -69,7 +92,7 @@ module XML {
/**
* Holds if the parser is vulnerable to `kind`.
*/
- predicate vulnerable(string kind) { super.vulnerable(kind) }
+ predicate vulnerable(XMLVulnerabilityKind kind) { super.vulnerable(kind) }
}
/** Provides classes for modeling XML parsers. */
@@ -89,7 +112,7 @@ module XML {
/**
* Holds if the parser is vulnerable to `kind`.
*/
- abstract predicate vulnerable(string kind);
+ abstract predicate vulnerable(XMLVulnerabilityKind kind);
}
}
}
diff --git a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll
index cf4abbac995b..ffd8d44ba35c 100644
--- a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll
+++ b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll
@@ -24,7 +24,7 @@ private module Xml {
override DataFlow::Node getAnInput() { none() }
- override predicate vulnerable(string kind) { none() }
+ override predicate vulnerable(XML::XMLVulnerabilityKind kind) { none() }
}
/**
@@ -57,7 +57,7 @@ private module Xml {
override DataFlow::Node getAnInput() { result = this.getArg(0) }
- override predicate vulnerable(string kind) {
+ override predicate vulnerable(XML::XMLVulnerabilityKind kind) {
exists(XML::XMLParser xmlParser |
xmlParser = this.getArgByName("parser").getALocalSource() and xmlParser.vulnerable(kind)
)
@@ -111,27 +111,27 @@ private module Xml {
override DataFlow::Node getAnInput() { result = this.getAMethodCall("parse").getArg(0) }
- override predicate vulnerable(string kind) {
+ override predicate vulnerable(XML::XMLVulnerabilityKind kind) {
exists(DataFlow::MethodCallNode parse, API::Node handler, API::Node feature |
handler = API::moduleImport("xml").getMember("sax").getMember("handler") and
parse.calls(trackSaxFeature(this, feature), "parse") and
parse.getArg(0) = this.getAnInput() // enough to avoid FPs?
|
- kind = ["XXE", "DTD retrieval"] and
+ (kind.isXxe() or kind.isDtdRetrieval()) and
feature = handler.getMember("feature_external_ges")
or
- kind = ["Billion Laughs", "Quadratic Blowup"]
+ (kind.isBillionLaughs() or kind.isQuadraticBlowup())
)
}
- predicate vulnerable(DataFlow::Node n, string kind) {
+ predicate vulnerable(DataFlow::Node n, XML::XMLVulnerabilityKind kind) {
exists(API::Node handler, API::Node feature |
handler = API::moduleImport("xml").getMember("sax").getMember("handler") and
DataFlow::exprNode(trackSaxFeature(this, feature).asExpr())
.(DataFlow::LocalSourceNode)
.flowsTo(n)
|
- kind = ["XXE", "DTD retrieval"] and
+ (kind.isXxe() or kind.isDtdRetrieval()) and
feature = handler.getMember("feature_external_ges")
)
}
@@ -162,14 +162,14 @@ private module Xml {
override DataFlow::Node getAnInput() { none() }
- override predicate vulnerable(string kind) {
- kind = "XXE" and
+ override predicate vulnerable(XML::XMLVulnerabilityKind kind) {
+ kind.isXxe() and
not (
exists(this.getArgByName("resolve_entities")) or
this.getArgByName("resolve_entities").getALocalSource().asExpr() = any(False f)
)
or
- kind = ["Billion Laughs", "Quadratic Blowup"] and
+ (kind.isBillionLaughs() or kind.isQuadraticBlowup()) and
(
this.getArgByName("huge_tree").getALocalSource().asExpr() = any(True t) and
not this.getArgByName("resolve_entities").getALocalSource().asExpr() = any(False f)
@@ -206,12 +206,12 @@ private module Xml {
override DataFlow::Node getAnInput() { result = this.getArg(0) }
- override predicate vulnerable(string kind) {
+ override predicate vulnerable(XML::XMLVulnerabilityKind kind) {
exists(XML::XMLParser xmlParser |
xmlParser = this.getArgByName("parser").getALocalSource() and xmlParser.vulnerable(kind)
)
or
- kind = "XXE" and not exists(this.getArgByName("parser"))
+ kind.isXxe() and not exists(this.getArgByName("parser"))
}
}
@@ -233,8 +233,8 @@ private module Xml {
override DataFlow::Node getAnInput() { result = this.getArg(0) }
- override predicate vulnerable(string kind) {
- kind = ["Billion Laughs", "Quadratic Blowup"] and
+ override predicate vulnerable(XML::XMLVulnerabilityKind kind) {
+ (kind.isBillionLaughs() or kind.isQuadraticBlowup()) and
this.getArgByName("disable_entities").getALocalSource().asExpr() = any(False f)
}
}
@@ -266,12 +266,13 @@ private module Xml {
override DataFlow::Node getAnInput() { result = this.getArg(0) }
- override predicate vulnerable(string kind) {
+ override predicate vulnerable(XML::XMLVulnerabilityKind kind) {
exists(XML::XMLParser xmlParser |
xmlParser = this.getArgByName("parser").getALocalSource() and xmlParser.vulnerable(kind)
)
or
- kind = ["Billion Laughs", "Quadratic Blowup"] and not exists(this.getArgByName("parser"))
+ (kind.isBillionLaughs() or kind.isQuadraticBlowup()) and
+ not exists(this.getArgByName("parser"))
}
}
@@ -300,6 +301,8 @@ private module Xml {
result = this.getAMethodCall("register_function").getArg(0)
}
- override predicate vulnerable(string kind) { kind = ["Billion Laughs", "Quadratic Blowup"] }
+ override predicate vulnerable(XML::XMLVulnerabilityKind kind) {
+ kind.isBillionLaughs() or kind.isQuadraticBlowup()
+ }
}
}
From 16e482bf6fcb07bc11a77ca1e82c65baf41c1ac8 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Wed, 2 Mar 2022 11:53:02 +0100
Subject: [PATCH 38/79] Python: Improve QLDoc for XML parsing/parsers
---
python/ql/src/experimental/semmle/python/Concepts.qll | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/python/ql/src/experimental/semmle/python/Concepts.qll b/python/ql/src/experimental/semmle/python/Concepts.qll
index 4cdd803c9327..22616c0a5d2b 100644
--- a/python/ql/src/experimental/semmle/python/Concepts.qll
+++ b/python/ql/src/experimental/semmle/python/Concepts.qll
@@ -39,7 +39,7 @@ module XML {
}
/**
- * A data-flow node that collects functions parsing XML.
+ * A data-flow node that parses XML.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `XMLParsing` instead.
@@ -59,7 +59,7 @@ module XML {
/** Provides classes for modeling XML parsing APIs. */
module XMLParsing {
/**
- * A data-flow node that collects functions parsing XML.
+ * A data-flow node that parses XML.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `XMLParsing` instead.
@@ -78,7 +78,7 @@ module XML {
}
/**
- * A data-flow node that collects XML parsers.
+ * A data-flow node that constructs an XML parser.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `XMLParser` instead.
@@ -98,7 +98,7 @@ module XML {
/** Provides classes for modeling XML parsers. */
module XMLParser {
/**
- * A data-flow node that collects XML parsers.
+ * A data-flow node that constructs an XML parser.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `XMLParser` instead.
From 6dd776b2de9e6eede27d2cc22d9781db4fe8a83d Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Wed, 2 Mar 2022 14:52:11 +0100
Subject: [PATCH 39/79] Python: Only produce one alert per vulnerable XML sink
This made it much easier to debug the current alerts on tests at least.
Notice that it's important that we have `strictconcat` and not just
`concat`, since `concat` will also allow flow to sinks that are not
vulnerable to any kind of XML vulnerability :|
---
.../Security/CWE-611/XmlEntityInjection.ql | 11 +++-
.../CWE-611/XmlEntityInjection.expected | 55 ++++++-------------
2 files changed, 26 insertions(+), 40 deletions(-)
diff --git a/python/ql/src/experimental/Security/CWE-611/XmlEntityInjection.ql b/python/ql/src/experimental/Security/CWE-611/XmlEntityInjection.ql
index 03f0c7b1c0e9..922ca346b173 100644
--- a/python/ql/src/experimental/Security/CWE-611/XmlEntityInjection.ql
+++ b/python/ql/src/experimental/Security/CWE-611/XmlEntityInjection.ql
@@ -17,10 +17,15 @@ import DataFlow::PathGraph
from
XmlEntityInjection::XmlEntityInjectionConfiguration config, DataFlow::PathNode source,
- DataFlow::PathNode sink, string kind
+ DataFlow::PathNode sink, string kinds
where
config.hasFlowPath(source, sink) and
- kind = sink.getNode().(XmlEntityInjection::Sink).getVulnerableKind()
+ kinds =
+ strictconcat(string kind |
+ kind = sink.getNode().(XmlEntityInjection::Sink).getVulnerableKind()
+ |
+ kind, ", "
+ )
select sink.getNode(), source, sink,
- "$@ XML input is constructed from a $@ and is vulnerable to " + kind + ".", sink.getNode(),
+ "$@ XML input is constructed from a $@ and is vulnerable to: " + kinds + ".", sink.getNode(),
"This", source.getNode(), "user-provided value"
diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected b/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected
index 2e291875ce81..6c342ef223ea 100644
--- a/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected
+++ b/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected
@@ -214,40 +214,21 @@ nodes
| xml_to_dict.py:17:28:17:38 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content |
subpaths
#select
-| lxml_etree.py:13:34:13:44 | ControlFlowNode for xml_content | lxml_etree.py:11:19:11:25 | ControlFlowNode for request | lxml_etree.py:13:34:13:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to XXE. | lxml_etree.py:13:34:13:44 | ControlFlowNode for xml_content | This | lxml_etree.py:11:19:11:25 | ControlFlowNode for request | user-provided value |
-| lxml_etree.py:19:38:19:50 | ControlFlowNode for List | lxml_etree.py:17:19:17:25 | ControlFlowNode for request | lxml_etree.py:19:38:19:50 | ControlFlowNode for List | $@ XML input is constructed from a $@ and is vulnerable to XXE. | lxml_etree.py:19:38:19:50 | ControlFlowNode for List | This | lxml_etree.py:17:19:17:25 | ControlFlowNode for request | user-provided value |
-| lxml_etree.py:25:27:25:37 | ControlFlowNode for xml_content | lxml_etree.py:23:19:23:25 | ControlFlowNode for request | lxml_etree.py:25:27:25:37 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to XXE. | lxml_etree.py:25:27:25:37 | ControlFlowNode for xml_content | This | lxml_etree.py:23:19:23:25 | ControlFlowNode for request | user-provided value |
-| lxml_etree.py:31:29:31:49 | ControlFlowNode for StringIO() | lxml_etree.py:29:19:29:25 | ControlFlowNode for request | lxml_etree.py:31:29:31:49 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to XXE. | lxml_etree.py:31:29:31:49 | ControlFlowNode for StringIO() | This | lxml_etree.py:29:19:29:25 | ControlFlowNode for request | user-provided value |
-| lxml_etree.py:40:34:40:44 | ControlFlowNode for xml_content | lxml_etree.py:37:19:37:25 | ControlFlowNode for request | lxml_etree.py:40:34:40:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to XXE. | lxml_etree.py:40:34:40:44 | ControlFlowNode for xml_content | This | lxml_etree.py:37:19:37:25 | ControlFlowNode for request | user-provided value |
-| lxml_etree.py:47:34:47:44 | ControlFlowNode for xml_content | lxml_etree.py:44:19:44:25 | ControlFlowNode for request | lxml_etree.py:47:34:47:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to XXE. | lxml_etree.py:47:34:47:44 | ControlFlowNode for xml_content | This | lxml_etree.py:44:19:44:25 | ControlFlowNode for request | user-provided value |
-| lxml_etree.py:76:34:76:44 | ControlFlowNode for xml_content | lxml_etree.py:73:19:73:25 | ControlFlowNode for request | lxml_etree.py:76:34:76:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to Billion Laughs. | lxml_etree.py:76:34:76:44 | ControlFlowNode for xml_content | This | lxml_etree.py:73:19:73:25 | ControlFlowNode for request | user-provided value |
-| lxml_etree.py:76:34:76:44 | ControlFlowNode for xml_content | lxml_etree.py:73:19:73:25 | ControlFlowNode for request | lxml_etree.py:76:34:76:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to Quadratic Blowup. | lxml_etree.py:76:34:76:44 | ControlFlowNode for xml_content | This | lxml_etree.py:73:19:73:25 | ControlFlowNode for request | user-provided value |
-| lxml_etree.py:76:34:76:44 | ControlFlowNode for xml_content | lxml_etree.py:73:19:73:25 | ControlFlowNode for request | lxml_etree.py:76:34:76:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to XXE. | lxml_etree.py:76:34:76:44 | ControlFlowNode for xml_content | This | lxml_etree.py:73:19:73:25 | ControlFlowNode for request | user-provided value |
-| xml_dom.py:15:34:15:54 | ControlFlowNode for StringIO() | xml_dom.py:13:19:13:25 | ControlFlowNode for request | xml_dom.py:15:34:15:54 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to Billion Laughs. | xml_dom.py:15:34:15:54 | ControlFlowNode for StringIO() | This | xml_dom.py:13:19:13:25 | ControlFlowNode for request | user-provided value |
-| xml_dom.py:15:34:15:54 | ControlFlowNode for StringIO() | xml_dom.py:13:19:13:25 | ControlFlowNode for request | xml_dom.py:15:34:15:54 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to Quadratic Blowup. | xml_dom.py:15:34:15:54 | ControlFlowNode for StringIO() | This | xml_dom.py:13:19:13:25 | ControlFlowNode for request | user-provided value |
-| xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | xml_dom.py:19:19:19:25 | ControlFlowNode for request | xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to Billion Laughs. | xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | This | xml_dom.py:19:19:19:25 | ControlFlowNode for request | user-provided value |
-| xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | xml_dom.py:19:19:19:25 | ControlFlowNode for request | xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to Quadratic Blowup. | xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | This | xml_dom.py:19:19:19:25 | ControlFlowNode for request | user-provided value |
-| xml_dom.py:27:34:27:54 | ControlFlowNode for StringIO() | xml_dom.py:25:19:25:25 | ControlFlowNode for request | xml_dom.py:27:34:27:54 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to Billion Laughs. | xml_dom.py:27:34:27:54 | ControlFlowNode for StringIO() | This | xml_dom.py:25:19:25:25 | ControlFlowNode for request | user-provided value |
-| xml_dom.py:27:34:27:54 | ControlFlowNode for StringIO() | xml_dom.py:25:19:25:25 | ControlFlowNode for request | xml_dom.py:27:34:27:54 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to Quadratic Blowup. | xml_dom.py:27:34:27:54 | ControlFlowNode for StringIO() | This | xml_dom.py:25:19:25:25 | ControlFlowNode for request | user-provided value |
-| xml_dom.py:33:40:33:50 | ControlFlowNode for xml_content | xml_dom.py:31:19:31:25 | ControlFlowNode for request | xml_dom.py:33:40:33:50 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to Billion Laughs. | xml_dom.py:33:40:33:50 | ControlFlowNode for xml_content | This | xml_dom.py:31:19:31:25 | ControlFlowNode for request | user-provided value |
-| xml_dom.py:33:40:33:50 | ControlFlowNode for xml_content | xml_dom.py:31:19:31:25 | ControlFlowNode for request | xml_dom.py:33:40:33:50 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to Quadratic Blowup. | xml_dom.py:33:40:33:50 | ControlFlowNode for xml_content | This | xml_dom.py:31:19:31:25 | ControlFlowNode for request | user-provided value |
-| xml_etree.py:49:45:49:55 | ControlFlowNode for xml_content | xml_etree.py:46:19:46:25 | ControlFlowNode for request | xml_etree.py:49:45:49:55 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to XXE. | xml_etree.py:49:45:49:55 | ControlFlowNode for xml_content | This | xml_etree.py:46:19:46:25 | ControlFlowNode for request | user-provided value |
-| xml_etree.py:56:45:56:55 | ControlFlowNode for xml_content | xml_etree.py:53:19:53:25 | ControlFlowNode for request | xml_etree.py:56:45:56:55 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to XXE. | xml_etree.py:56:45:56:55 | ControlFlowNode for xml_content | This | xml_etree.py:53:19:53:25 | ControlFlowNode for request | user-provided value |
-| xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:63:19:63:25 | ControlFlowNode for request | xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to Billion Laughs. | xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:63:19:63:25 | ControlFlowNode for request | user-provided value |
-| xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:63:19:63:25 | ControlFlowNode for request | xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to DTD retrieval. | xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:63:19:63:25 | ControlFlowNode for request | user-provided value |
-| xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:63:19:63:25 | ControlFlowNode for request | xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to Quadratic Blowup. | xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:63:19:63:25 | ControlFlowNode for request | user-provided value |
-| xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:63:19:63:25 | ControlFlowNode for request | xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to XXE. | xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:63:19:63:25 | ControlFlowNode for request | user-provided value |
-| xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to Billion Laughs. | xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | user-provided value |
-| xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to DTD retrieval. | xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | user-provided value |
-| xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to Quadratic Blowup. | xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | user-provided value |
-| xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to XXE. | xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | user-provided value |
-| xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to Billion Laughs. | xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | user-provided value |
-| xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to DTD retrieval. | xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | user-provided value |
-| xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to Quadratic Blowup. | xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | user-provided value |
-| xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to XXE. | xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | user-provided value |
-| xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:98:19:98:25 | ControlFlowNode for request | xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to Billion Laughs. | xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:98:19:98:25 | ControlFlowNode for request | user-provided value |
-| xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:98:19:98:25 | ControlFlowNode for request | xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to DTD retrieval. | xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:98:19:98:25 | ControlFlowNode for request | user-provided value |
-| xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:98:19:98:25 | ControlFlowNode for request | xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to Quadratic Blowup. | xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:98:19:98:25 | ControlFlowNode for request | user-provided value |
-| xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:98:19:98:25 | ControlFlowNode for request | xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to XXE. | xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:98:19:98:25 | ControlFlowNode for request | user-provided value |
-| xml_to_dict.py:17:28:17:38 | ControlFlowNode for xml_content | xml_to_dict.py:15:19:15:25 | ControlFlowNode for request | xml_to_dict.py:17:28:17:38 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to Billion Laughs. | xml_to_dict.py:17:28:17:38 | ControlFlowNode for xml_content | This | xml_to_dict.py:15:19:15:25 | ControlFlowNode for request | user-provided value |
-| xml_to_dict.py:17:28:17:38 | ControlFlowNode for xml_content | xml_to_dict.py:15:19:15:25 | ControlFlowNode for request | xml_to_dict.py:17:28:17:38 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to Quadratic Blowup. | xml_to_dict.py:17:28:17:38 | ControlFlowNode for xml_content | This | xml_to_dict.py:15:19:15:25 | ControlFlowNode for request | user-provided value |
+| lxml_etree.py:13:34:13:44 | ControlFlowNode for xml_content | lxml_etree.py:11:19:11:25 | ControlFlowNode for request | lxml_etree.py:13:34:13:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: XXE. | lxml_etree.py:13:34:13:44 | ControlFlowNode for xml_content | This | lxml_etree.py:11:19:11:25 | ControlFlowNode for request | user-provided value |
+| lxml_etree.py:19:38:19:50 | ControlFlowNode for List | lxml_etree.py:17:19:17:25 | ControlFlowNode for request | lxml_etree.py:19:38:19:50 | ControlFlowNode for List | $@ XML input is constructed from a $@ and is vulnerable to: XXE. | lxml_etree.py:19:38:19:50 | ControlFlowNode for List | This | lxml_etree.py:17:19:17:25 | ControlFlowNode for request | user-provided value |
+| lxml_etree.py:25:27:25:37 | ControlFlowNode for xml_content | lxml_etree.py:23:19:23:25 | ControlFlowNode for request | lxml_etree.py:25:27:25:37 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: XXE. | lxml_etree.py:25:27:25:37 | ControlFlowNode for xml_content | This | lxml_etree.py:23:19:23:25 | ControlFlowNode for request | user-provided value |
+| lxml_etree.py:31:29:31:49 | ControlFlowNode for StringIO() | lxml_etree.py:29:19:29:25 | ControlFlowNode for request | lxml_etree.py:31:29:31:49 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: XXE. | lxml_etree.py:31:29:31:49 | ControlFlowNode for StringIO() | This | lxml_etree.py:29:19:29:25 | ControlFlowNode for request | user-provided value |
+| lxml_etree.py:40:34:40:44 | ControlFlowNode for xml_content | lxml_etree.py:37:19:37:25 | ControlFlowNode for request | lxml_etree.py:40:34:40:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: XXE. | lxml_etree.py:40:34:40:44 | ControlFlowNode for xml_content | This | lxml_etree.py:37:19:37:25 | ControlFlowNode for request | user-provided value |
+| lxml_etree.py:47:34:47:44 | ControlFlowNode for xml_content | lxml_etree.py:44:19:44:25 | ControlFlowNode for request | lxml_etree.py:47:34:47:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: XXE. | lxml_etree.py:47:34:47:44 | ControlFlowNode for xml_content | This | lxml_etree.py:44:19:44:25 | ControlFlowNode for request | user-provided value |
+| lxml_etree.py:76:34:76:44 | ControlFlowNode for xml_content | lxml_etree.py:73:19:73:25 | ControlFlowNode for request | lxml_etree.py:76:34:76:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup, XXE. | lxml_etree.py:76:34:76:44 | ControlFlowNode for xml_content | This | lxml_etree.py:73:19:73:25 | ControlFlowNode for request | user-provided value |
+| xml_dom.py:15:34:15:54 | ControlFlowNode for StringIO() | xml_dom.py:13:19:13:25 | ControlFlowNode for request | xml_dom.py:15:34:15:54 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_dom.py:15:34:15:54 | ControlFlowNode for StringIO() | This | xml_dom.py:13:19:13:25 | ControlFlowNode for request | user-provided value |
+| xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | xml_dom.py:19:19:19:25 | ControlFlowNode for request | xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | This | xml_dom.py:19:19:19:25 | ControlFlowNode for request | user-provided value |
+| xml_dom.py:27:34:27:54 | ControlFlowNode for StringIO() | xml_dom.py:25:19:25:25 | ControlFlowNode for request | xml_dom.py:27:34:27:54 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_dom.py:27:34:27:54 | ControlFlowNode for StringIO() | This | xml_dom.py:25:19:25:25 | ControlFlowNode for request | user-provided value |
+| xml_dom.py:33:40:33:50 | ControlFlowNode for xml_content | xml_dom.py:31:19:31:25 | ControlFlowNode for request | xml_dom.py:33:40:33:50 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_dom.py:33:40:33:50 | ControlFlowNode for xml_content | This | xml_dom.py:31:19:31:25 | ControlFlowNode for request | user-provided value |
+| xml_etree.py:49:45:49:55 | ControlFlowNode for xml_content | xml_etree.py:46:19:46:25 | ControlFlowNode for request | xml_etree.py:49:45:49:55 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: XXE. | xml_etree.py:49:45:49:55 | ControlFlowNode for xml_content | This | xml_etree.py:46:19:46:25 | ControlFlowNode for request | user-provided value |
+| xml_etree.py:56:45:56:55 | ControlFlowNode for xml_content | xml_etree.py:53:19:53:25 | ControlFlowNode for request | xml_etree.py:56:45:56:55 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: XXE. | xml_etree.py:56:45:56:55 | ControlFlowNode for xml_content | This | xml_etree.py:53:19:53:25 | ControlFlowNode for request | user-provided value |
+| xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:63:19:63:25 | ControlFlowNode for request | xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, DTD retrieval, Quadratic Blowup, XXE. | xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:63:19:63:25 | ControlFlowNode for request | user-provided value |
+| xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, DTD retrieval, Quadratic Blowup, XXE. | xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | user-provided value |
+| xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, DTD retrieval, Quadratic Blowup, XXE. | xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | user-provided value |
+| xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:98:19:98:25 | ControlFlowNode for request | xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, DTD retrieval, Quadratic Blowup, XXE. | xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:98:19:98:25 | ControlFlowNode for request | user-provided value |
+| xml_to_dict.py:17:28:17:38 | ControlFlowNode for xml_content | xml_to_dict.py:15:19:15:25 | ControlFlowNode for request | xml_to_dict.py:17:28:17:38 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_to_dict.py:17:28:17:38 | ControlFlowNode for xml_content | This | xml_to_dict.py:15:19:15:25 | ControlFlowNode for request | user-provided value |
From 7f7758b83dc1ae6a3e528cf6b3b7349e60fd3e56 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Wed, 2 Mar 2022 13:57:28 +0100
Subject: [PATCH 40/79] Python: rewrite xml sax modeling
---
.../semmle/python/frameworks/Xml.qll | 134 ++++++++++++------
.../CWE-611/XmlEntityInjection.expected | 6 +-
2 files changed, 93 insertions(+), 47 deletions(-)
diff --git a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll
index ffd8d44ba35c..1a01bf4c5c8f 100644
--- a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll
+++ b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll
@@ -64,32 +64,90 @@ private module Xml {
}
}
- /** Gets a reference to a `parser` that has been set a `feature`. */
- private DataFlow::Node trackSaxFeature(
- DataFlow::TypeTracker t, DataFlow::CallCfgNode parser, API::Node feature
+ /**
+ * A call to the `setFeature` method on a XML sax parser.
+ *
+ * See https://docs.python.org/3.10/library/xml.sax.reader.html#xml.sax.xmlreader.XMLReader.setFeature
+ */
+ class SaxParserSetFeatureCall extends DataFlow::MethodCallNode {
+ SaxParserSetFeatureCall() {
+ this =
+ API::moduleImport("xml")
+ .getMember("sax")
+ .getMember("make_parser")
+ .getReturn()
+ .getMember("setFeature")
+ .getACall()
+ }
+
+ // The keyword argument names do not match the documentation. I checked (with Python
+ // 3.9.5) that the names used here actually work.
+ DataFlow::Node getFeatureArg() { result in [this.getArg(0), this.getArgByName("name")] }
+
+ DataFlow::Node getStateArg() { result in [this.getArg(1), this.getArgByName("state")] }
+ }
+
+ /** Gets a back-reference to the `setFeature` state argument `arg`. */
+ private DataFlow::TypeTrackingNode saxParserSetFeatureStateArgBacktracker(
+ DataFlow::TypeBackTracker t, DataFlow::Node arg
) {
t.start() and
- exists(DataFlow::MethodCallNode featureCall |
- featureCall = parser.getAMethodCall("setFeature") and
- featureCall.getArg(0).getALocalSource() = feature.getAUse() and
- featureCall.getArg(1).getALocalSource() = DataFlow::exprNode(any(True t_)) and
- result = featureCall.getObject()
+ arg = any(SaxParserSetFeatureCall c).getStateArg() and
+ result = arg.getALocalSource()
+ or
+ exists(DataFlow::TypeBackTracker t2 |
+ result = saxParserSetFeatureStateArgBacktracker(t2, arg).backtrack(t2, t)
+ )
+ }
+
+ /** Gets a back-reference to the `setFeature` state argument `arg`. */
+ DataFlow::LocalSourceNode saxParserSetFeatureStateArgBacktracker(DataFlow::Node arg) {
+ result = saxParserSetFeatureStateArgBacktracker(DataFlow::TypeBackTracker::end(), arg)
+ }
+
+ /** Gets a reference to a XML sax parser that has `feature_external_ges` turned on */
+ private DataFlow::Node saxParserWithFeatureExternalGesTurnedOn(DataFlow::TypeTracker t) {
+ t.start() and
+ exists(SaxParserSetFeatureCall call |
+ call.getFeatureArg() =
+ API::moduleImport("xml")
+ .getMember("sax")
+ .getMember("handler")
+ .getMember("feature_external_ges")
+ .getAUse() and
+ saxParserSetFeatureStateArgBacktracker(call.getStateArg())
+ .asExpr()
+ .(BooleanLiteral)
+ .booleanValue() = true and
+ result = call.getObject()
)
or
exists(DataFlow::TypeTracker t2 |
- t = t2.smallstep(trackSaxFeature(t2, parser, feature), result)
+ t = t2.smallstep(saxParserWithFeatureExternalGesTurnedOn(t2), result)
+ ) and
+ // take into account that the feature can be set to False, which makes the parser safe again
+ not exists(SaxParserSetFeatureCall call |
+ call.getObject() = result and
+ call.getFeatureArg() =
+ API::moduleImport("xml")
+ .getMember("sax")
+ .getMember("handler")
+ .getMember("feature_external_ges")
+ .getAUse() and
+ saxParserSetFeatureStateArgBacktracker(call.getStateArg())
+ .asExpr()
+ .(BooleanLiteral)
+ .booleanValue() = false
)
}
- /** Gets a reference to a `parser` that has been set a `feature`. */
- DataFlow::Node trackSaxFeature(DataFlow::CallCfgNode parser, API::Node feature) {
- result = trackSaxFeature(DataFlow::TypeTracker::end(), parser, feature)
+ /** Gets a reference to a XML sax parser that has `feature_external_ges` turned on. */
+ DataFlow::Node saxParserWithFeatureExternalGesTurnedOn() {
+ result = saxParserWithFeatureExternalGesTurnedOn(DataFlow::TypeTracker::end())
}
/**
- * Gets a call to `xml.sax.make_parser`.
- *
- * Given the following example:
+ * A XML parsing call with a sax parser.
*
* ```py
* BadHandler = MainHandler()
@@ -99,41 +157,27 @@ private module Xml {
* parser.parse(StringIO(xml_content))
* parsed_xml = BadHandler._result
* ```
- *
- * * `this` would be `xml.sax.make_parser()`.
- * * `getAnInput()`'s result would be `StringIO(xml_content)`.
- * * `vulnerable(kind)`'s `kind` would be `Billion Laughs` and `Quadratic Blowup`.
*/
- private class XMLSaxParser extends DataFlow::CallCfgNode, XML::XMLParser::Range {
- XMLSaxParser() {
- this = API::moduleImport("xml").getMember("sax").getMember("make_parser").getACall()
+ private class XMLSaxParsing extends DataFlow::MethodCallNode, XML::XMLParsing::Range {
+ XMLSaxParsing() {
+ this =
+ API::moduleImport("xml")
+ .getMember("sax")
+ .getMember("make_parser")
+ .getReturn()
+ .getMember("parse")
+ .getACall()
}
- override DataFlow::Node getAnInput() { result = this.getAMethodCall("parse").getArg(0) }
+ override DataFlow::Node getAnInput() { result = this.getArg(0) }
override predicate vulnerable(XML::XMLVulnerabilityKind kind) {
- exists(DataFlow::MethodCallNode parse, API::Node handler, API::Node feature |
- handler = API::moduleImport("xml").getMember("sax").getMember("handler") and
- parse.calls(trackSaxFeature(this, feature), "parse") and
- parse.getArg(0) = this.getAnInput() // enough to avoid FPs?
- |
- (kind.isXxe() or kind.isDtdRetrieval()) and
- feature = handler.getMember("feature_external_ges")
- or
- (kind.isBillionLaughs() or kind.isQuadraticBlowup())
- )
- }
-
- predicate vulnerable(DataFlow::Node n, XML::XMLVulnerabilityKind kind) {
- exists(API::Node handler, API::Node feature |
- handler = API::moduleImport("xml").getMember("sax").getMember("handler") and
- DataFlow::exprNode(trackSaxFeature(this, feature).asExpr())
- .(DataFlow::LocalSourceNode)
- .flowsTo(n)
- |
- (kind.isXxe() or kind.isDtdRetrieval()) and
- feature = handler.getMember("feature_external_ges")
- )
+ // always vuln to these
+ (kind.isBillionLaughs() or kind.isQuadraticBlowup())
+ or
+ // can be vuln to other things if features have been turned on
+ this.getObject() = saxParserWithFeatureExternalGesTurnedOn() and
+ (kind.isXxe() or kind.isDtdRetrieval())
}
}
diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected b/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected
index 6c342ef223ea..0109566be06d 100644
--- a/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected
+++ b/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected
@@ -227,8 +227,10 @@ subpaths
| xml_dom.py:33:40:33:50 | ControlFlowNode for xml_content | xml_dom.py:31:19:31:25 | ControlFlowNode for request | xml_dom.py:33:40:33:50 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_dom.py:33:40:33:50 | ControlFlowNode for xml_content | This | xml_dom.py:31:19:31:25 | ControlFlowNode for request | user-provided value |
| xml_etree.py:49:45:49:55 | ControlFlowNode for xml_content | xml_etree.py:46:19:46:25 | ControlFlowNode for request | xml_etree.py:49:45:49:55 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: XXE. | xml_etree.py:49:45:49:55 | ControlFlowNode for xml_content | This | xml_etree.py:46:19:46:25 | ControlFlowNode for request | user-provided value |
| xml_etree.py:56:45:56:55 | ControlFlowNode for xml_content | xml_etree.py:53:19:53:25 | ControlFlowNode for request | xml_etree.py:56:45:56:55 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: XXE. | xml_etree.py:56:45:56:55 | ControlFlowNode for xml_content | This | xml_etree.py:53:19:53:25 | ControlFlowNode for request | user-provided value |
+| xml_sax_make_parser.py:36:18:36:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:31:19:31:25 | ControlFlowNode for request | xml_sax_make_parser.py:36:18:36:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_sax_make_parser.py:36:18:36:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:31:19:31:25 | ControlFlowNode for request | user-provided value |
+| xml_sax_make_parser.py:49:18:49:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:42:19:42:25 | ControlFlowNode for request | xml_sax_make_parser.py:49:18:49:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_sax_make_parser.py:49:18:49:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:42:19:42:25 | ControlFlowNode for request | user-provided value |
| xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:63:19:63:25 | ControlFlowNode for request | xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, DTD retrieval, Quadratic Blowup, XXE. | xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:63:19:63:25 | ControlFlowNode for request | user-provided value |
| xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, DTD retrieval, Quadratic Blowup, XXE. | xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | user-provided value |
-| xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, DTD retrieval, Quadratic Blowup, XXE. | xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | user-provided value |
-| xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:98:19:98:25 | ControlFlowNode for request | xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, DTD retrieval, Quadratic Blowup, XXE. | xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:98:19:98:25 | ControlFlowNode for request | user-provided value |
+| xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | user-provided value |
+| xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:98:19:98:25 | ControlFlowNode for request | xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:98:19:98:25 | ControlFlowNode for request | user-provided value |
| xml_to_dict.py:17:28:17:38 | ControlFlowNode for xml_content | xml_to_dict.py:15:19:15:25 | ControlFlowNode for request | xml_to_dict.py:17:28:17:38 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_to_dict.py:17:28:17:38 | ControlFlowNode for xml_content | This | xml_to_dict.py:15:19:15:25 | ControlFlowNode for request | user-provided value |
From 515b824b3cd857dc29a3484817a1a0d170bae2f6 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Thu, 3 Mar 2022 09:42:19 +0100
Subject: [PATCH 41/79] Python: Add lxml positive test
---
.../CWE-611/XmlEntityInjection.expected | 23 ++++++++++++-------
.../Security/CWE-611/lxml_etree.py | 8 +++++++
2 files changed, 23 insertions(+), 8 deletions(-)
diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected b/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected
index 0109566be06d..634e7dd28d7c 100644
--- a/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected
+++ b/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected
@@ -20,12 +20,15 @@ edges
| lxml_etree.py:54:19:54:25 | ControlFlowNode for request | lxml_etree.py:54:19:54:30 | ControlFlowNode for Attribute |
| lxml_etree.py:54:19:54:30 | ControlFlowNode for Attribute | lxml_etree.py:54:19:54:45 | ControlFlowNode for Subscript |
| lxml_etree.py:54:19:54:45 | ControlFlowNode for Subscript | lxml_etree.py:57:34:57:44 | ControlFlowNode for xml_content |
-| lxml_etree.py:65:19:65:25 | ControlFlowNode for request | lxml_etree.py:65:19:65:30 | ControlFlowNode for Attribute |
-| lxml_etree.py:65:19:65:30 | ControlFlowNode for Attribute | lxml_etree.py:65:19:65:45 | ControlFlowNode for Subscript |
-| lxml_etree.py:65:19:65:45 | ControlFlowNode for Subscript | lxml_etree.py:68:34:68:44 | ControlFlowNode for xml_content |
+| lxml_etree.py:62:19:62:25 | ControlFlowNode for request | lxml_etree.py:62:19:62:30 | ControlFlowNode for Attribute |
+| lxml_etree.py:62:19:62:30 | ControlFlowNode for Attribute | lxml_etree.py:62:19:62:45 | ControlFlowNode for Subscript |
+| lxml_etree.py:62:19:62:45 | ControlFlowNode for Subscript | lxml_etree.py:65:34:65:44 | ControlFlowNode for xml_content |
| lxml_etree.py:73:19:73:25 | ControlFlowNode for request | lxml_etree.py:73:19:73:30 | ControlFlowNode for Attribute |
| lxml_etree.py:73:19:73:30 | ControlFlowNode for Attribute | lxml_etree.py:73:19:73:45 | ControlFlowNode for Subscript |
| lxml_etree.py:73:19:73:45 | ControlFlowNode for Subscript | lxml_etree.py:76:34:76:44 | ControlFlowNode for xml_content |
+| lxml_etree.py:81:19:81:25 | ControlFlowNode for request | lxml_etree.py:81:19:81:30 | ControlFlowNode for Attribute |
+| lxml_etree.py:81:19:81:30 | ControlFlowNode for Attribute | lxml_etree.py:81:19:81:45 | ControlFlowNode for Subscript |
+| lxml_etree.py:81:19:81:45 | ControlFlowNode for Subscript | lxml_etree.py:84:34:84:44 | ControlFlowNode for xml_content |
| xml_dom.py:13:19:13:25 | ControlFlowNode for request | xml_dom.py:13:19:13:30 | ControlFlowNode for Attribute |
| xml_dom.py:13:19:13:30 | ControlFlowNode for Attribute | xml_dom.py:13:19:13:45 | ControlFlowNode for Subscript |
| xml_dom.py:13:19:13:45 | ControlFlowNode for Subscript | xml_dom.py:15:34:15:54 | ControlFlowNode for StringIO() |
@@ -119,14 +122,18 @@ nodes
| lxml_etree.py:54:19:54:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
| lxml_etree.py:54:19:54:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
| lxml_etree.py:57:34:57:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content |
-| lxml_etree.py:65:19:65:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
-| lxml_etree.py:65:19:65:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
-| lxml_etree.py:65:19:65:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
-| lxml_etree.py:68:34:68:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content |
+| lxml_etree.py:62:19:62:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
+| lxml_etree.py:62:19:62:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
+| lxml_etree.py:62:19:62:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
+| lxml_etree.py:65:34:65:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content |
| lxml_etree.py:73:19:73:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
| lxml_etree.py:73:19:73:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
| lxml_etree.py:73:19:73:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
| lxml_etree.py:76:34:76:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content |
+| lxml_etree.py:81:19:81:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
+| lxml_etree.py:81:19:81:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
+| lxml_etree.py:81:19:81:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
+| lxml_etree.py:84:34:84:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content |
| xml_dom.py:13:19:13:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
| xml_dom.py:13:19:13:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
| xml_dom.py:13:19:13:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
@@ -220,7 +227,7 @@ subpaths
| lxml_etree.py:31:29:31:49 | ControlFlowNode for StringIO() | lxml_etree.py:29:19:29:25 | ControlFlowNode for request | lxml_etree.py:31:29:31:49 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: XXE. | lxml_etree.py:31:29:31:49 | ControlFlowNode for StringIO() | This | lxml_etree.py:29:19:29:25 | ControlFlowNode for request | user-provided value |
| lxml_etree.py:40:34:40:44 | ControlFlowNode for xml_content | lxml_etree.py:37:19:37:25 | ControlFlowNode for request | lxml_etree.py:40:34:40:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: XXE. | lxml_etree.py:40:34:40:44 | ControlFlowNode for xml_content | This | lxml_etree.py:37:19:37:25 | ControlFlowNode for request | user-provided value |
| lxml_etree.py:47:34:47:44 | ControlFlowNode for xml_content | lxml_etree.py:44:19:44:25 | ControlFlowNode for request | lxml_etree.py:47:34:47:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: XXE. | lxml_etree.py:47:34:47:44 | ControlFlowNode for xml_content | This | lxml_etree.py:44:19:44:25 | ControlFlowNode for request | user-provided value |
-| lxml_etree.py:76:34:76:44 | ControlFlowNode for xml_content | lxml_etree.py:73:19:73:25 | ControlFlowNode for request | lxml_etree.py:76:34:76:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup, XXE. | lxml_etree.py:76:34:76:44 | ControlFlowNode for xml_content | This | lxml_etree.py:73:19:73:25 | ControlFlowNode for request | user-provided value |
+| lxml_etree.py:84:34:84:44 | ControlFlowNode for xml_content | lxml_etree.py:81:19:81:25 | ControlFlowNode for request | lxml_etree.py:84:34:84:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup, XXE. | lxml_etree.py:84:34:84:44 | ControlFlowNode for xml_content | This | lxml_etree.py:81:19:81:25 | ControlFlowNode for request | user-provided value |
| xml_dom.py:15:34:15:54 | ControlFlowNode for StringIO() | xml_dom.py:13:19:13:25 | ControlFlowNode for request | xml_dom.py:15:34:15:54 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_dom.py:15:34:15:54 | ControlFlowNode for StringIO() | This | xml_dom.py:13:19:13:25 | ControlFlowNode for request | user-provided value |
| xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | xml_dom.py:19:19:19:25 | ControlFlowNode for request | xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | This | xml_dom.py:19:19:19:25 | ControlFlowNode for request | user-provided value |
| xml_dom.py:27:34:27:54 | ControlFlowNode for StringIO() | xml_dom.py:25:19:25:25 | ControlFlowNode for request | xml_dom.py:27:34:27:54 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_dom.py:27:34:27:54 | ControlFlowNode for StringIO() | This | xml_dom.py:25:19:25:25 | ControlFlowNode for request | user-provided value |
diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/lxml_etree.py b/python/ql/test/experimental/query-tests/Security/CWE-611/lxml_etree.py
index 2c3c6f5f2ffc..231116c2b720 100644
--- a/python/ql/test/experimental/query-tests/Security/CWE-611/lxml_etree.py
+++ b/python/ql/test/experimental/query-tests/Security/CWE-611/lxml_etree.py
@@ -56,6 +56,14 @@ def lxml_parser():
parser = lxml.etree.XMLParser(resolve_entities=False)
return lxml.etree.fromstring(xml_content, parser=parser).text
+# XXE-vuln
+@app.route("/lxml_etree_fromstring-lxml.etree.XMLParser+")
+def lxml_parser():
+ xml_content = request.args['xml_content']
+
+ parser = lxml.etree.XMLParser(resolve_entities=True)
+ return lxml.etree.fromstring(xml_content, parser=parser).text
+
# Billion laughs and quadratic blowup (huge_tree)
## Good (huge_tree=True but resolve_entities=False)
From 661d8bf553778aa49054347e215015877c3876a5 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Thu, 3 Mar 2022 09:45:42 +0100
Subject: [PATCH 42/79] Python: Better handling of `resolve_entities` arg in
lxml
---
.../ql/src/experimental/semmle/python/frameworks/Xml.qll | 9 ++++++---
.../Security/CWE-611/XmlEntityInjection.expected | 1 +
2 files changed, 7 insertions(+), 3 deletions(-)
diff --git a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll
index 1a01bf4c5c8f..6f865e13cdb1 100644
--- a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll
+++ b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll
@@ -206,11 +206,14 @@ private module Xml {
override DataFlow::Node getAnInput() { none() }
+ // NOTE: it's not possible to change settings of a parser after constructing it
override predicate vulnerable(XML::XMLVulnerabilityKind kind) {
kind.isXxe() and
- not (
- exists(this.getArgByName("resolve_entities")) or
- this.getArgByName("resolve_entities").getALocalSource().asExpr() = any(False f)
+ (
+ // resolve_entities has default True
+ not exists(this.getArgByName("resolve_entities"))
+ or
+ this.getArgByName("resolve_entities").getALocalSource().asExpr() = any(True t)
)
or
(kind.isBillionLaughs() or kind.isQuadraticBlowup()) and
diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected b/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected
index 634e7dd28d7c..86edcb89d4da 100644
--- a/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected
+++ b/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected
@@ -227,6 +227,7 @@ subpaths
| lxml_etree.py:31:29:31:49 | ControlFlowNode for StringIO() | lxml_etree.py:29:19:29:25 | ControlFlowNode for request | lxml_etree.py:31:29:31:49 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: XXE. | lxml_etree.py:31:29:31:49 | ControlFlowNode for StringIO() | This | lxml_etree.py:29:19:29:25 | ControlFlowNode for request | user-provided value |
| lxml_etree.py:40:34:40:44 | ControlFlowNode for xml_content | lxml_etree.py:37:19:37:25 | ControlFlowNode for request | lxml_etree.py:40:34:40:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: XXE. | lxml_etree.py:40:34:40:44 | ControlFlowNode for xml_content | This | lxml_etree.py:37:19:37:25 | ControlFlowNode for request | user-provided value |
| lxml_etree.py:47:34:47:44 | ControlFlowNode for xml_content | lxml_etree.py:44:19:44:25 | ControlFlowNode for request | lxml_etree.py:47:34:47:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: XXE. | lxml_etree.py:47:34:47:44 | ControlFlowNode for xml_content | This | lxml_etree.py:44:19:44:25 | ControlFlowNode for request | user-provided value |
+| lxml_etree.py:65:34:65:44 | ControlFlowNode for xml_content | lxml_etree.py:62:19:62:25 | ControlFlowNode for request | lxml_etree.py:65:34:65:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: XXE. | lxml_etree.py:65:34:65:44 | ControlFlowNode for xml_content | This | lxml_etree.py:62:19:62:25 | ControlFlowNode for request | user-provided value |
| lxml_etree.py:84:34:84:44 | ControlFlowNode for xml_content | lxml_etree.py:81:19:81:25 | ControlFlowNode for request | lxml_etree.py:84:34:84:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup, XXE. | lxml_etree.py:84:34:84:44 | ControlFlowNode for xml_content | This | lxml_etree.py:81:19:81:25 | ControlFlowNode for request | user-provided value |
| xml_dom.py:15:34:15:54 | ControlFlowNode for StringIO() | xml_dom.py:13:19:13:25 | ControlFlowNode for request | xml_dom.py:15:34:15:54 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_dom.py:15:34:15:54 | ControlFlowNode for StringIO() | This | xml_dom.py:13:19:13:25 | ControlFlowNode for request | user-provided value |
| xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | xml_dom.py:19:19:19:25 | ControlFlowNode for request | xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | This | xml_dom.py:19:19:19:25 | ControlFlowNode for request | user-provided value |
From 52891cb4763bc3714a2e3cc95ea240145d55d910 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Thu, 3 Mar 2022 12:36:24 +0100
Subject: [PATCH 43/79] Python: Add PoC for XML vulns
---
.../Security/CWE-611/dont_extract/PoC.py | 449 ++++++++++++++++++
.../Security/CWE-611/dont_extract/flag | 1 +
2 files changed, 450 insertions(+)
create mode 100644 python/ql/test/experimental/query-tests/Security/CWE-611/dont_extract/PoC.py
create mode 100644 python/ql/test/experimental/query-tests/Security/CWE-611/dont_extract/flag
diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/dont_extract/PoC.py b/python/ql/test/experimental/query-tests/Security/CWE-611/dont_extract/PoC.py
new file mode 100644
index 000000000000..85301c32bff6
--- /dev/null
+++ b/python/ql/test/experimental/query-tests/Security/CWE-611/dont_extract/PoC.py
@@ -0,0 +1,449 @@
+#!/usr/bin/env python3
+
+# NOTE: this file lives in the `dont_extract` directory, presumably so the extractor
+# doesn't pick it up and it doesn't have to be annotated (TODO confirm)
+
+# This file shows ways to exploit vulnerable XML parsing
+# see
+# https://pypi.org/project/defusedxml/#python-xml-libraries
+# https://docs.python.org/3.10/library/xml.html#xml-vulnerabilities
+
+import pathlib
+from flask import Flask
+import threading
+import multiprocessing
+import time
+from io import StringIO
+import pytest
+
+HOST = "localhost"
+PORT = 8080
+
+
+FLAG_PATH = pathlib.Path(__file__).with_name("flag")
+
+# ==============================================================================
+# xml samples
+
+ok_xml = f"""
+hello world
+"""
+
+local_xxe = f"""
+
+]>
+&xxe;
+"""
+
+remote_xxe = f"""
+
+]>
+&remote_xxe;
+"""
+
+billion_laughs = """
+
+
+
+
+
+
+
+
+
+
+
+]>
+&lol9;"""
+
+quadratic_blowup = f"""
+
+]>
+{"&oops;"*20000}"""
+
+dtd_retrieval = f"""
+
+bar
+"""
+
+# ==============================================================================
+# other setup
+
+# we set up a local Flask application so we can test whether loading external resources
+# works (such as SSRF via DTD retrieval)
+app = Flask(__name__)
+
+@app.route("/alive")
+def alive():
+ return "ok"
+
+hit_dtd = False
+@app.route("/test.dtd")
+def test_dtd():
+ global hit_dtd
+ hit_dtd = True
+ return """"""
+
+hit_xxe = False
+@app.route("/xxe")
+def test_xxe():
+ global hit_xxe
+ hit_xxe = True
+ return "ok"
+
+def run_app():
+ app.run(host=HOST, port=PORT)
+
+@pytest.fixture(scope="session", autouse=True)
+def flask_app_running():
+ # run flask in other thread
+ flask_thread = threading.Thread(target=run_app, daemon=True)
+ flask_thread.start()
+
+ # give flask a bit of time to start
+ time.sleep(0.1)
+
+ # ensure that the server works
+ import requests
+ requests.get(f"http://{HOST}:{PORT}/alive")
+
+ yield
+
+def expects_timeout(func):
+ def inner():
+ proc = multiprocessing.Process(target=func)
+ proc.start()
+ time.sleep(0.1)
+ assert proc.exitcode == None
+ proc.kill()
+ proc.join()
+ return inner
+
+
+class TestExpectsTimeout:
+ "test that expects_timeout works as expected"
+
+ @staticmethod
+ @expects_timeout
+ def test_slow():
+ time.sleep(1000)
+
+ @staticmethod
+ def test_fast():
+ @expects_timeout
+ def fast_func():
+ return "done!"
+
+ with pytest.raises(AssertionError):
+ fast_func()
+
+# ==============================================================================
+import xml.sax
+
+class SimpleHandler(xml.sax.ContentHandler):
+ def __init__(self):
+ self.result = []
+
+ def characters(self, data):
+ self.result.append(data)
+
+class TestSax():
+ # always vuln to billion laughs, quadratic
+
+ @staticmethod
+ @expects_timeout
+ def test_billion_laughs_allowed_by_default():
+ parser = xml.sax.make_parser()
+ parser.parse(StringIO(billion_laughs))
+
+ @staticmethod
+ @expects_timeout
+ def test_quardratic_blowup_allowed_by_default():
+ parser = xml.sax.make_parser()
+ parser.parse(StringIO(quadratic_blowup))
+
+ @staticmethod
+ def test_ok_xml():
+ handler = SimpleHandler()
+ parser = xml.sax.make_parser()
+ parser.setContentHandler(handler)
+ parser.parse(StringIO(ok_xml))
+ assert handler.result == ["hello world"], handler.result
+
+ @staticmethod
+ def test_xxe_disabled_by_default():
+ handler = SimpleHandler()
+ parser = xml.sax.make_parser()
+ parser.setContentHandler(handler)
+ parser.parse(StringIO(local_xxe))
+ assert handler.result == [], handler.result
+
+ @staticmethod
+ def test_local_xxe_manually_enabled():
+ handler = SimpleHandler()
+ parser = xml.sax.make_parser()
+ parser.setContentHandler(handler)
+ parser.setFeature(xml.sax.handler.feature_external_ges, True)
+ parser.parse(StringIO(local_xxe))
+ assert handler.result[0] == "SECRET_FLAG", handler.result
+
+ @staticmethod
+ def test_remote_xxe_manually_enabled():
+ global hit_xxe
+ hit_xxe = False
+
+ handler = SimpleHandler()
+ parser = xml.sax.make_parser()
+ parser.setContentHandler(handler)
+ parser.setFeature(xml.sax.handler.feature_external_ges, True)
+ parser.parse(StringIO(remote_xxe))
+ assert handler.result == ["ok"], handler.result
+ assert hit_xxe == True
+
+ @staticmethod
+ def test_dtd_disabled_by_default():
+ global hit_dtd
+ hit_dtd = False
+
+ parser = xml.sax.make_parser()
+ parser.parse(StringIO(dtd_retrieval))
+ assert hit_dtd == False
+
+ @staticmethod
+ def test_dtd_manually_enabled():
+ global hit_dtd
+ hit_dtd = False
+
+ parser = xml.sax.make_parser()
+ parser.setFeature(xml.sax.handler.feature_external_ges, True)
+ parser.parse(StringIO(dtd_retrieval))
+ assert hit_dtd == True
+
+
+# ==============================================================================
+import xml.etree.ElementTree
+
+class TestEtree:
+
+ # always vuln to billion laughs, quadratic
+ @staticmethod
+ @expects_timeout
+ def test_billion_laughs_allowed_by_default():
+ parser = xml.etree.ElementTree.XMLParser()
+ _root = xml.etree.ElementTree.fromstring(billion_laughs, parser=parser)
+
+ @staticmethod
+ @expects_timeout
+ def test_quardratic_blowup_allowed_by_default():
+ parser = xml.etree.ElementTree.XMLParser()
+ _root = xml.etree.ElementTree.fromstring(quadratic_blowup, parser=parser)
+
+ @staticmethod
+ def test_ok_xml():
+ parser = xml.etree.ElementTree.XMLParser()
+ root = xml.etree.ElementTree.fromstring(ok_xml, parser=parser)
+ assert root.tag == "test"
+ assert root.text == "hello world"
+
+ @staticmethod
+ def test_xxe_not_possible():
+ parser = xml.etree.ElementTree.XMLParser()
+ try:
+ _root = xml.etree.ElementTree.fromstring(local_xxe, parser=parser)
+ assert False
+ except xml.etree.ElementTree.ParseError as e:
+ assert "undefined entity &xxe" in str(e)
+
+ @staticmethod
+ def test_dtd_not_possible():
+ global hit_dtd
+ hit_dtd = False
+
+ parser = xml.etree.ElementTree.XMLParser()
+ _root = xml.etree.ElementTree.fromstring(dtd_retrieval, parser=parser)
+ assert hit_dtd == False
+
+# ==============================================================================
+import lxml.etree
+
+class TestLxml:
+ # see https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.XMLParser
+ @staticmethod
+ def test_billion_laughs_disabled_by_default():
+ parser = lxml.etree.XMLParser()
+ try:
+ _root = lxml.etree.fromstring(billion_laughs, parser=parser)
+ assert False
+ except lxml.etree.XMLSyntaxError as e:
+ assert "Detected an entity reference loop" in str(e)
+
+ @staticmethod
+ def test_quardratic_blowup_disabled_by_default():
+ parser = lxml.etree.XMLParser()
+ try:
+ _root = lxml.etree.fromstring(quadratic_blowup, parser=parser)
+ assert False
+ except lxml.etree.XMLSyntaxError as e:
+ assert "Detected an entity reference loop" in str(e)
+
+ @staticmethod
+ @expects_timeout
+ def test_billion_laughs_manually_enabled():
+ parser = lxml.etree.XMLParser(huge_tree=True)
+ root = lxml.etree.fromstring(billion_laughs, parser=parser)
+
+ @staticmethod
+ @expects_timeout
+ def test_quadratic_blowup_manually_enabled():
+ parser = lxml.etree.XMLParser(huge_tree=True)
+ try:
+ _root = lxml.etree.fromstring(quadratic_blowup, parser=parser)
+ assert False
+ except lxml.etree.XMLSyntaxError as e:
+ assert "Detected an entity reference loop" in str(e)
+
+ @staticmethod
+ def test_ok_xml():
+ parser = lxml.etree.XMLParser()
+ root = lxml.etree.fromstring(ok_xml, parser=parser)
+ assert root.tag == "test"
+ assert root.text == "hello world"
+
+ @staticmethod
+ def test_local_xxe_enabled_by_default():
+ parser = lxml.etree.XMLParser()
+ root = lxml.etree.fromstring(local_xxe, parser=parser)
+ assert root.tag == "test"
+ assert root.text == "SECRET_FLAG\n", root.text
+
+ @staticmethod
+ def test_local_xxe_disabled():
+ parser = lxml.etree.XMLParser(resolve_entities=False)
+ root = lxml.etree.fromstring(local_xxe, parser=parser)
+ assert root.tag == "test"
+ assert root.text == None
+
+ @staticmethod
+ def test_remote_xxe_disabled_by_default():
+ global hit_xxe
+ hit_xxe = False
+
+ parser = lxml.etree.XMLParser()
+ try:
+ root = lxml.etree.fromstring(remote_xxe, parser=parser)
+ assert False
+ except lxml.etree.XMLSyntaxError as e:
+ assert "Failure to process entity remote_xxe" in str(e)
+ assert hit_xxe == False
+
+ @staticmethod
+ def test_remote_xxe_manually_enabled():
+ global hit_xxe
+ hit_xxe = False
+
+ parser = lxml.etree.XMLParser(no_network=False)
+ root = lxml.etree.fromstring(remote_xxe, parser=parser)
+ assert root.tag == "test"
+ assert root.text == "ok"
+ assert hit_xxe == True
+
+ @staticmethod
+ def test_dtd_disabled_by_default():
+ global hit_dtd
+ hit_dtd = False
+
+ parser = lxml.etree.XMLParser()
+ root = lxml.etree.fromstring(dtd_retrieval, parser=parser)
+ assert hit_dtd == False
+
+ @staticmethod
+ def test_dtd_manually_enabled():
+ global hit_dtd
+ hit_dtd = False
+
+ # Need to set BOTH load_dtd and no_network
+ parser = lxml.etree.XMLParser(load_dtd=True)
+ root = lxml.etree.fromstring(dtd_retrieval, parser=parser)
+ assert hit_dtd == False
+
+ parser = lxml.etree.XMLParser(no_network=False)
+ root = lxml.etree.fromstring(dtd_retrieval, parser=parser)
+ assert hit_dtd == False
+
+ parser = lxml.etree.XMLParser(load_dtd=True, no_network=False)
+ root = lxml.etree.fromstring(dtd_retrieval, parser=parser)
+ assert hit_dtd == True
+
+ hit_dtd = False
+
+ # Setting dtd_validation also does not allow the remote access
+ parser = lxml.etree.XMLParser(dtd_validation=True, load_dtd=True)
+ try:
+ root = lxml.etree.fromstring(dtd_retrieval, parser=parser)
+ except lxml.etree.XMLSyntaxError:
+ pass
+ assert hit_dtd == False
+
+
+# ==============================================================================
+
+import xmltodict
+
+class TestXmltodict:
+ @staticmethod
+ def test_billion_laughs_disabled_by_default():
+ d = xmltodict.parse(billion_laughs)
+ assert d == {"lolz": None}, d
+
+ @staticmethod
+ def test_quardratic_blowup_disabled_by_default():
+ d = xmltodict.parse(quadratic_blowup)
+ assert d == {"foo": None}, d
+
+ @staticmethod
+ @expects_timeout
+ def test_billion_laughs_manually_enabled():
+ xmltodict.parse(billion_laughs, disable_entities=False)
+
+ @staticmethod
+ @expects_timeout
+ def test_quardratic_blowup_manually_enabled():
+ xmltodict.parse(quadratic_blowup, disable_entities=False)
+
+ @staticmethod
+ def test_ok_xml():
+ d = xmltodict.parse(ok_xml)
+ assert d == {"test": "hello world"}, d
+
+ @staticmethod
+ def test_local_xxe_not_possible():
+ d = xmltodict.parse(local_xxe)
+ assert d == {"test": None}
+
+ d = xmltodict.parse(local_xxe, disable_entities=False)
+ assert d == {"test": None}
+
+ @staticmethod
+ def test_remote_xxe_not_possible():
+ global hit_xxe
+ hit_xxe = False
+
+ d = xmltodict.parse(remote_xxe)
+ assert d == {"test": None}
+ assert hit_xxe == False
+
+ d = xmltodict.parse(remote_xxe, disable_entities=False)
+ assert d == {"test": None}
+ assert hit_xxe == False
+
+ @staticmethod
+ def test_dtd_not_possible():
+ global hit_dtd
+ hit_dtd = False
+
+ d = xmltodict.parse(dtd_retrieval)
+ assert hit_dtd == False
diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/dont_extract/flag b/python/ql/test/experimental/query-tests/Security/CWE-611/dont_extract/flag
new file mode 100644
index 000000000000..45c9436ee9f2
--- /dev/null
+++ b/python/ql/test/experimental/query-tests/Security/CWE-611/dont_extract/flag
@@ -0,0 +1 @@
+SECRET_FLAG
From 3c321dd98dcd62193844f61c03eaa81ca5d4ee43 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Thu, 3 Mar 2022 13:49:17 +0100
Subject: [PATCH 44/79] Python: Model `lxml.etree.get_default_parser` in own
class
---
.../semmle/python/frameworks/Xml.qll | 39 ++++++++++++-------
1 file changed, 24 insertions(+), 15 deletions(-)
diff --git a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll
index 6f865e13cdb1..4ecd2d8a99e9 100644
--- a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll
+++ b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll
@@ -182,26 +182,35 @@ private module Xml {
}
/**
- * Gets a call to:
- * * `lxml.etree.XMLParser`
- * * `lxml.etree.get_default_parser`
- *
- * Given the following example:
+ * A call to `lxml.etree.get_default_parser`.
*
- * ```py
- * lxml.etree.XMLParser()
- * ```
+ * See https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.get_default_parser
+ */
+ private class LXMLDefaultParser extends DataFlow::CallCfgNode, XML::XMLParser::Range {
+ LXMLDefaultParser() {
+ this = API::moduleImport("lxml").getMember("etree").getMember("get_default_parser").getACall()
+ }
+
+ override DataFlow::Node getAnInput() { none() }
+
+ override predicate vulnerable(XML::XMLVulnerabilityKind kind) {
+ // as highlighted by
+ // https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.XMLParser
+ // by default XXE is allowed, so as long as the default parser has not been
+ // overridden, the result is also vulnerable to XXE.
+ kind.isXxe()
+ // TODO: take into account that you can override the default parser with `lxml.etree.set_default_parser`.
+ }
+ }
+
+ /**
+ * A call to `lxml.etree.XMLParser`.
*
- * * `this` would be `lxml.etree.XMLParser(resolve_entities=False)`.
- * * `vulnerable(kind)`'s `kind` would be `XXE`
+ * See https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.XMLParser
*/
private class LXMLParser extends DataFlow::CallCfgNode, XML::XMLParser::Range {
LXMLParser() {
- this =
- API::moduleImport("lxml")
- .getMember("etree")
- .getMember(["XMLParser", "get_default_parser"])
- .getACall()
+ this = API::moduleImport("lxml").getMember("etree").getMember("XMLParser").getACall()
}
override DataFlow::Node getAnInput() { none() }
From 124c03c15c9df8b14d143d3e38a410d3f60cb8e3 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Thu, 3 Mar 2022 14:38:41 +0100
Subject: [PATCH 45/79] Python: Expand lxml tests
And add annotations, see PoC.py for reference
Some of these need fixing though
---
.../CWE-611/XmlEntityInjection.expected | 37 +++++++++++--------
.../Security/CWE-611/lxml_etree.py | 32 +++++++++-------
2 files changed, 41 insertions(+), 28 deletions(-)
diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected b/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected
index 86edcb89d4da..b29a6d049db1 100644
--- a/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected
+++ b/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected
@@ -23,12 +23,15 @@ edges
| lxml_etree.py:62:19:62:25 | ControlFlowNode for request | lxml_etree.py:62:19:62:30 | ControlFlowNode for Attribute |
| lxml_etree.py:62:19:62:30 | ControlFlowNode for Attribute | lxml_etree.py:62:19:62:45 | ControlFlowNode for Subscript |
| lxml_etree.py:62:19:62:45 | ControlFlowNode for Subscript | lxml_etree.py:65:34:65:44 | ControlFlowNode for xml_content |
-| lxml_etree.py:73:19:73:25 | ControlFlowNode for request | lxml_etree.py:73:19:73:30 | ControlFlowNode for Attribute |
-| lxml_etree.py:73:19:73:30 | ControlFlowNode for Attribute | lxml_etree.py:73:19:73:45 | ControlFlowNode for Subscript |
-| lxml_etree.py:73:19:73:45 | ControlFlowNode for Subscript | lxml_etree.py:76:34:76:44 | ControlFlowNode for xml_content |
-| lxml_etree.py:81:19:81:25 | ControlFlowNode for request | lxml_etree.py:81:19:81:30 | ControlFlowNode for Attribute |
-| lxml_etree.py:81:19:81:30 | ControlFlowNode for Attribute | lxml_etree.py:81:19:81:45 | ControlFlowNode for Subscript |
-| lxml_etree.py:81:19:81:45 | ControlFlowNode for Subscript | lxml_etree.py:84:34:84:44 | ControlFlowNode for xml_content |
+| lxml_etree.py:71:19:71:25 | ControlFlowNode for request | lxml_etree.py:71:19:71:30 | ControlFlowNode for Attribute |
+| lxml_etree.py:71:19:71:30 | ControlFlowNode for Attribute | lxml_etree.py:71:19:71:45 | ControlFlowNode for Subscript |
+| lxml_etree.py:71:19:71:45 | ControlFlowNode for Subscript | lxml_etree.py:74:34:74:44 | ControlFlowNode for xml_content |
+| lxml_etree.py:78:19:78:25 | ControlFlowNode for request | lxml_etree.py:78:19:78:30 | ControlFlowNode for Attribute |
+| lxml_etree.py:78:19:78:30 | ControlFlowNode for Attribute | lxml_etree.py:78:19:78:45 | ControlFlowNode for Subscript |
+| lxml_etree.py:78:19:78:45 | ControlFlowNode for Subscript | lxml_etree.py:81:34:81:44 | ControlFlowNode for xml_content |
+| lxml_etree.py:87:19:87:25 | ControlFlowNode for request | lxml_etree.py:87:19:87:30 | ControlFlowNode for Attribute |
+| lxml_etree.py:87:19:87:30 | ControlFlowNode for Attribute | lxml_etree.py:87:19:87:45 | ControlFlowNode for Subscript |
+| lxml_etree.py:87:19:87:45 | ControlFlowNode for Subscript | lxml_etree.py:90:34:90:44 | ControlFlowNode for xml_content |
| xml_dom.py:13:19:13:25 | ControlFlowNode for request | xml_dom.py:13:19:13:30 | ControlFlowNode for Attribute |
| xml_dom.py:13:19:13:30 | ControlFlowNode for Attribute | xml_dom.py:13:19:13:45 | ControlFlowNode for Subscript |
| xml_dom.py:13:19:13:45 | ControlFlowNode for Subscript | xml_dom.py:15:34:15:54 | ControlFlowNode for StringIO() |
@@ -126,14 +129,18 @@ nodes
| lxml_etree.py:62:19:62:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
| lxml_etree.py:62:19:62:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
| lxml_etree.py:65:34:65:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content |
-| lxml_etree.py:73:19:73:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
-| lxml_etree.py:73:19:73:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
-| lxml_etree.py:73:19:73:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
-| lxml_etree.py:76:34:76:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content |
-| lxml_etree.py:81:19:81:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
-| lxml_etree.py:81:19:81:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
-| lxml_etree.py:81:19:81:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
-| lxml_etree.py:84:34:84:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content |
+| lxml_etree.py:71:19:71:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
+| lxml_etree.py:71:19:71:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
+| lxml_etree.py:71:19:71:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
+| lxml_etree.py:74:34:74:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content |
+| lxml_etree.py:78:19:78:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
+| lxml_etree.py:78:19:78:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
+| lxml_etree.py:78:19:78:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
+| lxml_etree.py:81:34:81:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content |
+| lxml_etree.py:87:19:87:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
+| lxml_etree.py:87:19:87:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
+| lxml_etree.py:87:19:87:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
+| lxml_etree.py:90:34:90:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content |
| xml_dom.py:13:19:13:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
| xml_dom.py:13:19:13:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
| xml_dom.py:13:19:13:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
@@ -228,7 +235,7 @@ subpaths
| lxml_etree.py:40:34:40:44 | ControlFlowNode for xml_content | lxml_etree.py:37:19:37:25 | ControlFlowNode for request | lxml_etree.py:40:34:40:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: XXE. | lxml_etree.py:40:34:40:44 | ControlFlowNode for xml_content | This | lxml_etree.py:37:19:37:25 | ControlFlowNode for request | user-provided value |
| lxml_etree.py:47:34:47:44 | ControlFlowNode for xml_content | lxml_etree.py:44:19:44:25 | ControlFlowNode for request | lxml_etree.py:47:34:47:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: XXE. | lxml_etree.py:47:34:47:44 | ControlFlowNode for xml_content | This | lxml_etree.py:44:19:44:25 | ControlFlowNode for request | user-provided value |
| lxml_etree.py:65:34:65:44 | ControlFlowNode for xml_content | lxml_etree.py:62:19:62:25 | ControlFlowNode for request | lxml_etree.py:65:34:65:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: XXE. | lxml_etree.py:65:34:65:44 | ControlFlowNode for xml_content | This | lxml_etree.py:62:19:62:25 | ControlFlowNode for request | user-provided value |
-| lxml_etree.py:84:34:84:44 | ControlFlowNode for xml_content | lxml_etree.py:81:19:81:25 | ControlFlowNode for request | lxml_etree.py:84:34:84:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup, XXE. | lxml_etree.py:84:34:84:44 | ControlFlowNode for xml_content | This | lxml_etree.py:81:19:81:25 | ControlFlowNode for request | user-provided value |
+| lxml_etree.py:81:34:81:44 | ControlFlowNode for xml_content | lxml_etree.py:78:19:78:25 | ControlFlowNode for request | lxml_etree.py:81:34:81:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup, XXE. | lxml_etree.py:81:34:81:44 | ControlFlowNode for xml_content | This | lxml_etree.py:78:19:78:25 | ControlFlowNode for request | user-provided value |
| xml_dom.py:15:34:15:54 | ControlFlowNode for StringIO() | xml_dom.py:13:19:13:25 | ControlFlowNode for request | xml_dom.py:15:34:15:54 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_dom.py:15:34:15:54 | ControlFlowNode for StringIO() | This | xml_dom.py:13:19:13:25 | ControlFlowNode for request | user-provided value |
| xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | xml_dom.py:19:19:19:25 | ControlFlowNode for request | xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | This | xml_dom.py:19:19:19:25 | ControlFlowNode for request | user-provided value |
| xml_dom.py:27:34:27:54 | ControlFlowNode for StringIO() | xml_dom.py:25:19:25:25 | ControlFlowNode for request | xml_dom.py:27:34:27:54 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_dom.py:27:34:27:54 | ControlFlowNode for StringIO() | This | xml_dom.py:25:19:25:25 | ControlFlowNode for request | user-provided value |
diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/lxml_etree.py b/python/ql/test/experimental/query-tests/Security/CWE-611/lxml_etree.py
index 231116c2b720..2c2712098503 100644
--- a/python/ql/test/experimental/query-tests/Security/CWE-611/lxml_etree.py
+++ b/python/ql/test/experimental/query-tests/Security/CWE-611/lxml_etree.py
@@ -10,25 +10,25 @@
def lxml_etree_fromstring():
xml_content = request.args['xml_content']
- return lxml.etree.fromstring(xml_content).text
+ return lxml.etree.fromstring(xml_content).text # NOT OK for XXE
@app.route("/lxml_etree_fromstringlist")
def lxml_etree_fromstringlist():
xml_content = request.args['xml_content']
- return lxml.etree.fromstringlist([xml_content]).text
+ return lxml.etree.fromstringlist([xml_content]).text # NOT OK for XXE
@app.route("/lxml_etree_XML")
def lxml_etree_XML():
xml_content = request.args['xml_content']
- return lxml.etree.XML(xml_content).text
+ return lxml.etree.XML(xml_content).text # NOT OK for XXE
@app.route("/lxml_etree_parse")
def lxml_etree_parse():
xml_content = request.args['xml_content']
- return lxml.etree.parse(StringIO(xml_content)).getroot().text
+ return lxml.etree.parse(StringIO(xml_content)).getroot().text # NOT OK for XXE
# With parsers - Default
@@ -37,14 +37,14 @@ def lxml_parser():
xml_content = request.args['xml_content']
parser = lxml.etree.XMLParser()
- return lxml.etree.fromstring(xml_content, parser=parser).text
+ return lxml.etree.fromstring(xml_content, parser=parser).text # NOT OK for XXE
@app.route("/lxml_etree_fromstring-lxml.etree.get_default_parser")
def lxml_parser():
xml_content = request.args['xml_content']
parser = lxml.etree.get_default_parser()
- return lxml.etree.fromstring(xml_content, parser=parser).text
+ return lxml.etree.fromstring(xml_content, parser=parser).text # NOT OK for XXE
# With parsers - With options
@@ -54,7 +54,7 @@ def lxml_parser():
xml_content = request.args['xml_content']
parser = lxml.etree.XMLParser(resolve_entities=False)
- return lxml.etree.fromstring(xml_content, parser=parser).text
+ return lxml.etree.fromstring(xml_content, parser=parser).text # OK for XXE
# XXE-vuln
@app.route("/lxml_etree_fromstring-lxml.etree.XMLParser+")
@@ -62,23 +62,29 @@ def lxml_parser():
xml_content = request.args['xml_content']
parser = lxml.etree.XMLParser(resolve_entities=True)
- return lxml.etree.fromstring(xml_content, parser=parser).text
+ return lxml.etree.fromstring(xml_content, parser=parser).text # NOT OK for XXE
# Billion laughs and quadratic blowup (huge_tree)
-## Good (huge_tree=True but resolve_entities=False)
-
@app.route("/lxml_etree_fromstring-lxml.etree.XMLParser+")
def lxml_parser():
xml_content = request.args['xml_content']
parser = lxml.etree.XMLParser(resolve_entities=False, huge_tree=True)
- return lxml.etree.fromstring(xml_content, parser=parser).text
+ return lxml.etree.fromstring(xml_content, parser=parser).text # OK for XXE, NOT OK for billion laughs/quadratic
-## Bad
@app.route("/lxml_etree_fromstring-lxml.etree.XMLParser+")
def lxml_parser():
xml_content = request.args['xml_content']
parser = lxml.etree.XMLParser(huge_tree=True)
- return lxml.etree.fromstring(xml_content, parser=parser).text
+ return lxml.etree.fromstring(xml_content, parser=parser).text # NOT OK for XXE, NOT OK for billion laughs/quadratic
+
+# DTD retrieval
+
+@app.route("/lxml_etree_fromstring-lxml.etree.XMLParser+")
+def lxml_parser():
+ xml_content = request.args['xml_content']
+
+ parser = lxml.etree.XMLParser(resolve_entities=False, load_dtd=True, no_network=False)
+ return lxml.etree.fromstring(xml_content, parser=parser).text # NOT OK for DTD, OK for rest
From e295399f7096f92592ea7aa4d1286619bf39f8d0 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Thu, 3 Mar 2022 14:43:37 +0100
Subject: [PATCH 46/79] Python: Properly handle `huge_tree` in lxml
---
python/ql/src/experimental/semmle/python/frameworks/Xml.qll | 5 +----
.../query-tests/Security/CWE-611/XmlEntityInjection.expected | 1 +
2 files changed, 2 insertions(+), 4 deletions(-)
diff --git a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll
index 4ecd2d8a99e9..58b7edc327be 100644
--- a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll
+++ b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll
@@ -226,10 +226,7 @@ private module Xml {
)
or
(kind.isBillionLaughs() or kind.isQuadraticBlowup()) and
- (
- this.getArgByName("huge_tree").getALocalSource().asExpr() = any(True t) and
- not this.getArgByName("resolve_entities").getALocalSource().asExpr() = any(False f)
- )
+ this.getArgByName("huge_tree").getALocalSource().asExpr() = any(True t)
}
}
diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected b/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected
index b29a6d049db1..fc6f8c9ad899 100644
--- a/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected
+++ b/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected
@@ -235,6 +235,7 @@ subpaths
| lxml_etree.py:40:34:40:44 | ControlFlowNode for xml_content | lxml_etree.py:37:19:37:25 | ControlFlowNode for request | lxml_etree.py:40:34:40:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: XXE. | lxml_etree.py:40:34:40:44 | ControlFlowNode for xml_content | This | lxml_etree.py:37:19:37:25 | ControlFlowNode for request | user-provided value |
| lxml_etree.py:47:34:47:44 | ControlFlowNode for xml_content | lxml_etree.py:44:19:44:25 | ControlFlowNode for request | lxml_etree.py:47:34:47:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: XXE. | lxml_etree.py:47:34:47:44 | ControlFlowNode for xml_content | This | lxml_etree.py:44:19:44:25 | ControlFlowNode for request | user-provided value |
| lxml_etree.py:65:34:65:44 | ControlFlowNode for xml_content | lxml_etree.py:62:19:62:25 | ControlFlowNode for request | lxml_etree.py:65:34:65:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: XXE. | lxml_etree.py:65:34:65:44 | ControlFlowNode for xml_content | This | lxml_etree.py:62:19:62:25 | ControlFlowNode for request | user-provided value |
+| lxml_etree.py:74:34:74:44 | ControlFlowNode for xml_content | lxml_etree.py:71:19:71:25 | ControlFlowNode for request | lxml_etree.py:74:34:74:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | lxml_etree.py:74:34:74:44 | ControlFlowNode for xml_content | This | lxml_etree.py:71:19:71:25 | ControlFlowNode for request | user-provided value |
| lxml_etree.py:81:34:81:44 | ControlFlowNode for xml_content | lxml_etree.py:78:19:78:25 | ControlFlowNode for request | lxml_etree.py:81:34:81:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup, XXE. | lxml_etree.py:81:34:81:44 | ControlFlowNode for xml_content | This | lxml_etree.py:78:19:78:25 | ControlFlowNode for request | user-provided value |
| xml_dom.py:15:34:15:54 | ControlFlowNode for StringIO() | xml_dom.py:13:19:13:25 | ControlFlowNode for request | xml_dom.py:15:34:15:54 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_dom.py:15:34:15:54 | ControlFlowNode for StringIO() | This | xml_dom.py:13:19:13:25 | ControlFlowNode for request | user-provided value |
| xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | xml_dom.py:19:19:19:25 | ControlFlowNode for request | xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | This | xml_dom.py:19:19:19:25 | ControlFlowNode for request | user-provided value |
From 703e3e8a0f9f81df20a924b25412baf4b9189086 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Thu, 3 Mar 2022 14:46:48 +0100
Subject: [PATCH 47/79] Python: Handle DTD retrieval vuln in lxml
---
python/ql/src/experimental/semmle/python/frameworks/Xml.qll | 4 ++++
.../query-tests/Security/CWE-611/XmlEntityInjection.expected | 1 +
2 files changed, 5 insertions(+)
diff --git a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll
index 58b7edc327be..315199e748c1 100644
--- a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll
+++ b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll
@@ -227,6 +227,10 @@ private module Xml {
or
(kind.isBillionLaughs() or kind.isQuadraticBlowup()) and
this.getArgByName("huge_tree").getALocalSource().asExpr() = any(True t)
+ or
+ kind.isDtdRetrieval() and
+ this.getArgByName("load_dtd").getALocalSource().asExpr() = any(True t) and
+ this.getArgByName("no_network").getALocalSource().asExpr() = any(False t)
}
}
diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected b/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected
index fc6f8c9ad899..3c5ad70b23b9 100644
--- a/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected
+++ b/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected
@@ -237,6 +237,7 @@ subpaths
| lxml_etree.py:65:34:65:44 | ControlFlowNode for xml_content | lxml_etree.py:62:19:62:25 | ControlFlowNode for request | lxml_etree.py:65:34:65:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: XXE. | lxml_etree.py:65:34:65:44 | ControlFlowNode for xml_content | This | lxml_etree.py:62:19:62:25 | ControlFlowNode for request | user-provided value |
| lxml_etree.py:74:34:74:44 | ControlFlowNode for xml_content | lxml_etree.py:71:19:71:25 | ControlFlowNode for request | lxml_etree.py:74:34:74:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | lxml_etree.py:74:34:74:44 | ControlFlowNode for xml_content | This | lxml_etree.py:71:19:71:25 | ControlFlowNode for request | user-provided value |
| lxml_etree.py:81:34:81:44 | ControlFlowNode for xml_content | lxml_etree.py:78:19:78:25 | ControlFlowNode for request | lxml_etree.py:81:34:81:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup, XXE. | lxml_etree.py:81:34:81:44 | ControlFlowNode for xml_content | This | lxml_etree.py:78:19:78:25 | ControlFlowNode for request | user-provided value |
+| lxml_etree.py:90:34:90:44 | ControlFlowNode for xml_content | lxml_etree.py:87:19:87:25 | ControlFlowNode for request | lxml_etree.py:90:34:90:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: DTD retrieval. | lxml_etree.py:90:34:90:44 | ControlFlowNode for xml_content | This | lxml_etree.py:87:19:87:25 | ControlFlowNode for request | user-provided value |
| xml_dom.py:15:34:15:54 | ControlFlowNode for StringIO() | xml_dom.py:13:19:13:25 | ControlFlowNode for request | xml_dom.py:15:34:15:54 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_dom.py:15:34:15:54 | ControlFlowNode for StringIO() | This | xml_dom.py:13:19:13:25 | ControlFlowNode for request | user-provided value |
| xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | xml_dom.py:19:19:19:25 | ControlFlowNode for request | xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | This | xml_dom.py:19:19:19:25 | ControlFlowNode for request | user-provided value |
| xml_dom.py:27:34:27:54 | ControlFlowNode for StringIO() | xml_dom.py:25:19:25:25 | ControlFlowNode for request | xml_dom.py:27:34:27:54 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_dom.py:27:34:27:54 | ControlFlowNode for StringIO() | This | xml_dom.py:25:19:25:25 | ControlFlowNode for request | user-provided value |
From 61291936bfcb2667647f330ca0a512b33c80e82c Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Thu, 3 Mar 2022 15:06:55 +0100
Subject: [PATCH 48/79] Python: Properly model `xml.etree`
---
.../ql/src/experimental/semmle/python/frameworks/Xml.qll | 7 ++++++-
.../Security/CWE-611/XmlEntityInjection.expected | 5 +++++
2 files changed, 11 insertions(+), 1 deletion(-)
diff --git a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll
index 315199e748c1..5140915e0792 100644
--- a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll
+++ b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll
@@ -24,7 +24,9 @@ private module Xml {
override DataFlow::Node getAnInput() { none() }
- override predicate vulnerable(XML::XMLVulnerabilityKind kind) { none() }
+ override predicate vulnerable(XML::XMLVulnerabilityKind kind) {
+ kind.isBillionLaughs() or kind.isQuadraticBlowup()
+ }
}
/**
@@ -58,6 +60,9 @@ private module Xml {
override DataFlow::Node getAnInput() { result = this.getArg(0) }
override predicate vulnerable(XML::XMLVulnerabilityKind kind) {
+ not exists(this.getArgByName("parser")) and
+ (kind.isBillionLaughs() or kind.isQuadraticBlowup())
+ or
exists(XML::XMLParser xmlParser |
xmlParser = this.getArgByName("parser").getALocalSource() and xmlParser.vulnerable(kind)
)
diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected b/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected
index 3c5ad70b23b9..bf43d01cec1e 100644
--- a/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected
+++ b/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected
@@ -242,6 +242,11 @@ subpaths
| xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | xml_dom.py:19:19:19:25 | ControlFlowNode for request | xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | This | xml_dom.py:19:19:19:25 | ControlFlowNode for request | user-provided value |
| xml_dom.py:27:34:27:54 | ControlFlowNode for StringIO() | xml_dom.py:25:19:25:25 | ControlFlowNode for request | xml_dom.py:27:34:27:54 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_dom.py:27:34:27:54 | ControlFlowNode for StringIO() | This | xml_dom.py:25:19:25:25 | ControlFlowNode for request | user-provided value |
| xml_dom.py:33:40:33:50 | ControlFlowNode for xml_content | xml_dom.py:31:19:31:25 | ControlFlowNode for request | xml_dom.py:33:40:33:50 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_dom.py:33:40:33:50 | ControlFlowNode for xml_content | This | xml_dom.py:31:19:31:25 | ControlFlowNode for request | user-provided value |
+| xml_etree.py:15:45:15:55 | ControlFlowNode for xml_content | xml_etree.py:13:19:13:25 | ControlFlowNode for request | xml_etree.py:15:45:15:55 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_etree.py:15:45:15:55 | ControlFlowNode for xml_content | This | xml_etree.py:13:19:13:25 | ControlFlowNode for request | user-provided value |
+| xml_etree.py:21:49:21:59 | ControlFlowNode for xml_content | xml_etree.py:19:19:19:25 | ControlFlowNode for request | xml_etree.py:21:49:21:59 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_etree.py:21:49:21:59 | ControlFlowNode for xml_content | This | xml_etree.py:19:19:19:25 | ControlFlowNode for request | user-provided value |
+| xml_etree.py:27:38:27:48 | ControlFlowNode for xml_content | xml_etree.py:25:19:25:25 | ControlFlowNode for request | xml_etree.py:27:38:27:48 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_etree.py:27:38:27:48 | ControlFlowNode for xml_content | This | xml_etree.py:25:19:25:25 | ControlFlowNode for request | user-provided value |
+| xml_etree.py:33:40:33:60 | ControlFlowNode for StringIO() | xml_etree.py:31:19:31:25 | ControlFlowNode for request | xml_etree.py:33:40:33:60 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_etree.py:33:40:33:60 | ControlFlowNode for StringIO() | This | xml_etree.py:31:19:31:25 | ControlFlowNode for request | user-provided value |
+| xml_etree.py:42:45:42:55 | ControlFlowNode for xml_content | xml_etree.py:39:19:39:25 | ControlFlowNode for request | xml_etree.py:42:45:42:55 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_etree.py:42:45:42:55 | ControlFlowNode for xml_content | This | xml_etree.py:39:19:39:25 | ControlFlowNode for request | user-provided value |
| xml_etree.py:49:45:49:55 | ControlFlowNode for xml_content | xml_etree.py:46:19:46:25 | ControlFlowNode for request | xml_etree.py:49:45:49:55 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: XXE. | xml_etree.py:49:45:49:55 | ControlFlowNode for xml_content | This | xml_etree.py:46:19:46:25 | ControlFlowNode for request | user-provided value |
| xml_etree.py:56:45:56:55 | ControlFlowNode for xml_content | xml_etree.py:53:19:53:25 | ControlFlowNode for request | xml_etree.py:56:45:56:55 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: XXE. | xml_etree.py:56:45:56:55 | ControlFlowNode for xml_content | This | xml_etree.py:53:19:53:25 | ControlFlowNode for request | user-provided value |
| xml_sax_make_parser.py:36:18:36:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:31:19:31:25 | ControlFlowNode for request | xml_sax_make_parser.py:36:18:36:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_sax_make_parser.py:36:18:36:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:31:19:31:25 | ControlFlowNode for request | user-provided value |
From 3affa6cf3abd3e88fed8722f959b0b5851936809 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Thu, 3 Mar 2022 15:08:56 +0100
Subject: [PATCH 49/79] Python: Annotate xmltodict tests
---
.../experimental/query-tests/Security/CWE-611/xml_to_dict.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/xml_to_dict.py b/python/ql/test/experimental/query-tests/Security/CWE-611/xml_to_dict.py
index 2b91a22e1a22..8f43d2e1cc1f 100644
--- a/python/ql/test/experimental/query-tests/Security/CWE-611/xml_to_dict.py
+++ b/python/ql/test/experimental/query-tests/Security/CWE-611/xml_to_dict.py
@@ -8,10 +8,10 @@
def xmltodict_parse():
xml_content = request.args['xml_content']
- return xmltodict.parse(xml_content)
+ return xmltodict.parse(xml_content) # OK
@app.route("/xmltodict.parse2")
def xmltodict_parse2():
xml_content = request.args['xml_content']
- return xmltodict.parse(xml_content, disable_entities=False)
\ No newline at end of file
+ return xmltodict.parse(xml_content, disable_entities=False) # NOT OK for billion laughs/quadratic
From c4d08db62aafec4a020f4836a4bcb86329cc517b Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Thu, 3 Mar 2022 17:30:16 +0100
Subject: [PATCH 50/79] Python: Expand XML PoC with minidom/pulldom/expat
---
.../Security/CWE-611/dont_extract/PoC.py | 201 ++++++++++++++++++
1 file changed, 201 insertions(+)
diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/dont_extract/PoC.py b/python/ql/test/experimental/query-tests/Security/CWE-611/dont_extract/PoC.py
index 85301c32bff6..862346de3e8a 100644
--- a/python/ql/test/experimental/query-tests/Security/CWE-611/dont_extract/PoC.py
+++ b/python/ql/test/experimental/query-tests/Security/CWE-611/dont_extract/PoC.py
@@ -143,6 +143,7 @@ def fast_func():
# ==============================================================================
import xml.sax
+import xml.sax.handler
class SimpleHandler(xml.sax.ContentHandler):
def __init__(self):
@@ -447,3 +448,203 @@ def test_dtd_not_possible():
d = xmltodict.parse(dtd_retrieval)
assert hit_dtd == False
+
+# ==============================================================================
+import xml.dom.minidom
+
+class TestMinidom:
+ @staticmethod
+ @expects_timeout
+ def test_billion_laughs():
+ xml.dom.minidom.parseString(billion_laughs)
+
+ @staticmethod
+ @expects_timeout
+ def test_quardratic_blowup():
+ xml.dom.minidom.parseString(quadratic_blowup)
+
+ @staticmethod
+ def test_ok_xml():
+ doc = xml.dom.minidom.parseString(ok_xml)
+ assert doc.documentElement.tagName == "test"
+ assert doc.documentElement.childNodes[0].data == "hello world"
+
+ @staticmethod
+ def test_xxe():
+ # disabled by default
+ doc = xml.dom.minidom.parseString(local_xxe)
+ assert doc.documentElement.tagName == "test"
+ assert doc.documentElement.childNodes == []
+
+ # but can be turned on
+ parser = xml.sax.make_parser()
+ parser.setFeature(xml.sax.handler.feature_external_ges, True)
+ doc = xml.dom.minidom.parseString(local_xxe, parser=parser)
+ assert doc.documentElement.tagName == "test"
+ assert doc.documentElement.childNodes[0].data == "SECRET_FLAG"
+
+ # which also works remotely
+ global hit_xxe
+ hit_xxe = False
+
+ parser = xml.sax.make_parser()
+ parser.setFeature(xml.sax.handler.feature_external_ges, True)
+ _doc = xml.dom.minidom.parseString(remote_xxe, parser=parser)
+ assert hit_xxe == True
+
+ @staticmethod
+ def test_dtd():
+ # not possible by default
+ global hit_dtd
+ hit_dtd = False
+
+ _doc = xml.dom.minidom.parseString(dtd_retrieval)
+ assert hit_dtd == False
+
+ # but can be turned on
+ parser = xml.sax.make_parser()
+ parser.setFeature(xml.sax.handler.feature_external_ges, True)
+ _doc = xml.dom.minidom.parseString(dtd_retrieval, parser=parser)
+ assert hit_dtd == True
+
+# ==============================================================================
+import xml.dom.pulldom
+
+class TestPulldom:
+ @staticmethod
+ @expects_timeout
+ def test_billion_laughs():
+ doc = xml.dom.pulldom.parseString(billion_laughs)
+ # you NEED to iterate over the items for it to take long
+ for event, node in doc:
+ pass
+
+ @staticmethod
+ @expects_timeout
+ def test_quardratic_blowup():
+ doc = xml.dom.pulldom.parseString(quadratic_blowup)
+ for event, node in doc:
+ pass
+
+ @staticmethod
+ def test_ok_xml():
+ doc = xml.dom.pulldom.parseString(ok_xml)
+ for event, node in doc:
+ if event == xml.dom.pulldom.START_ELEMENT:
+ assert node.tagName == "test"
+ elif event == xml.dom.pulldom.CHARACTERS:
+ assert node.data == "hello world"
+
+ @staticmethod
+ def test_xxe():
+ # disabled by default
+ doc = xml.dom.pulldom.parseString(local_xxe)
+ found_flag = False
+ for event, node in doc:
+ if event == xml.dom.pulldom.START_ELEMENT:
+ assert node.tagName == "test"
+ elif event == xml.dom.pulldom.CHARACTERS:
+ if node.data == "SECRET_FLAG":
+ found_flag = True
+ assert found_flag == False
+
+ # but can be turned on
+ parser = xml.sax.make_parser()
+ parser.setFeature(xml.sax.handler.feature_external_ges, True)
+ doc = xml.dom.pulldom.parseString(local_xxe, parser=parser)
+ found_flag = False
+ for event, node in doc:
+ if event == xml.dom.pulldom.START_ELEMENT:
+ assert node.tagName == "test"
+ elif event == xml.dom.pulldom.CHARACTERS:
+ if node.data == "SECRET_FLAG":
+ found_flag = True
+ assert found_flag == True
+
+ # which also works remotely
+ global hit_xxe
+ hit_xxe = False
+ parser = xml.sax.make_parser()
+ parser.setFeature(xml.sax.handler.feature_external_ges, True)
+ doc = xml.dom.pulldom.parseString(remote_xxe, parser=parser)
+ assert hit_xxe == False
+ for event, node in doc:
+ pass
+ assert hit_xxe == True
+
+ @staticmethod
+ def test_dtd():
+ # not possible by default
+ global hit_dtd
+ hit_dtd = False
+
+ doc = xml.dom.pulldom.parseString(dtd_retrieval)
+ for event, node in doc:
+ pass
+ assert hit_dtd == False
+
+ # but can be turned on
+ parser = xml.sax.make_parser()
+ parser.setFeature(xml.sax.handler.feature_external_ges, True)
+ doc = xml.dom.pulldom.parseString(dtd_retrieval, parser=parser)
+ for event, node in doc:
+ pass
+ assert hit_dtd == True
+
+# ==============================================================================
+import xml.parsers.expat
+
+class TestExpat:
+ # this is the underlying parser implementation used by the rest of the Python
+ # standard library. But people are probably not using this directly.
+
+ @staticmethod
+ @expects_timeout
+ def test_billion_laughs():
+ parser = xml.parsers.expat.ParserCreate()
+ parser.Parse(billion_laughs, True)
+
+ @staticmethod
+ @expects_timeout
+ def test_quardratic_blowup():
+ parser = xml.parsers.expat.ParserCreate()
+ parser.Parse(quadratic_blowup, True)
+
+ @staticmethod
+ def test_ok_xml():
+ char_data_recv = []
+ def char_data_handler(data):
+ char_data_recv.append(data)
+
+ parser = xml.parsers.expat.ParserCreate()
+ parser.CharacterDataHandler = char_data_handler
+ parser.Parse(ok_xml, True)
+
+ assert char_data_recv == ["hello world"]
+
+ @staticmethod
+ def test_xxe():
+ # not vuln by default
+ char_data_recv = []
+ def char_data_handler(data):
+ char_data_recv.append(data)
+
+ parser = xml.parsers.expat.ParserCreate()
+ parser.CharacterDataHandler = char_data_handler
+ parser.Parse(local_xxe, True)
+
+ assert char_data_recv == []
+
+ # there might be ways to make it vuln, but I did not investigate further.
+
+ @staticmethod
+ def test_dtd():
+ # not vuln by default
+ global hit_dtd
+ hit_dtd = False
+
+ parser = xml.parsers.expat.ParserCreate()
+ parser.Parse(dtd_retrieval, True)
+ assert hit_dtd == False
+
+ # there might be ways to make it vuln, but I did not investigate further.
From 5a652480b1f26eba9ed9c82e338c5058330bf51a Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Thu, 3 Mar 2022 17:37:25 +0100
Subject: [PATCH 51/79] Python: Annotate xml.dom tests
---
.../query-tests/Security/CWE-611/xml_dom.py | 11 +++++------
1 file changed, 5 insertions(+), 6 deletions(-)
diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/xml_dom.py b/python/ql/test/experimental/query-tests/Security/CWE-611/xml_dom.py
index 428a2d645a1b..55762ece0be5 100644
--- a/python/ql/test/experimental/query-tests/Security/CWE-611/xml_dom.py
+++ b/python/ql/test/experimental/query-tests/Security/CWE-611/xml_dom.py
@@ -12,25 +12,25 @@
def xml_minidom_parse():
xml_content = request.args['xml_content']
- return xml.dom.minidom.parse(StringIO(xml_content)).documentElement.childNodes
+ return xml.dom.minidom.parse(StringIO(xml_content)).documentElement.childNodes # OK for XXE/DTD, NOT OK for billion laughs/quadratic
@app.route("/xml_minidom_parseString")
def xml_minidom_parseString():
xml_content = request.args['xml_content']
- return xml.dom.minidom.parseString(xml_content).documentElement.childNodes
+ return xml.dom.minidom.parseString(xml_content).documentElement.childNodes # OK for XXE/DTD, NOT OK for billion laughs/quadratic
@app.route("/xml_pulldom_parse")
def xml_pulldom_parse():
xml_content = request.args['xml_content']
- return xml.dom.pulldom.parse(StringIO(xml_content))['START_DOCUMENT'][1].documentElement.childNodes
+ return xml.dom.pulldom.parse(StringIO(xml_content))['START_DOCUMENT'][1].documentElement.childNodes # OK for XXE/DTD, NOT OK for billion laughs/quadratic
@app.route("/xml_pulldom_parseString")
def xml_pulldom_parseString():
xml_content = request.args['xml_content']
- return xml.dom.pulldom.parseString(xml_content)['START_DOCUMENT'][1].documentElement.childNodes
+ return xml.dom.pulldom.parseString(xml_content)['START_DOCUMENT'][1].documentElement.childNodes # OK for XXE/DTD, NOT OK for billion laughs/quadratic
# With parsers
@@ -40,5 +40,4 @@ def xml_minidom_parse_xml_sax_make_parser():
parser = xml.sax.make_parser()
parser.setFeature(xml.sax.handler.feature_external_ges, True)
- return xml.dom.minidom.parse(StringIO(xml_content), parser=parser).documentElement.childNodes
-
+ return xml.dom.minidom.parse(StringIO(xml_content), parser=parser).documentElement.childNodes # NOT OK for XXE/DTD, NOT OK for billion laughs/quadratic
From 9406a972cdbf24ab8c0e5608490042ffc12b297f Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Thu, 3 Mar 2022 17:52:11 +0100
Subject: [PATCH 52/79] Python: Fix vuln detection for xml.minidom with parser
arg
---
.../semmle/python/frameworks/Xml.qll | 28 +++++++------------
.../CWE-611/XmlEntityInjection.expected | 2 ++
2 files changed, 12 insertions(+), 18 deletions(-)
diff --git a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll
index 5140915e0792..d7e27f35b0d9 100644
--- a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll
+++ b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll
@@ -302,19 +302,9 @@ private module Xml {
}
/**
- * Gets a call to:
- * * `xml.dom.minidom.parse`
- * * `xml.dom.pulldom.parse`
- *
- * Given the following example:
+ * A call to the `parse` or `parseString` methods from `xml.dom.minidom` or `xml.dom.pulldom`.
*
- * ```py
- * xml.dom.minidom.parse(StringIO(xml_content)).documentElement.childNode
- * ```
- *
- * * `this` would be `xml.dom.minidom.parse(StringIO(xml_content), parser=parser)`.
- * * `getAnInput()`'s result would be `StringIO(xml_content)`.
- * * `vulnerable(kind)`'s `kind` would be `Billion Laughs` and `Quadratic Blowup`.
+ * Both of these modules are based on SAX parsers.
*/
private class XMLDomParsing extends DataFlow::CallCfgNode, XML::XMLParsing::Range {
XMLDomParsing() {
@@ -326,15 +316,17 @@ private module Xml {
.getACall()
}
- override DataFlow::Node getAnInput() { result = this.getArg(0) }
+ override DataFlow::Node getAnInput() { // first positional arg, or its keyword form: `string` (parseString), `file` (minidom.parse), `stream_or_string` (pulldom.parse)
+ result in [this.getArg(0), this.getArgByName("string"), this.getArgByName("file"), this.getArgByName("stream_or_string")]
+ }
+
+ DataFlow::Node getParserArg() { result in [this.getArg(1), this.getArgByName("parser")] }
override predicate vulnerable(XML::XMLVulnerabilityKind kind) {
- exists(XML::XMLParser xmlParser |
- xmlParser = this.getArgByName("parser").getALocalSource() and xmlParser.vulnerable(kind)
- )
+ this.getParserArg() = saxParserWithFeatureExternalGesTurnedOn() and
+ (kind.isXxe() or kind.isDtdRetrieval())
or
- (kind.isBillionLaughs() or kind.isQuadraticBlowup()) and
- not exists(this.getArgByName("parser"))
+ (kind.isBillionLaughs() or kind.isQuadraticBlowup())
}
}
diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected b/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected
index bf43d01cec1e..b08e7dd727e4 100644
--- a/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected
+++ b/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected
@@ -242,6 +242,7 @@ subpaths
| xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | xml_dom.py:19:19:19:25 | ControlFlowNode for request | xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | This | xml_dom.py:19:19:19:25 | ControlFlowNode for request | user-provided value |
| xml_dom.py:27:34:27:54 | ControlFlowNode for StringIO() | xml_dom.py:25:19:25:25 | ControlFlowNode for request | xml_dom.py:27:34:27:54 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_dom.py:27:34:27:54 | ControlFlowNode for StringIO() | This | xml_dom.py:25:19:25:25 | ControlFlowNode for request | user-provided value |
| xml_dom.py:33:40:33:50 | ControlFlowNode for xml_content | xml_dom.py:31:19:31:25 | ControlFlowNode for request | xml_dom.py:33:40:33:50 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_dom.py:33:40:33:50 | ControlFlowNode for xml_content | This | xml_dom.py:31:19:31:25 | ControlFlowNode for request | user-provided value |
+| xml_dom.py:43:34:43:54 | ControlFlowNode for StringIO() | xml_dom.py:39:19:39:25 | ControlFlowNode for request | xml_dom.py:43:34:43:54 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, DTD retrieval, Quadratic Blowup, XXE. | xml_dom.py:43:34:43:54 | ControlFlowNode for StringIO() | This | xml_dom.py:39:19:39:25 | ControlFlowNode for request | user-provided value |
| xml_etree.py:15:45:15:55 | ControlFlowNode for xml_content | xml_etree.py:13:19:13:25 | ControlFlowNode for request | xml_etree.py:15:45:15:55 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_etree.py:15:45:15:55 | ControlFlowNode for xml_content | This | xml_etree.py:13:19:13:25 | ControlFlowNode for request | user-provided value |
| xml_etree.py:21:49:21:59 | ControlFlowNode for xml_content | xml_etree.py:19:19:19:25 | ControlFlowNode for request | xml_etree.py:21:49:21:59 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_etree.py:21:49:21:59 | ControlFlowNode for xml_content | This | xml_etree.py:19:19:19:25 | ControlFlowNode for request | user-provided value |
| xml_etree.py:27:38:27:48 | ControlFlowNode for xml_content | xml_etree.py:25:19:25:25 | ControlFlowNode for request | xml_etree.py:27:38:27:48 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_etree.py:27:38:27:48 | ControlFlowNode for xml_content | This | xml_etree.py:25:19:25:25 | ControlFlowNode for request | user-provided value |
@@ -252,6 +253,7 @@ subpaths
| xml_sax_make_parser.py:36:18:36:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:31:19:31:25 | ControlFlowNode for request | xml_sax_make_parser.py:36:18:36:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_sax_make_parser.py:36:18:36:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:31:19:31:25 | ControlFlowNode for request | user-provided value |
| xml_sax_make_parser.py:49:18:49:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:42:19:42:25 | ControlFlowNode for request | xml_sax_make_parser.py:49:18:49:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_sax_make_parser.py:49:18:49:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:42:19:42:25 | ControlFlowNode for request | user-provided value |
| xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:63:19:63:25 | ControlFlowNode for request | xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, DTD retrieval, Quadratic Blowup, XXE. | xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:63:19:63:25 | ControlFlowNode for request | user-provided value |
+| xml_sax_make_parser.py:79:33:79:53 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:75:19:75:25 | ControlFlowNode for request | xml_sax_make_parser.py:79:33:79:53 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, DTD retrieval, Quadratic Blowup, XXE. | xml_sax_make_parser.py:79:33:79:53 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:75:19:75:25 | ControlFlowNode for request | user-provided value |
| xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, DTD retrieval, Quadratic Blowup, XXE. | xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | user-provided value |
| xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | user-provided value |
| xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:98:19:98:25 | ControlFlowNode for request | xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:98:19:98:25 | ControlFlowNode for request | user-provided value |
From 7cda901da21b814d96c326d8499d6d9b2ca3de9f Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Thu, 3 Mar 2022 19:35:33 +0100
Subject: [PATCH 53/79] Python: Add separate query for SimpleXMLRPCServer
This was a rough quick-n-dirty query, and should get some qhelp as well at some point.
---
.../Security/CWE-611/SimpleXmlRpcServer.ql | 27 +++++++++++++++++
.../semmle/python/frameworks/Xml.qll | 30 -------------------
.../CWE-611/SimpleXmlRpcServer.expected | 1 +
.../Security/CWE-611/SimpleXmlRpcServer.qlref | 1 +
.../Security/CWE-611/xmlrpc_server.py | 8 +++--
5 files changed, 34 insertions(+), 33 deletions(-)
create mode 100644 python/ql/src/experimental/Security/CWE-611/SimpleXmlRpcServer.ql
create mode 100644 python/ql/test/experimental/query-tests/Security/CWE-611/SimpleXmlRpcServer.expected
create mode 100644 python/ql/test/experimental/query-tests/Security/CWE-611/SimpleXmlRpcServer.qlref
diff --git a/python/ql/src/experimental/Security/CWE-611/SimpleXmlRpcServer.ql b/python/ql/src/experimental/Security/CWE-611/SimpleXmlRpcServer.ql
new file mode 100644
index 000000000000..0e3deebf6016
--- /dev/null
+++ b/python/ql/src/experimental/Security/CWE-611/SimpleXmlRpcServer.ql
@@ -0,0 +1,27 @@
+/**
+ * @name SimpleXMLRPCServer DoS vulnerability
+ * @description SimpleXMLRPCServer is vulnerable to DoS attacks from untrusted user input
+ * @kind problem
+ * @problem.severity warning
+ * @precision high
+ * @id py/simple-xml-rpc-server
+ * @tags security
+ * external/cwe/cwe-776
+ */
+
+private import python
+private import semmle.python.dataflow.new.DataFlow
+private import semmle.python.Concepts
+private import experimental.semmle.python.Concepts
+private import semmle.python.ApiGraphs
+
+from DataFlow::CallCfgNode call, string kinds
+where
+ call = API::moduleImport("xmlrpc").getMember("server").getMember("SimpleXMLRPCServer").getACall() and
+ kinds =
+ strictconcat(XML::XMLVulnerabilityKind kind |
+ kind.isBillionLaughs() or kind.isQuadraticBlowup()
+ |
+ kind, ", "
+ )
+select call, "SimpleXMLRPCServer is vulnerable to: " + kinds + "."
diff --git a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll
index d7e27f35b0d9..bf481a1f2a34 100644
--- a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll
+++ b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll
@@ -329,34 +329,4 @@ private module Xml {
(kind.isBillionLaughs() or kind.isQuadraticBlowup())
}
}
-
- /**
- * Gets a call to `xmlrpc.server.SimpleXMLRPCServer`.
- *
- * Given the following example:
- *
- * ```py
- * server = SimpleXMLRPCServer(("127.0.0.1", 8000))
- * server.register_function(foo, "foo")
- * server.serve_forever()
- * ```
- *
- * * `this` would be `SimpleXMLRPCServer(("127.0.0.1", 8000))`.
- * * `getAnInput()`'s result would be `foo`.
- * * `vulnerable(kind)`'s `kind` would be `Billion Laughs` and `Quadratic Blowup`.
- */
- private class XMLRPCServer extends DataFlow::CallCfgNode, XML::XMLParser::Range {
- XMLRPCServer() {
- this =
- API::moduleImport("xmlrpc").getMember("server").getMember("SimpleXMLRPCServer").getACall()
- }
-
- override DataFlow::Node getAnInput() {
- result = this.getAMethodCall("register_function").getArg(0)
- }
-
- override predicate vulnerable(XML::XMLVulnerabilityKind kind) {
- kind.isBillionLaughs() or kind.isQuadraticBlowup()
- }
- }
}
diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/SimpleXmlRpcServer.expected b/python/ql/test/experimental/query-tests/Security/CWE-611/SimpleXmlRpcServer.expected
new file mode 100644
index 000000000000..4a08d61c47af
--- /dev/null
+++ b/python/ql/test/experimental/query-tests/Security/CWE-611/SimpleXmlRpcServer.expected
@@ -0,0 +1 @@
+| xmlrpc_server.py:7:10:7:48 | ControlFlowNode for SimpleXMLRPCServer() | SimpleXMLRPCServer is vulnerable to: Billion Laughs, Quadratic Blowup. |
diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/SimpleXmlRpcServer.qlref b/python/ql/test/experimental/query-tests/Security/CWE-611/SimpleXmlRpcServer.qlref
new file mode 100644
index 000000000000..a0b30e6d69b8
--- /dev/null
+++ b/python/ql/test/experimental/query-tests/Security/CWE-611/SimpleXmlRpcServer.qlref
@@ -0,0 +1 @@
+experimental/Security/CWE-611/SimpleXmlRpcServer.ql
diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/xmlrpc_server.py b/python/ql/test/experimental/query-tests/Security/CWE-611/xmlrpc_server.py
index baa433c4a8ab..83c18b549b3d 100644
--- a/python/ql/test/experimental/query-tests/Security/CWE-611/xmlrpc_server.py
+++ b/python/ql/test/experimental/query-tests/Security/CWE-611/xmlrpc_server.py
@@ -1,10 +1,12 @@
from xmlrpc.server import SimpleXMLRPCServer
-def foo(n):
- return n
+def foo(n: str):
+ print("foo called with arg:", n, type(n))
+ return "ok"
server = SimpleXMLRPCServer(("127.0.0.1", 8000))
server.register_function(foo, "foo")
server.serve_forever()
-# billion_laughs -> curl 127.0.0.1:8000 --data-raw ']>foo&lol9;'
+# normal: curl 127.0.0.1:8000 --data-raw 'foo42'
+# billion_laughs: curl 127.0.0.1:8000 --data-raw ']>foo&lol9;'
From 4b03f5c72400106027dd034a95079573904c1a12 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Thu, 3 Mar 2022 19:38:31 +0100
Subject: [PATCH 54/79] Python: Rename xml.sax test for consistency
---
.../CWE-611/XmlEntityInjection.expected | 102 +++++++++---------
.../{xml_sax_make_parser.py => xml_sax.py} | 0
2 files changed, 51 insertions(+), 51 deletions(-)
rename python/ql/test/experimental/query-tests/Security/CWE-611/{xml_sax_make_parser.py => xml_sax.py} (100%)
diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected b/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected
index b08e7dd727e4..f5f85bf178b8 100644
--- a/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected
+++ b/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected
@@ -71,25 +71,25 @@ edges
| xml_etree.py:60:19:60:25 | ControlFlowNode for request | xml_etree.py:60:19:60:30 | ControlFlowNode for Attribute |
| xml_etree.py:60:19:60:30 | ControlFlowNode for Attribute | xml_etree.py:60:19:60:45 | ControlFlowNode for Subscript |
| xml_etree.py:60:19:60:45 | ControlFlowNode for Subscript | xml_etree.py:64:45:64:55 | ControlFlowNode for xml_content |
-| xml_sax_make_parser.py:31:19:31:25 | ControlFlowNode for request | xml_sax_make_parser.py:31:19:31:30 | ControlFlowNode for Attribute |
-| xml_sax_make_parser.py:31:19:31:30 | ControlFlowNode for Attribute | xml_sax_make_parser.py:31:19:31:45 | ControlFlowNode for Subscript |
-| xml_sax_make_parser.py:31:19:31:45 | ControlFlowNode for Subscript | xml_sax_make_parser.py:36:18:36:38 | ControlFlowNode for StringIO() |
-| xml_sax_make_parser.py:42:19:42:25 | ControlFlowNode for request | xml_sax_make_parser.py:42:19:42:30 | ControlFlowNode for Attribute |
-| xml_sax_make_parser.py:42:19:42:30 | ControlFlowNode for Attribute | xml_sax_make_parser.py:42:19:42:45 | ControlFlowNode for Subscript |
-| xml_sax_make_parser.py:42:19:42:45 | ControlFlowNode for Subscript | xml_sax_make_parser.py:49:18:49:38 | ControlFlowNode for StringIO() |
-| xml_sax_make_parser.py:63:19:63:25 | ControlFlowNode for request | xml_sax_make_parser.py:63:19:63:30 | ControlFlowNode for Attribute |
-| xml_sax_make_parser.py:63:19:63:30 | ControlFlowNode for Attribute | xml_sax_make_parser.py:63:19:63:45 | ControlFlowNode for Subscript |
-| xml_sax_make_parser.py:63:19:63:45 | ControlFlowNode for Subscript | xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() |
-| xml_sax_make_parser.py:75:19:75:25 | ControlFlowNode for request | xml_sax_make_parser.py:75:19:75:30 | ControlFlowNode for Attribute |
-| xml_sax_make_parser.py:75:19:75:30 | ControlFlowNode for Attribute | xml_sax_make_parser.py:75:19:75:45 | ControlFlowNode for Subscript |
-| xml_sax_make_parser.py:75:19:75:45 | ControlFlowNode for Subscript | xml_sax_make_parser.py:79:33:79:53 | ControlFlowNode for StringIO() |
-| xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | xml_sax_make_parser.py:86:19:86:30 | ControlFlowNode for Attribute |
-| xml_sax_make_parser.py:86:19:86:30 | ControlFlowNode for Attribute | xml_sax_make_parser.py:86:19:86:45 | ControlFlowNode for Subscript |
-| xml_sax_make_parser.py:86:19:86:45 | ControlFlowNode for Subscript | xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() |
-| xml_sax_make_parser.py:86:19:86:45 | ControlFlowNode for Subscript | xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() |
-| xml_sax_make_parser.py:98:19:98:25 | ControlFlowNode for request | xml_sax_make_parser.py:98:19:98:30 | ControlFlowNode for Attribute |
-| xml_sax_make_parser.py:98:19:98:30 | ControlFlowNode for Attribute | xml_sax_make_parser.py:98:19:98:45 | ControlFlowNode for Subscript |
-| xml_sax_make_parser.py:98:19:98:45 | ControlFlowNode for Subscript | xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() |
+| xml_sax.py:31:19:31:25 | ControlFlowNode for request | xml_sax.py:31:19:31:30 | ControlFlowNode for Attribute |
+| xml_sax.py:31:19:31:30 | ControlFlowNode for Attribute | xml_sax.py:31:19:31:45 | ControlFlowNode for Subscript |
+| xml_sax.py:31:19:31:45 | ControlFlowNode for Subscript | xml_sax.py:36:18:36:38 | ControlFlowNode for StringIO() |
+| xml_sax.py:42:19:42:25 | ControlFlowNode for request | xml_sax.py:42:19:42:30 | ControlFlowNode for Attribute |
+| xml_sax.py:42:19:42:30 | ControlFlowNode for Attribute | xml_sax.py:42:19:42:45 | ControlFlowNode for Subscript |
+| xml_sax.py:42:19:42:45 | ControlFlowNode for Subscript | xml_sax.py:49:18:49:38 | ControlFlowNode for StringIO() |
+| xml_sax.py:63:19:63:25 | ControlFlowNode for request | xml_sax.py:63:19:63:30 | ControlFlowNode for Attribute |
+| xml_sax.py:63:19:63:30 | ControlFlowNode for Attribute | xml_sax.py:63:19:63:45 | ControlFlowNode for Subscript |
+| xml_sax.py:63:19:63:45 | ControlFlowNode for Subscript | xml_sax.py:69:18:69:38 | ControlFlowNode for StringIO() |
+| xml_sax.py:75:19:75:25 | ControlFlowNode for request | xml_sax.py:75:19:75:30 | ControlFlowNode for Attribute |
+| xml_sax.py:75:19:75:30 | ControlFlowNode for Attribute | xml_sax.py:75:19:75:45 | ControlFlowNode for Subscript |
+| xml_sax.py:75:19:75:45 | ControlFlowNode for Subscript | xml_sax.py:79:33:79:53 | ControlFlowNode for StringIO() |
+| xml_sax.py:86:19:86:25 | ControlFlowNode for request | xml_sax.py:86:19:86:30 | ControlFlowNode for Attribute |
+| xml_sax.py:86:19:86:30 | ControlFlowNode for Attribute | xml_sax.py:86:19:86:45 | ControlFlowNode for Subscript |
+| xml_sax.py:86:19:86:45 | ControlFlowNode for Subscript | xml_sax.py:91:22:91:42 | ControlFlowNode for StringIO() |
+| xml_sax.py:86:19:86:45 | ControlFlowNode for Subscript | xml_sax.py:93:22:93:42 | ControlFlowNode for StringIO() |
+| xml_sax.py:98:19:98:25 | ControlFlowNode for request | xml_sax.py:98:19:98:30 | ControlFlowNode for Attribute |
+| xml_sax.py:98:19:98:30 | ControlFlowNode for Attribute | xml_sax.py:98:19:98:45 | ControlFlowNode for Subscript |
+| xml_sax.py:98:19:98:45 | ControlFlowNode for Subscript | xml_sax.py:103:18:103:38 | ControlFlowNode for StringIO() |
| xml_to_dict.py:9:19:9:25 | ControlFlowNode for request | xml_to_dict.py:9:19:9:30 | ControlFlowNode for Attribute |
| xml_to_dict.py:9:19:9:30 | ControlFlowNode for Attribute | xml_to_dict.py:9:19:9:45 | ControlFlowNode for Subscript |
| xml_to_dict.py:9:19:9:45 | ControlFlowNode for Subscript | xml_to_dict.py:11:28:11:38 | ControlFlowNode for xml_content |
@@ -193,31 +193,31 @@ nodes
| xml_etree.py:60:19:60:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
| xml_etree.py:60:19:60:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
| xml_etree.py:64:45:64:55 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content |
-| xml_sax_make_parser.py:31:19:31:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
-| xml_sax_make_parser.py:31:19:31:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
-| xml_sax_make_parser.py:31:19:31:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
-| xml_sax_make_parser.py:36:18:36:38 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() |
-| xml_sax_make_parser.py:42:19:42:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
-| xml_sax_make_parser.py:42:19:42:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
-| xml_sax_make_parser.py:42:19:42:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
-| xml_sax_make_parser.py:49:18:49:38 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() |
-| xml_sax_make_parser.py:63:19:63:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
-| xml_sax_make_parser.py:63:19:63:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
-| xml_sax_make_parser.py:63:19:63:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
-| xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() |
-| xml_sax_make_parser.py:75:19:75:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
-| xml_sax_make_parser.py:75:19:75:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
-| xml_sax_make_parser.py:75:19:75:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
-| xml_sax_make_parser.py:79:33:79:53 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() |
-| xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
-| xml_sax_make_parser.py:86:19:86:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
-| xml_sax_make_parser.py:86:19:86:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
-| xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() |
-| xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() |
-| xml_sax_make_parser.py:98:19:98:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
-| xml_sax_make_parser.py:98:19:98:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
-| xml_sax_make_parser.py:98:19:98:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
-| xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() |
+| xml_sax.py:31:19:31:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
+| xml_sax.py:31:19:31:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
+| xml_sax.py:31:19:31:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
+| xml_sax.py:36:18:36:38 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() |
+| xml_sax.py:42:19:42:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
+| xml_sax.py:42:19:42:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
+| xml_sax.py:42:19:42:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
+| xml_sax.py:49:18:49:38 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() |
+| xml_sax.py:63:19:63:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
+| xml_sax.py:63:19:63:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
+| xml_sax.py:63:19:63:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
+| xml_sax.py:69:18:69:38 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() |
+| xml_sax.py:75:19:75:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
+| xml_sax.py:75:19:75:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
+| xml_sax.py:75:19:75:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
+| xml_sax.py:79:33:79:53 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() |
+| xml_sax.py:86:19:86:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
+| xml_sax.py:86:19:86:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
+| xml_sax.py:86:19:86:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
+| xml_sax.py:91:22:91:42 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() |
+| xml_sax.py:93:22:93:42 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() |
+| xml_sax.py:98:19:98:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
+| xml_sax.py:98:19:98:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
+| xml_sax.py:98:19:98:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
+| xml_sax.py:103:18:103:38 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() |
| xml_to_dict.py:9:19:9:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
| xml_to_dict.py:9:19:9:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
| xml_to_dict.py:9:19:9:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
@@ -250,11 +250,11 @@ subpaths
| xml_etree.py:42:45:42:55 | ControlFlowNode for xml_content | xml_etree.py:39:19:39:25 | ControlFlowNode for request | xml_etree.py:42:45:42:55 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_etree.py:42:45:42:55 | ControlFlowNode for xml_content | This | xml_etree.py:39:19:39:25 | ControlFlowNode for request | user-provided value |
| xml_etree.py:49:45:49:55 | ControlFlowNode for xml_content | xml_etree.py:46:19:46:25 | ControlFlowNode for request | xml_etree.py:49:45:49:55 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: XXE. | xml_etree.py:49:45:49:55 | ControlFlowNode for xml_content | This | xml_etree.py:46:19:46:25 | ControlFlowNode for request | user-provided value |
| xml_etree.py:56:45:56:55 | ControlFlowNode for xml_content | xml_etree.py:53:19:53:25 | ControlFlowNode for request | xml_etree.py:56:45:56:55 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: XXE. | xml_etree.py:56:45:56:55 | ControlFlowNode for xml_content | This | xml_etree.py:53:19:53:25 | ControlFlowNode for request | user-provided value |
-| xml_sax_make_parser.py:36:18:36:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:31:19:31:25 | ControlFlowNode for request | xml_sax_make_parser.py:36:18:36:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_sax_make_parser.py:36:18:36:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:31:19:31:25 | ControlFlowNode for request | user-provided value |
-| xml_sax_make_parser.py:49:18:49:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:42:19:42:25 | ControlFlowNode for request | xml_sax_make_parser.py:49:18:49:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_sax_make_parser.py:49:18:49:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:42:19:42:25 | ControlFlowNode for request | user-provided value |
-| xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:63:19:63:25 | ControlFlowNode for request | xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, DTD retrieval, Quadratic Blowup, XXE. | xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:63:19:63:25 | ControlFlowNode for request | user-provided value |
-| xml_sax_make_parser.py:79:33:79:53 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:75:19:75:25 | ControlFlowNode for request | xml_sax_make_parser.py:79:33:79:53 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, DTD retrieval, Quadratic Blowup, XXE. | xml_sax_make_parser.py:79:33:79:53 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:75:19:75:25 | ControlFlowNode for request | user-provided value |
-| xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, DTD retrieval, Quadratic Blowup, XXE. | xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | user-provided value |
-| xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | user-provided value |
-| xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:98:19:98:25 | ControlFlowNode for request | xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:98:19:98:25 | ControlFlowNode for request | user-provided value |
+| xml_sax.py:36:18:36:38 | ControlFlowNode for StringIO() | xml_sax.py:31:19:31:25 | ControlFlowNode for request | xml_sax.py:36:18:36:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_sax.py:36:18:36:38 | ControlFlowNode for StringIO() | This | xml_sax.py:31:19:31:25 | ControlFlowNode for request | user-provided value |
+| xml_sax.py:49:18:49:38 | ControlFlowNode for StringIO() | xml_sax.py:42:19:42:25 | ControlFlowNode for request | xml_sax.py:49:18:49:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_sax.py:49:18:49:38 | ControlFlowNode for StringIO() | This | xml_sax.py:42:19:42:25 | ControlFlowNode for request | user-provided value |
+| xml_sax.py:69:18:69:38 | ControlFlowNode for StringIO() | xml_sax.py:63:19:63:25 | ControlFlowNode for request | xml_sax.py:69:18:69:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, DTD retrieval, Quadratic Blowup, XXE. | xml_sax.py:69:18:69:38 | ControlFlowNode for StringIO() | This | xml_sax.py:63:19:63:25 | ControlFlowNode for request | user-provided value |
+| xml_sax.py:79:33:79:53 | ControlFlowNode for StringIO() | xml_sax.py:75:19:75:25 | ControlFlowNode for request | xml_sax.py:79:33:79:53 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, DTD retrieval, Quadratic Blowup, XXE. | xml_sax.py:79:33:79:53 | ControlFlowNode for StringIO() | This | xml_sax.py:75:19:75:25 | ControlFlowNode for request | user-provided value |
+| xml_sax.py:91:22:91:42 | ControlFlowNode for StringIO() | xml_sax.py:86:19:86:25 | ControlFlowNode for request | xml_sax.py:91:22:91:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, DTD retrieval, Quadratic Blowup, XXE. | xml_sax.py:91:22:91:42 | ControlFlowNode for StringIO() | This | xml_sax.py:86:19:86:25 | ControlFlowNode for request | user-provided value |
+| xml_sax.py:93:22:93:42 | ControlFlowNode for StringIO() | xml_sax.py:86:19:86:25 | ControlFlowNode for request | xml_sax.py:93:22:93:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_sax.py:93:22:93:42 | ControlFlowNode for StringIO() | This | xml_sax.py:86:19:86:25 | ControlFlowNode for request | user-provided value |
+| xml_sax.py:103:18:103:38 | ControlFlowNode for StringIO() | xml_sax.py:98:19:98:25 | ControlFlowNode for request | xml_sax.py:103:18:103:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_sax.py:103:18:103:38 | ControlFlowNode for StringIO() | This | xml_sax.py:98:19:98:25 | ControlFlowNode for request | user-provided value |
| xml_to_dict.py:17:28:17:38 | ControlFlowNode for xml_content | xml_to_dict.py:15:19:15:25 | ControlFlowNode for request | xml_to_dict.py:17:28:17:38 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_to_dict.py:17:28:17:38 | ControlFlowNode for xml_content | This | xml_to_dict.py:15:19:15:25 | ControlFlowNode for request | user-provided value |
diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/xml_sax_make_parser.py b/python/ql/test/experimental/query-tests/Security/CWE-611/xml_sax.py
similarity index 100%
rename from python/ql/test/experimental/query-tests/Security/CWE-611/xml_sax_make_parser.py
rename to python/ql/test/experimental/query-tests/Security/CWE-611/xml_sax.py
From faebaee141c10ace600153d84a2d8d1952beb73a Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Thu, 3 Mar 2022 20:36:16 +0100
Subject: [PATCH 55/79] Python: Use concept tests for XML Parsing
I was losing my mind from looking through those .expected files
Just going to take it one file at a time, to make reviewing easier
---
.../XML/ExperimentalXmlConceptsTests.expected | 0
.../XML/ExperimentalXmlConceptsTests.ql | 33 +++++++
.../frameworks/XML/lxml_etree.py | 40 +++++++++
.../Security/CWE-611/lxml_etree.py | 90 -------------------
4 files changed, 73 insertions(+), 90 deletions(-)
create mode 100644 python/ql/test/experimental/library-tests/frameworks/XML/ExperimentalXmlConceptsTests.expected
create mode 100644 python/ql/test/experimental/library-tests/frameworks/XML/ExperimentalXmlConceptsTests.ql
create mode 100644 python/ql/test/experimental/library-tests/frameworks/XML/lxml_etree.py
delete mode 100644 python/ql/test/experimental/query-tests/Security/CWE-611/lxml_etree.py
diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/ExperimentalXmlConceptsTests.expected b/python/ql/test/experimental/library-tests/frameworks/XML/ExperimentalXmlConceptsTests.expected
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/ExperimentalXmlConceptsTests.ql b/python/ql/test/experimental/library-tests/frameworks/XML/ExperimentalXmlConceptsTests.ql
new file mode 100644
index 000000000000..8ca33765d64f
--- /dev/null
+++ b/python/ql/test/experimental/library-tests/frameworks/XML/ExperimentalXmlConceptsTests.ql
@@ -0,0 +1,33 @@
+import python
+import experimental.semmle.python.Concepts
+import experimental.semmle.python.frameworks.Xml
+import semmle.python.dataflow.new.DataFlow
+import TestUtilities.InlineExpectationsTest
+private import semmle.python.dataflow.new.internal.PrintNode
+
+class XmlParsingTest extends InlineExpectationsTest {
+ XmlParsingTest() { this = "XmlParsingTest" }
+
+ override string getARelevantTag() { result in ["input", "vuln"] }
+
+ override predicate hasActualResult(Location location, string element, string tag, string value) {
+ exists(location.getFile().getRelativePath()) and
+ exists(XML::XMLParsing parsing |
+ exists(DataFlow::Node input |
+ input = parsing.getAnInput() and
+ location = input.getLocation() and
+ element = input.toString() and
+ value = prettyNodeForInlineTest(input) and
+ tag = "input"
+ )
+ or
+ exists(XML::XMLVulnerabilityKind kind |
+ parsing.vulnerable(kind) and
+ location = parsing.getLocation() and
+ element = parsing.toString() and
+ value = "'" + kind + "'" and
+ tag = "vuln"
+ )
+ )
+ }
+}
diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/lxml_etree.py b/python/ql/test/experimental/library-tests/frameworks/XML/lxml_etree.py
new file mode 100644
index 000000000000..64fa51175837
--- /dev/null
+++ b/python/ql/test/experimental/library-tests/frameworks/XML/lxml_etree.py
@@ -0,0 +1,40 @@
+from io import StringIO
+import lxml.etree
+
+x = "some xml"
+
+# different parsing methods
+lxml.etree.fromstring(x) # $ input=x vuln='XXE'
+
+lxml.etree.fromstringlist([x]) # $ input=List vuln='XXE'
+
+lxml.etree.XML(x) # $ input=x vuln='XXE'
+
+lxml.etree.parse(StringIO(x)).getroot() # $ input=StringIO(..) vuln='XXE'
+
+# With default parsers (nothing changed)
+parser = lxml.etree.XMLParser()
+lxml.etree.fromstring(x, parser=parser) # $ input=x vuln='XXE'
+
+parser = lxml.etree.get_default_parser()
+lxml.etree.fromstring(x, parser=parser) # $ input=x vuln='XXE'
+
+# XXE-safe
+parser = lxml.etree.XMLParser(resolve_entities=False)
+lxml.etree.fromstring(x, parser=parser) # $ input=x
+
+# XXE-vuln
+parser = lxml.etree.XMLParser(resolve_entities=True)
+lxml.etree.fromstring(x, parser=parser) # $ input=x vuln='XXE'
+
+# Billion laughs vuln (also XXE)
+parser = lxml.etree.XMLParser(huge_tree=True)
+lxml.etree.fromstring(x, parser=parser) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' vuln='XXE'
+
+# Billion laughs, but not XXE
+parser = lxml.etree.XMLParser(resolve_entities=False, huge_tree=True)
+lxml.etree.fromstring(x, parser=parser) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup'
+
+# DTD retrieval vuln (also XXE)
+parser = lxml.etree.XMLParser(load_dtd=True, no_network=False)
+lxml.etree.fromstring(x, parser=parser) # $ input=x vuln='DTD retrieval' vuln='XXE'
diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/lxml_etree.py b/python/ql/test/experimental/query-tests/Security/CWE-611/lxml_etree.py
deleted file mode 100644
index 2c2712098503..000000000000
--- a/python/ql/test/experimental/query-tests/Security/CWE-611/lxml_etree.py
+++ /dev/null
@@ -1,90 +0,0 @@
-from flask import request, Flask
-from io import StringIO, BytesIO
-import lxml.etree
-
-app = Flask(__name__)
-
-# Parsing
-
-@app.route("/lxml_etree_fromstring")
-def lxml_etree_fromstring():
- xml_content = request.args['xml_content']
-
- return lxml.etree.fromstring(xml_content).text # NOT OK for XXE
-
-@app.route("/lxml_etree_fromstringlist")
-def lxml_etree_fromstringlist():
- xml_content = request.args['xml_content']
-
- return lxml.etree.fromstringlist([xml_content]).text # NOT OK for XXE
-
-@app.route("/lxml_etree_XML")
-def lxml_etree_XML():
- xml_content = request.args['xml_content']
-
- return lxml.etree.XML(xml_content).text # NOT OK for XXE
-
-@app.route("/lxml_etree_parse")
-def lxml_etree_parse():
- xml_content = request.args['xml_content']
-
- return lxml.etree.parse(StringIO(xml_content)).getroot().text # NOT OK for XXE
-
-# With parsers - Default
-
-@app.route("/lxml_etree_fromstring-lxml.etree.XMLParser")
-def lxml_parser():
- xml_content = request.args['xml_content']
-
- parser = lxml.etree.XMLParser()
- return lxml.etree.fromstring(xml_content, parser=parser).text # NOT OK for XXE
-
-@app.route("/lxml_etree_fromstring-lxml.etree.get_default_parser")
-def lxml_parser():
- xml_content = request.args['xml_content']
-
- parser = lxml.etree.get_default_parser()
- return lxml.etree.fromstring(xml_content, parser=parser).text # NOT OK for XXE
-
-# With parsers - With options
-
-# XXE-safe
-@app.route("/lxml_etree_fromstring-lxml.etree.XMLParser+")
-def lxml_parser():
- xml_content = request.args['xml_content']
-
- parser = lxml.etree.XMLParser(resolve_entities=False)
- return lxml.etree.fromstring(xml_content, parser=parser).text # OK for XXE
-
-# XXE-vuln
-@app.route("/lxml_etree_fromstring-lxml.etree.XMLParser+")
-def lxml_parser():
- xml_content = request.args['xml_content']
-
- parser = lxml.etree.XMLParser(resolve_entities=True)
- return lxml.etree.fromstring(xml_content, parser=parser).text # NOT OK for XXE
-
-# Billion laughs and quadratic blowup (huge_tree)
-
-@app.route("/lxml_etree_fromstring-lxml.etree.XMLParser+")
-def lxml_parser():
- xml_content = request.args['xml_content']
-
- parser = lxml.etree.XMLParser(resolve_entities=False, huge_tree=True)
- return lxml.etree.fromstring(xml_content, parser=parser).text # OK for XXE, NOT OK for billion laughs/quadratic
-
-@app.route("/lxml_etree_fromstring-lxml.etree.XMLParser+")
-def lxml_parser():
- xml_content = request.args['xml_content']
-
- parser = lxml.etree.XMLParser(huge_tree=True)
- return lxml.etree.fromstring(xml_content, parser=parser).text # NOT OK for XXE, NOT OK for billion laughs/quadratic
-
-# DTD retrival
-
-@app.route("/lxml_etree_fromstring-lxml.etree.XMLParser+")
-def lxml_parser():
- xml_content = request.args['xml_content']
-
- parser = lxml.etree.XMLParser(resolve_entities=False, load_dtd=True, no_network=False)
- return lxml.etree.fromstring(xml_content, parser=parser).text # NOT OK for DTD, OK for rest
From a7134cac2eb339c76f3f299c77b927742e5e0320 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Thu, 3 Mar 2022 20:39:56 +0100
Subject: [PATCH 56/79] Python: Port xml.dom tests
---
.../library-tests/frameworks/XML/xml_dom.py | 19 ++++++++
.../query-tests/Security/CWE-611/xml_dom.py | 43 -------------------
2 files changed, 19 insertions(+), 43 deletions(-)
create mode 100644 python/ql/test/experimental/library-tests/frameworks/XML/xml_dom.py
delete mode 100644 python/ql/test/experimental/query-tests/Security/CWE-611/xml_dom.py
diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/xml_dom.py b/python/ql/test/experimental/library-tests/frameworks/XML/xml_dom.py
new file mode 100644
index 000000000000..ade6ece910d8
--- /dev/null
+++ b/python/ql/test/experimental/library-tests/frameworks/XML/xml_dom.py
@@ -0,0 +1,19 @@
+from io import StringIO
+import xml.dom.minidom
+import xml.dom.pulldom
+import xml.sax
+
+x = "some xml"
+
+# minidom
+xml.dom.minidom.parse(StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup'
+xml.dom.minidom.parseString(x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup'
+
+# pulldom
+xml.dom.pulldom.parse(StringIO(x))['START_DOCUMENT'][1] # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup'
+xml.dom.pulldom.parseString(x)['START_DOCUMENT'][1] # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup'
+
+# These are based on SAX parsers, and you can specify your own, so you can expose yourself to XXE (yay/)
+parser = xml.sax.make_parser()
+parser.setFeature(xml.sax.handler.feature_external_ges, True)
+xml.dom.minidom.parse(StringIO(x), parser=parser) # $ input=StringIO(..) vuln='Billion Laughs' vuln='DTD retrieval' vuln='Quadratic Blowup' vuln='XXE'
diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/xml_dom.py b/python/ql/test/experimental/query-tests/Security/CWE-611/xml_dom.py
deleted file mode 100644
index 55762ece0be5..000000000000
--- a/python/ql/test/experimental/query-tests/Security/CWE-611/xml_dom.py
+++ /dev/null
@@ -1,43 +0,0 @@
-from flask import request, Flask
-from io import StringIO, BytesIO
-import xml.dom.minidom
-import xml.dom.pulldom
-import xml.sax
-
-app = Flask(__name__)
-
-# Parsing
-
-@app.route("/xml_minidom_parse")
-def xml_minidom_parse():
- xml_content = request.args['xml_content']
-
- return xml.dom.minidom.parse(StringIO(xml_content)).documentElement.childNodes # OK for XXE/DTD, NOT OK for billion laughs/quadratic
-
-@app.route("/xml_minidom_parseString")
-def xml_minidom_parseString():
- xml_content = request.args['xml_content']
-
- return xml.dom.minidom.parseString(xml_content).documentElement.childNodes # OK for XXE/DTD, NOT OK for billion laughs/quadratic
-
-@app.route("/xml_pulldom_parse")
-def xml_pulldom_parse():
- xml_content = request.args['xml_content']
-
- return xml.dom.pulldom.parse(StringIO(xml_content))['START_DOCUMENT'][1].documentElement.childNodes # OK for XXE/DTD, NOT OK for billion laughs/quadratic
-
-@app.route("/xml_pulldom_parseString")
-def xml_pulldom_parseString():
- xml_content = request.args['xml_content']
-
- return xml.dom.pulldom.parseString(xml_content)['START_DOCUMENT'][1].documentElement.childNodes # OK for XXE/DTD, NOT OK for billion laughs/quadratic
-
-# With parsers
-
-@app.route("/xml_minidom_parse_xml_sax_make_parser")
-def xml_minidom_parse_xml_sax_make_parser():
- xml_content = request.args['xml_content']
-
- parser = xml.sax.make_parser()
- parser.setFeature(xml.sax.handler.feature_external_ges, True)
- return xml.dom.minidom.parse(StringIO(xml_content), parser=parser).documentElement.childNodes # NOT OK for XXE/DTD, NOT OK for billion laughs/quadratic
From 5fb4c4d1524f8a6bae5a8a3ff1c40b35b66f0998 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Thu, 3 Mar 2022 20:50:45 +0100
Subject: [PATCH 57/79] Python: Port xml.etree tests
---
.../library-tests/frameworks/XML/xml_etree.py | 19 ++++++
.../Security/CWE-611/dont_extract/PoC.py | 17 +++++
.../query-tests/Security/CWE-611/xml_etree.py | 64 -------------------
3 files changed, 36 insertions(+), 64 deletions(-)
create mode 100644 python/ql/test/experimental/library-tests/frameworks/XML/xml_etree.py
delete mode 100644 python/ql/test/experimental/query-tests/Security/CWE-611/xml_etree.py
diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/xml_etree.py b/python/ql/test/experimental/library-tests/frameworks/XML/xml_etree.py
new file mode 100644
index 000000000000..e2b81b3ad529
--- /dev/null
+++ b/python/ql/test/experimental/library-tests/frameworks/XML/xml_etree.py
@@ -0,0 +1,19 @@
+from io import StringIO
+import xml.etree.ElementTree
+
+x = "some xml"
+
+# Parsing in different ways
+xml.etree.ElementTree.fromstring(x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup'
+xml.etree.ElementTree.fromstringlist(x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup'
+xml.etree.ElementTree.XML(x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup'
+xml.etree.ElementTree.parse(StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup'
+
+# With parsers (no options available to disable/enable security features)
+parser = xml.etree.ElementTree.XMLParser()
+xml.etree.ElementTree.fromstring(x, parser=parser) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup'
+
+# note: it's technically possible to use the thin wrapper func `fromstring` with an
+# `lxml` parser, and thereby change what vulnerabilities you are exposed to.. but it
+# seems very unlikely that anyone would do this, so we have intentionally not added any
+# tests for this.
diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/dont_extract/PoC.py b/python/ql/test/experimental/query-tests/Security/CWE-611/dont_extract/PoC.py
index 862346de3e8a..b38ff9889e9d 100644
--- a/python/ql/test/experimental/query-tests/Security/CWE-611/dont_extract/PoC.py
+++ b/python/ql/test/experimental/query-tests/Security/CWE-611/dont_extract/PoC.py
@@ -250,6 +250,23 @@ def test_ok_xml():
assert root.tag == "test"
assert root.text == "hello world"
+ @staticmethod
+ def test_ok_xml_sax_parser():
+ # you _can_ pass a SAX parser to xml.etree... but it doesn't give you the output :|
+ parser = xml.sax.make_parser()
+ root = xml.etree.ElementTree.fromstring(ok_xml, parser=parser)
+ assert root == None
+
+ @staticmethod
+ def test_ok_xml_lxml_parser():
+ # this is technically possible, since parsers follow the same API, and the
+ # `fromstring` function is just a thin wrapper... seems very unlikely that
+ # anyone would do this though :|
+ parser = lxml.etree.XMLParser()
+ root = xml.etree.ElementTree.fromstring(ok_xml, parser=parser)
+ assert root.tag == "test"
+ assert root.text == "hello world"
+
@staticmethod
def test_xxe_not_possible():
parser = xml.etree.ElementTree.XMLParser()
diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/xml_etree.py b/python/ql/test/experimental/query-tests/Security/CWE-611/xml_etree.py
deleted file mode 100644
index b9c980045e2a..000000000000
--- a/python/ql/test/experimental/query-tests/Security/CWE-611/xml_etree.py
+++ /dev/null
@@ -1,64 +0,0 @@
-from flask import request, Flask
-from io import StringIO, BytesIO
-import xml.etree
-import xml.etree.ElementTree
-import lxml.etree
-
-app = Flask(__name__)
-
-# Parsing
-
-@app.route("/xml_etree_fromstring")
-def xml_etree_fromstring():
- xml_content = request.args['xml_content']
-
- return xml.etree.ElementTree.fromstring(xml_content).text
-
-@app.route("/xml_etree_fromstringlist")
-def xml_etree_fromstringlist():
- xml_content = request.args['xml_content']
-
- return xml.etree.ElementTree.fromstringlist(xml_content).text
-
-@app.route("/xml_etree_XML")
-def xml_etree_XML():
- xml_content = request.args['xml_content']
-
- return xml.etree.ElementTree.XML(xml_content).text
-
-@app.route("/xml_etree_parse")
-def xml_etree_parse():
- xml_content = request.args['xml_content']
-
- return xml.etree.ElementTree.parse(StringIO(xml_content)).getroot().text
-
-# With parsers
-
-@app.route("/xml_etree_fromstring-xml_etree_XMLParser")
-def xml_parser_1():
- xml_content = request.args['xml_content']
-
- parser = xml.etree.ElementTree.XMLParser()
- return xml.etree.ElementTree.fromstring(xml_content, parser=parser).text
-
-@app.route("/xml_etree_fromstring-lxml_etree_XMLParser")
-def xml_parser_2():
- xml_content = request.args['xml_content']
-
- parser = lxml.etree.XMLParser()
- return xml.etree.ElementTree.fromstring(xml_content, parser=parser).text
-
-@app.route("/xml_etree_fromstring-lxml_get_default_parser")
-def xml_parser_3():
- xml_content = request.args['xml_content']
-
- parser = lxml.etree.get_default_parser()
- return xml.etree.ElementTree.fromstring(xml_content, parser=parser).text
-
-@app.route("/xml_etree_fromstring-lxml_get_default_parser")
-def xml_parser_4():
- xml_content = request.args['xml_content']
-
- parser = xml.sax.make_parser()
- parser.setFeature(xml.sax.handler.feature_external_ges, True)
- return xml.etree.ElementTree.fromstring(xml_content, parser=parser).text
\ No newline at end of file
From 0b12d918171ee00b8a40f576d75c65b38193ebf0 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Thu, 3 Mar 2022 20:57:04 +0100
Subject: [PATCH 58/79] Python: Port xml.sax tests
---
.../library-tests/frameworks/XML/xml_sax.py | 47 ++++++++
.../query-tests/Security/CWE-611/xml_sax.py | 104 ------------------
2 files changed, 47 insertions(+), 104 deletions(-)
create mode 100644 python/ql/test/experimental/library-tests/frameworks/XML/xml_sax.py
delete mode 100644 python/ql/test/experimental/query-tests/Security/CWE-611/xml_sax.py
diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/xml_sax.py b/python/ql/test/experimental/library-tests/frameworks/XML/xml_sax.py
new file mode 100644
index 000000000000..47f6600b153e
--- /dev/null
+++ b/python/ql/test/experimental/library-tests/frameworks/XML/xml_sax.py
@@ -0,0 +1,47 @@
+from io import StringIO
+import xml.sax
+
+x = "some xml"
+
+class MainHandler(xml.sax.ContentHandler):
+ def __init__(self):
+ self._result = []
+
+ def characters(self, data):
+ self._result.append(data)
+
+ def parse(self, f):
+ xml.sax.parse(f, self) # $ MISSING: input=f vuln='Billion Laughs' vuln='Quadratic Blowup'
+ self._result
+
+MainHandler().parse(StringIO(x))
+
+parser = xml.sax.make_parser()
+parser.parse(StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup'
+
+# You can make it vuln to both XXE and DTD retrieval by setting this flag
+# see https://docs.python.org/3/library/xml.sax.handler.html#xml.sax.handler.feature_external_ges
+parser = xml.sax.make_parser()
+parser.setFeature(xml.sax.handler.feature_external_ges, True)
+parser.parse(StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='DTD retrieval' vuln='Quadratic Blowup' vuln='XXE'
+
+parser = xml.sax.make_parser()
+parser.setFeature(xml.sax.handler.feature_external_ges, False)
+parser.parse(StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup'
+
+# Forward Type Tracking test
+
+def func(cond):
+ parser = xml.sax.make_parser()
+ if cond:
+ parser.setFeature(xml.sax.handler.feature_external_ges, True)
+ parser.parse(StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='DTD retrieval' vuln='Quadratic Blowup' vuln='XXE'
+ else:
+ parser.parse(StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup'
+
+# make it vuln, then make it safe again
+# a bit of an edge case, but it is nice to be able to handle.
+parser = xml.sax.make_parser()
+parser.setFeature(xml.sax.handler.feature_external_ges, True)
+parser.setFeature(xml.sax.handler.feature_external_ges, False)
+parser.parse(StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup'
diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/xml_sax.py b/python/ql/test/experimental/query-tests/Security/CWE-611/xml_sax.py
deleted file mode 100644
index e95abf753796..000000000000
--- a/python/ql/test/experimental/query-tests/Security/CWE-611/xml_sax.py
+++ /dev/null
@@ -1,104 +0,0 @@
-from flask import request, Flask
-from io import StringIO
-import xml.sax
-
-app = Flask(__name__)
-
-
-class MainHandler(xml.sax.ContentHandler):
- def __init__(self):
- self._result = []
-
- def characters(self, data):
- self._result.append(data)
-
- def parse(self, f):
- xml.sax.parse(f, self) # OK for XXE/DTD, NOT OK for billion laughs/quadratic
- return self._result
-
-# GOOD
-
-
-@app.route("/MainHandler")
-def mainHandler():
- xml_content = request.args['xml_content']
-
- return MainHandler().parse(StringIO(xml_content))
-
-
-@app.route("/xml.sax.make_parser()+MainHandler")
-def xml_makeparser_MainHandler():
- xml_content = request.args['xml_content']
-
- GoodHandler = MainHandler()
- parser = xml.sax.make_parser()
- parser.setContentHandler(GoodHandler)
- parser.parse(StringIO(xml_content)) # OK for XXE/DTD, NOT OK for billion laughs/quadratic
- return GoodHandler._result
-
-
-@app.route("/xml.sax.make_parser()+MainHandler-xml.sax.handler.feature_external_ges_False")
-def xml_makeparser_MainHandler_entitiesFalse():
- xml_content = request.args['xml_content']
-
- GoodHandler = MainHandler()
- parser = xml.sax.make_parser()
- parser.setContentHandler(GoodHandler)
- # https://docs.python.org/3/library/xml.sax.handler.html#xml.sax.handler.feature_external_ges
- parser.setFeature(xml.sax.handler.feature_external_ges, False)
- parser.parse(StringIO(xml_content)) # # OK for XXE/DTD, NOT OK for billion laughs/quadratic
- return GoodHandler._result
-
-@app.route("not-user-controlled")
-def not_user_controlled():
- parser = xml.sax.make_parser()
- parser.setFeature(xml.sax.handler.feature_external_ges, True)
- parser.parse("/not-user-controlled/default_config.xml") # OK
- return
-
-# BAD
-
-@app.route("/xml.sax.make_parser()+MainHandler-xml.sax.handler.feature_external_ges_True")
-def xml_makeparser_MainHandler_entitiesTrue():
- xml_content = request.args['xml_content']
-
- BadHandler = MainHandler()
- parser = xml.sax.make_parser()
- parser.setContentHandler(BadHandler)
- parser.setFeature(xml.sax.handler.feature_external_ges, True)
- parser.parse(StringIO(xml_content)) # NOT OK for XXE/DTD, NOT OK for billion laughs/quadratic
- return BadHandler._result
-
-
-@app.route("/xml.sax.make_parser()+xml.dom.minidom.parse-xml.sax.handler.feature_external_ges_True")
-def xml_makeparser_minidom_entitiesTrue():
- xml_content = request.args['xml_content']
-
- parser = xml.sax.make_parser()
- parser.setFeature(xml.sax.handler.feature_external_ges, True)
- doc = xml.dom.minidom.parse(StringIO(xml_content), parser=parser) # NOT OK for XXE/DTD, NOT OK for billion laughs/quadratic
- return doc.documentElement.childNodes
-
-# Forward Type Tracking test
-
-@app.route("forward_tracking1")
-def forward_tracking1(action):
- xml_content = request.args['xml_content']
-
- parser = xml.sax.make_parser()
- if action == 'load-config':
- parser.setFeature(xml.sax.handler.feature_external_ges, True)
- parser.parse(StringIO(xml_content)) # NOT OK for XXE/DTD, NOT OK for billion laughs/quadratic
- else:
- parser.parse(StringIO(xml_content)) # OK for XXE/DTD, NOT OK for billion laughs/quadratic
- return
-
-@app.route("forward_tracking2")
-def forward_tracking2(action):
- xml_content = request.args['xml_content']
-
- parser = xml.sax.make_parser()
- parser.setFeature(xml.sax.handler.feature_external_ges, True)
- parser.setFeature(xml.sax.handler.feature_external_ges, False)
- parser.parse(StringIO(xml_content)) # OK for XXE/DTD, NOT OK for billion laughs/quadratic
- return
From c739ae40b60ef5644d0c0e9c1a8238742f2207e2 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Thu, 3 Mar 2022 20:59:00 +0100
Subject: [PATCH 59/79] Python: Port `xmltodict` tests
---
.../library-tests/frameworks/XML/xmltodict.py | 6 ++++++
.../query-tests/Security/CWE-611/xml_to_dict.py | 17 -----------------
2 files changed, 6 insertions(+), 17 deletions(-)
create mode 100644 python/ql/test/experimental/library-tests/frameworks/XML/xmltodict.py
delete mode 100644 python/ql/test/experimental/query-tests/Security/CWE-611/xml_to_dict.py
diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/xmltodict.py b/python/ql/test/experimental/library-tests/frameworks/XML/xmltodict.py
new file mode 100644
index 000000000000..ee0b38719125
--- /dev/null
+++ b/python/ql/test/experimental/library-tests/frameworks/XML/xmltodict.py
@@ -0,0 +1,6 @@
+import xmltodict
+
+x = "some xml"
+
+xmltodict.parse(x) # $ input=x
+xmltodict.parse(x, disable_entities=False) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup'
diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/xml_to_dict.py b/python/ql/test/experimental/query-tests/Security/CWE-611/xml_to_dict.py
deleted file mode 100644
index 8f43d2e1cc1f..000000000000
--- a/python/ql/test/experimental/query-tests/Security/CWE-611/xml_to_dict.py
+++ /dev/null
@@ -1,17 +0,0 @@
-from flask import request, Flask
-from io import StringIO, BytesIO
-import xmltodict
-
-app = Flask(__name__)
-
-@app.route("/xmltodict.parse")
-def xmltodict_parse():
- xml_content = request.args['xml_content']
-
- return xmltodict.parse(xml_content) # OK
-
-@app.route("/xmltodict.parse2")
-def xmltodict_parse2():
- xml_content = request.args['xml_content']
-
- return xmltodict.parse(xml_content, disable_entities=False) # NOT OK for billion laughs/quadratic
From 2451123c6712e566fe0256f9349952e8ef738cd2 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Thu, 3 Mar 2022 20:59:45 +0100
Subject: [PATCH 60/79] Python: Move XML PoC to new test dir
---
.../dont_extract => library-tests/frameworks/XML/poc}/PoC.py | 0
.../dont_extract => library-tests/frameworks/XML/poc}/flag | 0
.../library-tests/frameworks/XML/poc/this-dir-is-not-extracted | 1 +
3 files changed, 1 insertion(+)
rename python/ql/test/experimental/{query-tests/Security/CWE-611/dont_extract => library-tests/frameworks/XML/poc}/PoC.py (100%)
rename python/ql/test/experimental/{query-tests/Security/CWE-611/dont_extract => library-tests/frameworks/XML/poc}/flag (100%)
create mode 100644 python/ql/test/experimental/library-tests/frameworks/XML/poc/this-dir-is-not-extracted
diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/dont_extract/PoC.py b/python/ql/test/experimental/library-tests/frameworks/XML/poc/PoC.py
similarity index 100%
rename from python/ql/test/experimental/query-tests/Security/CWE-611/dont_extract/PoC.py
rename to python/ql/test/experimental/library-tests/frameworks/XML/poc/PoC.py
diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/dont_extract/flag b/python/ql/test/experimental/library-tests/frameworks/XML/poc/flag
similarity index 100%
rename from python/ql/test/experimental/query-tests/Security/CWE-611/dont_extract/flag
rename to python/ql/test/experimental/library-tests/frameworks/XML/poc/flag
diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/poc/this-dir-is-not-extracted b/python/ql/test/experimental/library-tests/frameworks/XML/poc/this-dir-is-not-extracted
new file mode 100644
index 000000000000..b1925ade1d3a
--- /dev/null
+++ b/python/ql/test/experimental/library-tests/frameworks/XML/poc/this-dir-is-not-extracted
@@ -0,0 +1 @@
+just FYI
From 32787939721e9478e4075b6c7d2f10a96b2a2cb1 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Thu, 3 Mar 2022 21:05:44 +0100
Subject: [PATCH 61/79] Python: Handle more functions and kw-args
---
.../semmle/python/frameworks/Xml.qll | 81 ++++++++++++++++---
.../frameworks/XML/lxml_etree.py | 9 ++-
.../library-tests/frameworks/XML/xml_dom.py | 12 +++
.../library-tests/frameworks/XML/xml_etree.py | 16 +++-
.../library-tests/frameworks/XML/xml_sax.py | 10 +--
.../library-tests/frameworks/XML/xmltodict.py | 2 +
6 files changed, 114 insertions(+), 16 deletions(-)
diff --git a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll
index bf481a1f2a34..b0e7592c3936 100644
--- a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll
+++ b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll
@@ -53,11 +53,21 @@ private module Xml {
API::moduleImport("xml")
.getMember("etree")
.getMember("ElementTree")
- .getMember(["fromstring", "fromstringlist", "XML", "parse"])
+ .getMember(["fromstring", "fromstringlist", "XML", "XMLID", "parse", "iterparse"])
.getACall()
}
- override DataFlow::Node getAnInput() { result = this.getArg(0) }
+ override DataFlow::Node getAnInput() {
+ result in [
+ this.getArg(0),
+ // fromstring / XML / XMLID
+ this.getArgByName("text"),
+ // fromstringlist
+ this.getArgByName("sequence"),
+ // parse / iterparse
+ this.getArgByName("source"),
+ ]
+ }
override predicate vulnerable(XML::XMLVulnerabilityKind kind) {
not exists(this.getArgByName("parser")) and
@@ -163,8 +173,8 @@ private module Xml {
* parsed_xml = BadHandler._result
* ```
*/
- private class XMLSaxParsing extends DataFlow::MethodCallNode, XML::XMLParsing::Range {
- XMLSaxParsing() {
+ private class XMLSaxInstanceParsing extends DataFlow::MethodCallNode, XML::XMLParsing::Range {
+ XMLSaxInstanceParsing() {
this =
API::moduleImport("xml")
.getMember("sax")
@@ -174,7 +184,40 @@ private module Xml {
.getACall()
}
- override DataFlow::Node getAnInput() { result = this.getArg(0) }
+ override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("source")] }
+
+ override predicate vulnerable(XML::XMLVulnerabilityKind kind) {
+ // always vuln to these
+ (kind.isBillionLaughs() or kind.isQuadraticBlowup())
+ or
+ // can be vuln to other things if the `feature_external_ges` feature has been turned on
+ this.getObject() = saxParserWithFeatureExternalGesTurnedOn() and
+ (kind.isXxe() or kind.isDtdRetrieval())
+ }
+ }
+
+ /**
+ * A call to either `parse` or `parseString` from the `xml.sax` module.
+ *
+ * See:
+ * - https://docs.python.org/3.10/library/xml.sax.html#xml.sax.parse
+ * - https://docs.python.org/3.10/library/xml.sax.html#xml.sax.parseString
+ */
+ private class XMLSaxParsing extends DataFlow::MethodCallNode, XML::XMLParsing::Range {
+ XMLSaxParsing() {
+ this =
+ API::moduleImport("xml").getMember("sax").getMember(["parse", "parseString"]).getACall()
+ }
+
+ override DataFlow::Node getAnInput() {
+ result in [
+ this.getArg(0),
+ // parseString
+ this.getArgByName("string"),
+ // parse
+ this.getArgByName("source"),
+ ]
+ }
override predicate vulnerable(XML::XMLVulnerabilityKind kind) {
// always vuln to these
@@ -262,11 +305,21 @@ private module Xml {
this =
API::moduleImport("lxml")
.getMember("etree")
- .getMember(["fromstring", "fromstringlist", "XML", "parse"])
+ .getMember(["fromstring", "fromstringlist", "XML", "parse", "parseid"])
.getACall()
}
- override DataFlow::Node getAnInput() { result = this.getArg(0) }
+ override DataFlow::Node getAnInput() {
+ result in [
+ this.getArg(0),
+ // fromstring / XML
+ this.getArgByName("text"),
+ // fromstringlist
+ this.getArgByName("strings"),
+ // parse / parseid
+ this.getArgByName("source"),
+ ]
+ }
override predicate vulnerable(XML::XMLVulnerabilityKind kind) {
exists(XML::XMLParser xmlParser |
@@ -293,7 +346,9 @@ private module Xml {
private class XMLtoDictParsing extends DataFlow::CallCfgNode, XML::XMLParsing::Range {
XMLtoDictParsing() { this = API::moduleImport("xmltodict").getMember("parse").getACall() }
- override DataFlow::Node getAnInput() { result = this.getArg(0) }
+ override DataFlow::Node getAnInput() {
+ result in [this.getArg(0), this.getArgByName("xml_input")]
+ }
override predicate vulnerable(XML::XMLVulnerabilityKind kind) {
(kind.isBillionLaughs() or kind.isQuadraticBlowup()) and
@@ -317,7 +372,15 @@ private module Xml {
}
override DataFlow::Node getAnInput() {
- result in [this.getArg(0), this.getArgByName("string"), this.getArgByName("file")]
+ result in [
+ this.getArg(0),
+ // parseString
+ this.getArgByName("string"),
+ // minidom.parse
+ this.getArgByName("file"),
+ // pulldom.parse
+ this.getArgByName("stream_or_string"),
+ ]
}
DataFlow::Node getParserArg() { result in [this.getArg(1), this.getArgByName("parser")] }
diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/lxml_etree.py b/python/ql/test/experimental/library-tests/frameworks/XML/lxml_etree.py
index 64fa51175837..3e6e6fb08e7f 100644
--- a/python/ql/test/experimental/library-tests/frameworks/XML/lxml_etree.py
+++ b/python/ql/test/experimental/library-tests/frameworks/XML/lxml_etree.py
@@ -5,12 +5,19 @@
# different parsing methods
lxml.etree.fromstring(x) # $ input=x vuln='XXE'
+lxml.etree.fromstring(text=x) # $ input=x vuln='XXE'
lxml.etree.fromstringlist([x]) # $ input=List vuln='XXE'
+lxml.etree.fromstringlist(strings=[x]) # $ input=List vuln='XXE'
lxml.etree.XML(x) # $ input=x vuln='XXE'
+lxml.etree.XML(text=x) # $ input=x vuln='XXE'
-lxml.etree.parse(StringIO(x)).getroot() # $ input=StringIO(..) vuln='XXE'
+lxml.etree.parse(StringIO(x)) # $ input=StringIO(..) vuln='XXE'
+lxml.etree.parse(source=StringIO(x)) # $ input=StringIO(..) vuln='XXE'
+
+lxml.etree.parseid(StringIO(x)) # $ input=StringIO(..) vuln='XXE'
+lxml.etree.parseid(source=StringIO(x)) # $ input=StringIO(..) vuln='XXE'
# With default parsers (nothing changed)
parser = lxml.etree.XMLParser()
diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/xml_dom.py b/python/ql/test/experimental/library-tests/frameworks/XML/xml_dom.py
index ade6ece910d8..7dce29fc7b9d 100644
--- a/python/ql/test/experimental/library-tests/frameworks/XML/xml_dom.py
+++ b/python/ql/test/experimental/library-tests/frameworks/XML/xml_dom.py
@@ -7,13 +7,25 @@
# minidom
xml.dom.minidom.parse(StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup'
+xml.dom.minidom.parse(file=StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup'
+
xml.dom.minidom.parseString(x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup'
+xml.dom.minidom.parseString(string=x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup'
+
# pulldom
xml.dom.pulldom.parse(StringIO(x))['START_DOCUMENT'][1] # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup'
+xml.dom.pulldom.parse(stream_or_string=StringIO(x))['START_DOCUMENT'][1] # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup'
+
xml.dom.pulldom.parseString(x)['START_DOCUMENT'][1] # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup'
+xml.dom.pulldom.parseString(string=x)['START_DOCUMENT'][1] # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup'
+
# These are based on SAX parses, and you can specify your own, so you can expose yourself to XXE (yay/)
parser = xml.sax.make_parser()
parser.setFeature(xml.sax.handler.feature_external_ges, True)
+xml.dom.minidom.parse(StringIO(x), parser) # $ input=StringIO(..) vuln='Billion Laughs' vuln='DTD retrieval' vuln='Quadratic Blowup' vuln='XXE'
xml.dom.minidom.parse(StringIO(x), parser=parser) # $ input=StringIO(..) vuln='Billion Laughs' vuln='DTD retrieval' vuln='Quadratic Blowup' vuln='XXE'
+
+xml.dom.pulldom.parse(StringIO(x), parser) # $ input=StringIO(..) vuln='Billion Laughs' vuln='DTD retrieval' vuln='Quadratic Blowup' vuln='XXE'
+xml.dom.pulldom.parse(StringIO(x), parser=parser) # $ input=StringIO(..) vuln='Billion Laughs' vuln='DTD retrieval' vuln='Quadratic Blowup' vuln='XXE'
diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/xml_etree.py b/python/ql/test/experimental/library-tests/frameworks/XML/xml_etree.py
index e2b81b3ad529..23ac3784cbc3 100644
--- a/python/ql/test/experimental/library-tests/frameworks/XML/xml_etree.py
+++ b/python/ql/test/experimental/library-tests/frameworks/XML/xml_etree.py
@@ -5,9 +5,23 @@
# Parsing in different ways
xml.etree.ElementTree.fromstring(x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup'
-xml.etree.ElementTree.fromstringlist(x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup'
+xml.etree.ElementTree.fromstring(text=x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup'
+
+xml.etree.ElementTree.fromstringlist([x]) # $ input=List vuln='Billion Laughs' vuln='Quadratic Blowup'
+xml.etree.ElementTree.fromstringlist(sequence=[x]) # $ input=List vuln='Billion Laughs' vuln='Quadratic Blowup'
+
xml.etree.ElementTree.XML(x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup'
+xml.etree.ElementTree.XML(text=x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup'
+
+xml.etree.ElementTree.XMLID(x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup'
+xml.etree.ElementTree.XMLID(text=x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup'
+
xml.etree.ElementTree.parse(StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup'
+xml.etree.ElementTree.parse(source=StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup'
+
+xml.etree.ElementTree.iterparse(StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup'
+xml.etree.ElementTree.iterparse(source=StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup'
+
# With parsers (no options available to disable/enable security features)
parser = xml.etree.ElementTree.XMLParser()
diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/xml_sax.py b/python/ql/test/experimental/library-tests/frameworks/XML/xml_sax.py
index 47f6600b153e..89bbec3f1f57 100644
--- a/python/ql/test/experimental/library-tests/frameworks/XML/xml_sax.py
+++ b/python/ql/test/experimental/library-tests/frameworks/XML/xml_sax.py
@@ -10,14 +10,15 @@ def __init__(self):
def characters(self, data):
self._result.append(data)
- def parse(self, f):
- xml.sax.parse(f, self) # $ MISSING: input=f vuln='Billion Laughs' vuln='Quadratic Blowup'
- self._result
+xml.sax.parse(StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup'
+xml.sax.parse(source=StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup'
-MainHandler().parse(StringIO(x))
+xml.sax.parseString(x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup'
+xml.sax.parseString(string=x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup'
parser = xml.sax.make_parser()
parser.parse(StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup'
+parser.parse(source=StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup'
# You can make it vuln to both XXE and DTD retrieval by setting this flag
# see https://docs.python.org/3/library/xml.sax.handler.html#xml.sax.handler.feature_external_ges
@@ -30,7 +31,6 @@ def parse(self, f):
parser.parse(StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup'
# Forward Type Tracking test
-
def func(cond):
parser = xml.sax.make_parser()
if cond:
diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/xmltodict.py b/python/ql/test/experimental/library-tests/frameworks/XML/xmltodict.py
index ee0b38719125..473e51c9fe66 100644
--- a/python/ql/test/experimental/library-tests/frameworks/XML/xmltodict.py
+++ b/python/ql/test/experimental/library-tests/frameworks/XML/xmltodict.py
@@ -3,4 +3,6 @@
x = "some xml"
xmltodict.parse(x) # $ input=x
+xmltodict.parse(xml_input=x) # $ input=x
+
xmltodict.parse(x, disable_entities=False) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup'
From f72f673e7ee82e5fd4156d2d6a5a4e8144d371d7 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Thu, 3 Mar 2022 21:09:29 +0100
Subject: [PATCH 62/79] Python: Update `XmlEntityInjection.expected`
I had forgotten about this, but better late than never... also added a
small representative test
---
.../CWE-611/XmlEntityInjection.expected | 279 ++----------------
.../query-tests/Security/CWE-611/test.py | 30 ++
2 files changed, 53 insertions(+), 256 deletions(-)
create mode 100644 python/ql/test/experimental/query-tests/Security/CWE-611/test.py
diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected b/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected
index f5f85bf178b8..25594b4ddaaf 100644
--- a/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected
+++ b/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected
@@ -1,260 +1,27 @@
edges
-| lxml_etree.py:11:19:11:25 | ControlFlowNode for request | lxml_etree.py:11:19:11:30 | ControlFlowNode for Attribute |
-| lxml_etree.py:11:19:11:30 | ControlFlowNode for Attribute | lxml_etree.py:11:19:11:45 | ControlFlowNode for Subscript |
-| lxml_etree.py:11:19:11:45 | ControlFlowNode for Subscript | lxml_etree.py:13:34:13:44 | ControlFlowNode for xml_content |
-| lxml_etree.py:17:19:17:25 | ControlFlowNode for request | lxml_etree.py:17:19:17:30 | ControlFlowNode for Attribute |
-| lxml_etree.py:17:19:17:30 | ControlFlowNode for Attribute | lxml_etree.py:17:19:17:45 | ControlFlowNode for Subscript |
-| lxml_etree.py:17:19:17:45 | ControlFlowNode for Subscript | lxml_etree.py:19:38:19:50 | ControlFlowNode for List |
-| lxml_etree.py:23:19:23:25 | ControlFlowNode for request | lxml_etree.py:23:19:23:30 | ControlFlowNode for Attribute |
-| lxml_etree.py:23:19:23:30 | ControlFlowNode for Attribute | lxml_etree.py:23:19:23:45 | ControlFlowNode for Subscript |
-| lxml_etree.py:23:19:23:45 | ControlFlowNode for Subscript | lxml_etree.py:25:27:25:37 | ControlFlowNode for xml_content |
-| lxml_etree.py:29:19:29:25 | ControlFlowNode for request | lxml_etree.py:29:19:29:30 | ControlFlowNode for Attribute |
-| lxml_etree.py:29:19:29:30 | ControlFlowNode for Attribute | lxml_etree.py:29:19:29:45 | ControlFlowNode for Subscript |
-| lxml_etree.py:29:19:29:45 | ControlFlowNode for Subscript | lxml_etree.py:31:29:31:49 | ControlFlowNode for StringIO() |
-| lxml_etree.py:37:19:37:25 | ControlFlowNode for request | lxml_etree.py:37:19:37:30 | ControlFlowNode for Attribute |
-| lxml_etree.py:37:19:37:30 | ControlFlowNode for Attribute | lxml_etree.py:37:19:37:45 | ControlFlowNode for Subscript |
-| lxml_etree.py:37:19:37:45 | ControlFlowNode for Subscript | lxml_etree.py:40:34:40:44 | ControlFlowNode for xml_content |
-| lxml_etree.py:44:19:44:25 | ControlFlowNode for request | lxml_etree.py:44:19:44:30 | ControlFlowNode for Attribute |
-| lxml_etree.py:44:19:44:30 | ControlFlowNode for Attribute | lxml_etree.py:44:19:44:45 | ControlFlowNode for Subscript |
-| lxml_etree.py:44:19:44:45 | ControlFlowNode for Subscript | lxml_etree.py:47:34:47:44 | ControlFlowNode for xml_content |
-| lxml_etree.py:54:19:54:25 | ControlFlowNode for request | lxml_etree.py:54:19:54:30 | ControlFlowNode for Attribute |
-| lxml_etree.py:54:19:54:30 | ControlFlowNode for Attribute | lxml_etree.py:54:19:54:45 | ControlFlowNode for Subscript |
-| lxml_etree.py:54:19:54:45 | ControlFlowNode for Subscript | lxml_etree.py:57:34:57:44 | ControlFlowNode for xml_content |
-| lxml_etree.py:62:19:62:25 | ControlFlowNode for request | lxml_etree.py:62:19:62:30 | ControlFlowNode for Attribute |
-| lxml_etree.py:62:19:62:30 | ControlFlowNode for Attribute | lxml_etree.py:62:19:62:45 | ControlFlowNode for Subscript |
-| lxml_etree.py:62:19:62:45 | ControlFlowNode for Subscript | lxml_etree.py:65:34:65:44 | ControlFlowNode for xml_content |
-| lxml_etree.py:71:19:71:25 | ControlFlowNode for request | lxml_etree.py:71:19:71:30 | ControlFlowNode for Attribute |
-| lxml_etree.py:71:19:71:30 | ControlFlowNode for Attribute | lxml_etree.py:71:19:71:45 | ControlFlowNode for Subscript |
-| lxml_etree.py:71:19:71:45 | ControlFlowNode for Subscript | lxml_etree.py:74:34:74:44 | ControlFlowNode for xml_content |
-| lxml_etree.py:78:19:78:25 | ControlFlowNode for request | lxml_etree.py:78:19:78:30 | ControlFlowNode for Attribute |
-| lxml_etree.py:78:19:78:30 | ControlFlowNode for Attribute | lxml_etree.py:78:19:78:45 | ControlFlowNode for Subscript |
-| lxml_etree.py:78:19:78:45 | ControlFlowNode for Subscript | lxml_etree.py:81:34:81:44 | ControlFlowNode for xml_content |
-| lxml_etree.py:87:19:87:25 | ControlFlowNode for request | lxml_etree.py:87:19:87:30 | ControlFlowNode for Attribute |
-| lxml_etree.py:87:19:87:30 | ControlFlowNode for Attribute | lxml_etree.py:87:19:87:45 | ControlFlowNode for Subscript |
-| lxml_etree.py:87:19:87:45 | ControlFlowNode for Subscript | lxml_etree.py:90:34:90:44 | ControlFlowNode for xml_content |
-| xml_dom.py:13:19:13:25 | ControlFlowNode for request | xml_dom.py:13:19:13:30 | ControlFlowNode for Attribute |
-| xml_dom.py:13:19:13:30 | ControlFlowNode for Attribute | xml_dom.py:13:19:13:45 | ControlFlowNode for Subscript |
-| xml_dom.py:13:19:13:45 | ControlFlowNode for Subscript | xml_dom.py:15:34:15:54 | ControlFlowNode for StringIO() |
-| xml_dom.py:19:19:19:25 | ControlFlowNode for request | xml_dom.py:19:19:19:30 | ControlFlowNode for Attribute |
-| xml_dom.py:19:19:19:30 | ControlFlowNode for Attribute | xml_dom.py:19:19:19:45 | ControlFlowNode for Subscript |
-| xml_dom.py:19:19:19:45 | ControlFlowNode for Subscript | xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content |
-| xml_dom.py:25:19:25:25 | ControlFlowNode for request | xml_dom.py:25:19:25:30 | ControlFlowNode for Attribute |
-| xml_dom.py:25:19:25:30 | ControlFlowNode for Attribute | xml_dom.py:25:19:25:45 | ControlFlowNode for Subscript |
-| xml_dom.py:25:19:25:45 | ControlFlowNode for Subscript | xml_dom.py:27:34:27:54 | ControlFlowNode for StringIO() |
-| xml_dom.py:31:19:31:25 | ControlFlowNode for request | xml_dom.py:31:19:31:30 | ControlFlowNode for Attribute |
-| xml_dom.py:31:19:31:30 | ControlFlowNode for Attribute | xml_dom.py:31:19:31:45 | ControlFlowNode for Subscript |
-| xml_dom.py:31:19:31:45 | ControlFlowNode for Subscript | xml_dom.py:33:40:33:50 | ControlFlowNode for xml_content |
-| xml_dom.py:39:19:39:25 | ControlFlowNode for request | xml_dom.py:39:19:39:30 | ControlFlowNode for Attribute |
-| xml_dom.py:39:19:39:30 | ControlFlowNode for Attribute | xml_dom.py:39:19:39:45 | ControlFlowNode for Subscript |
-| xml_dom.py:39:19:39:45 | ControlFlowNode for Subscript | xml_dom.py:43:34:43:54 | ControlFlowNode for StringIO() |
-| xml_etree.py:13:19:13:25 | ControlFlowNode for request | xml_etree.py:13:19:13:30 | ControlFlowNode for Attribute |
-| xml_etree.py:13:19:13:30 | ControlFlowNode for Attribute | xml_etree.py:13:19:13:45 | ControlFlowNode for Subscript |
-| xml_etree.py:13:19:13:45 | ControlFlowNode for Subscript | xml_etree.py:15:45:15:55 | ControlFlowNode for xml_content |
-| xml_etree.py:19:19:19:25 | ControlFlowNode for request | xml_etree.py:19:19:19:30 | ControlFlowNode for Attribute |
-| xml_etree.py:19:19:19:30 | ControlFlowNode for Attribute | xml_etree.py:19:19:19:45 | ControlFlowNode for Subscript |
-| xml_etree.py:19:19:19:45 | ControlFlowNode for Subscript | xml_etree.py:21:49:21:59 | ControlFlowNode for xml_content |
-| xml_etree.py:25:19:25:25 | ControlFlowNode for request | xml_etree.py:25:19:25:30 | ControlFlowNode for Attribute |
-| xml_etree.py:25:19:25:30 | ControlFlowNode for Attribute | xml_etree.py:25:19:25:45 | ControlFlowNode for Subscript |
-| xml_etree.py:25:19:25:45 | ControlFlowNode for Subscript | xml_etree.py:27:38:27:48 | ControlFlowNode for xml_content |
-| xml_etree.py:31:19:31:25 | ControlFlowNode for request | xml_etree.py:31:19:31:30 | ControlFlowNode for Attribute |
-| xml_etree.py:31:19:31:30 | ControlFlowNode for Attribute | xml_etree.py:31:19:31:45 | ControlFlowNode for Subscript |
-| xml_etree.py:31:19:31:45 | ControlFlowNode for Subscript | xml_etree.py:33:40:33:60 | ControlFlowNode for StringIO() |
-| xml_etree.py:39:19:39:25 | ControlFlowNode for request | xml_etree.py:39:19:39:30 | ControlFlowNode for Attribute |
-| xml_etree.py:39:19:39:30 | ControlFlowNode for Attribute | xml_etree.py:39:19:39:45 | ControlFlowNode for Subscript |
-| xml_etree.py:39:19:39:45 | ControlFlowNode for Subscript | xml_etree.py:42:45:42:55 | ControlFlowNode for xml_content |
-| xml_etree.py:46:19:46:25 | ControlFlowNode for request | xml_etree.py:46:19:46:30 | ControlFlowNode for Attribute |
-| xml_etree.py:46:19:46:30 | ControlFlowNode for Attribute | xml_etree.py:46:19:46:45 | ControlFlowNode for Subscript |
-| xml_etree.py:46:19:46:45 | ControlFlowNode for Subscript | xml_etree.py:49:45:49:55 | ControlFlowNode for xml_content |
-| xml_etree.py:53:19:53:25 | ControlFlowNode for request | xml_etree.py:53:19:53:30 | ControlFlowNode for Attribute |
-| xml_etree.py:53:19:53:30 | ControlFlowNode for Attribute | xml_etree.py:53:19:53:45 | ControlFlowNode for Subscript |
-| xml_etree.py:53:19:53:45 | ControlFlowNode for Subscript | xml_etree.py:56:45:56:55 | ControlFlowNode for xml_content |
-| xml_etree.py:60:19:60:25 | ControlFlowNode for request | xml_etree.py:60:19:60:30 | ControlFlowNode for Attribute |
-| xml_etree.py:60:19:60:30 | ControlFlowNode for Attribute | xml_etree.py:60:19:60:45 | ControlFlowNode for Subscript |
-| xml_etree.py:60:19:60:45 | ControlFlowNode for Subscript | xml_etree.py:64:45:64:55 | ControlFlowNode for xml_content |
-| xml_sax.py:31:19:31:25 | ControlFlowNode for request | xml_sax.py:31:19:31:30 | ControlFlowNode for Attribute |
-| xml_sax.py:31:19:31:30 | ControlFlowNode for Attribute | xml_sax.py:31:19:31:45 | ControlFlowNode for Subscript |
-| xml_sax.py:31:19:31:45 | ControlFlowNode for Subscript | xml_sax.py:36:18:36:38 | ControlFlowNode for StringIO() |
-| xml_sax.py:42:19:42:25 | ControlFlowNode for request | xml_sax.py:42:19:42:30 | ControlFlowNode for Attribute |
-| xml_sax.py:42:19:42:30 | ControlFlowNode for Attribute | xml_sax.py:42:19:42:45 | ControlFlowNode for Subscript |
-| xml_sax.py:42:19:42:45 | ControlFlowNode for Subscript | xml_sax.py:49:18:49:38 | ControlFlowNode for StringIO() |
-| xml_sax.py:63:19:63:25 | ControlFlowNode for request | xml_sax.py:63:19:63:30 | ControlFlowNode for Attribute |
-| xml_sax.py:63:19:63:30 | ControlFlowNode for Attribute | xml_sax.py:63:19:63:45 | ControlFlowNode for Subscript |
-| xml_sax.py:63:19:63:45 | ControlFlowNode for Subscript | xml_sax.py:69:18:69:38 | ControlFlowNode for StringIO() |
-| xml_sax.py:75:19:75:25 | ControlFlowNode for request | xml_sax.py:75:19:75:30 | ControlFlowNode for Attribute |
-| xml_sax.py:75:19:75:30 | ControlFlowNode for Attribute | xml_sax.py:75:19:75:45 | ControlFlowNode for Subscript |
-| xml_sax.py:75:19:75:45 | ControlFlowNode for Subscript | xml_sax.py:79:33:79:53 | ControlFlowNode for StringIO() |
-| xml_sax.py:86:19:86:25 | ControlFlowNode for request | xml_sax.py:86:19:86:30 | ControlFlowNode for Attribute |
-| xml_sax.py:86:19:86:30 | ControlFlowNode for Attribute | xml_sax.py:86:19:86:45 | ControlFlowNode for Subscript |
-| xml_sax.py:86:19:86:45 | ControlFlowNode for Subscript | xml_sax.py:91:22:91:42 | ControlFlowNode for StringIO() |
-| xml_sax.py:86:19:86:45 | ControlFlowNode for Subscript | xml_sax.py:93:22:93:42 | ControlFlowNode for StringIO() |
-| xml_sax.py:98:19:98:25 | ControlFlowNode for request | xml_sax.py:98:19:98:30 | ControlFlowNode for Attribute |
-| xml_sax.py:98:19:98:30 | ControlFlowNode for Attribute | xml_sax.py:98:19:98:45 | ControlFlowNode for Subscript |
-| xml_sax.py:98:19:98:45 | ControlFlowNode for Subscript | xml_sax.py:103:18:103:38 | ControlFlowNode for StringIO() |
-| xml_to_dict.py:9:19:9:25 | ControlFlowNode for request | xml_to_dict.py:9:19:9:30 | ControlFlowNode for Attribute |
-| xml_to_dict.py:9:19:9:30 | ControlFlowNode for Attribute | xml_to_dict.py:9:19:9:45 | ControlFlowNode for Subscript |
-| xml_to_dict.py:9:19:9:45 | ControlFlowNode for Subscript | xml_to_dict.py:11:28:11:38 | ControlFlowNode for xml_content |
-| xml_to_dict.py:15:19:15:25 | ControlFlowNode for request | xml_to_dict.py:15:19:15:30 | ControlFlowNode for Attribute |
-| xml_to_dict.py:15:19:15:30 | ControlFlowNode for Attribute | xml_to_dict.py:15:19:15:45 | ControlFlowNode for Subscript |
-| xml_to_dict.py:15:19:15:45 | ControlFlowNode for Subscript | xml_to_dict.py:17:28:17:38 | ControlFlowNode for xml_content |
+| test.py:8:19:8:25 | ControlFlowNode for request | test.py:8:19:8:30 | ControlFlowNode for Attribute |
+| test.py:8:19:8:30 | ControlFlowNode for Attribute | test.py:8:19:8:45 | ControlFlowNode for Subscript |
+| test.py:8:19:8:45 | ControlFlowNode for Subscript | test.py:9:34:9:44 | ControlFlowNode for xml_content |
+| test.py:13:19:13:25 | ControlFlowNode for request | test.py:13:19:13:30 | ControlFlowNode for Attribute |
+| test.py:13:19:13:30 | ControlFlowNode for Attribute | test.py:13:19:13:45 | ControlFlowNode for Subscript |
+| test.py:13:19:13:45 | ControlFlowNode for Subscript | test.py:15:34:15:44 | ControlFlowNode for xml_content |
+| test.py:19:19:19:25 | ControlFlowNode for request | test.py:19:19:19:30 | ControlFlowNode for Attribute |
+| test.py:19:19:19:30 | ControlFlowNode for Attribute | test.py:19:19:19:45 | ControlFlowNode for Subscript |
+| test.py:19:19:19:45 | ControlFlowNode for Subscript | test.py:30:34:30:44 | ControlFlowNode for xml_content |
nodes
-| lxml_etree.py:11:19:11:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
-| lxml_etree.py:11:19:11:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
-| lxml_etree.py:11:19:11:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
-| lxml_etree.py:13:34:13:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content |
-| lxml_etree.py:17:19:17:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
-| lxml_etree.py:17:19:17:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
-| lxml_etree.py:17:19:17:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
-| lxml_etree.py:19:38:19:50 | ControlFlowNode for List | semmle.label | ControlFlowNode for List |
-| lxml_etree.py:23:19:23:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
-| lxml_etree.py:23:19:23:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
-| lxml_etree.py:23:19:23:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
-| lxml_etree.py:25:27:25:37 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content |
-| lxml_etree.py:29:19:29:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
-| lxml_etree.py:29:19:29:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
-| lxml_etree.py:29:19:29:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
-| lxml_etree.py:31:29:31:49 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() |
-| lxml_etree.py:37:19:37:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
-| lxml_etree.py:37:19:37:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
-| lxml_etree.py:37:19:37:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
-| lxml_etree.py:40:34:40:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content |
-| lxml_etree.py:44:19:44:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
-| lxml_etree.py:44:19:44:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
-| lxml_etree.py:44:19:44:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
-| lxml_etree.py:47:34:47:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content |
-| lxml_etree.py:54:19:54:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
-| lxml_etree.py:54:19:54:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
-| lxml_etree.py:54:19:54:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
-| lxml_etree.py:57:34:57:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content |
-| lxml_etree.py:62:19:62:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
-| lxml_etree.py:62:19:62:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
-| lxml_etree.py:62:19:62:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
-| lxml_etree.py:65:34:65:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content |
-| lxml_etree.py:71:19:71:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
-| lxml_etree.py:71:19:71:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
-| lxml_etree.py:71:19:71:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
-| lxml_etree.py:74:34:74:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content |
-| lxml_etree.py:78:19:78:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
-| lxml_etree.py:78:19:78:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
-| lxml_etree.py:78:19:78:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
-| lxml_etree.py:81:34:81:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content |
-| lxml_etree.py:87:19:87:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
-| lxml_etree.py:87:19:87:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
-| lxml_etree.py:87:19:87:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
-| lxml_etree.py:90:34:90:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content |
-| xml_dom.py:13:19:13:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
-| xml_dom.py:13:19:13:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
-| xml_dom.py:13:19:13:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
-| xml_dom.py:15:34:15:54 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() |
-| xml_dom.py:19:19:19:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
-| xml_dom.py:19:19:19:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
-| xml_dom.py:19:19:19:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
-| xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content |
-| xml_dom.py:25:19:25:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
-| xml_dom.py:25:19:25:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
-| xml_dom.py:25:19:25:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
-| xml_dom.py:27:34:27:54 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() |
-| xml_dom.py:31:19:31:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
-| xml_dom.py:31:19:31:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
-| xml_dom.py:31:19:31:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
-| xml_dom.py:33:40:33:50 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content |
-| xml_dom.py:39:19:39:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
-| xml_dom.py:39:19:39:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
-| xml_dom.py:39:19:39:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
-| xml_dom.py:43:34:43:54 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() |
-| xml_etree.py:13:19:13:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
-| xml_etree.py:13:19:13:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
-| xml_etree.py:13:19:13:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
-| xml_etree.py:15:45:15:55 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content |
-| xml_etree.py:19:19:19:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
-| xml_etree.py:19:19:19:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
-| xml_etree.py:19:19:19:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
-| xml_etree.py:21:49:21:59 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content |
-| xml_etree.py:25:19:25:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
-| xml_etree.py:25:19:25:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
-| xml_etree.py:25:19:25:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
-| xml_etree.py:27:38:27:48 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content |
-| xml_etree.py:31:19:31:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
-| xml_etree.py:31:19:31:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
-| xml_etree.py:31:19:31:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
-| xml_etree.py:33:40:33:60 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() |
-| xml_etree.py:39:19:39:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
-| xml_etree.py:39:19:39:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
-| xml_etree.py:39:19:39:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
-| xml_etree.py:42:45:42:55 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content |
-| xml_etree.py:46:19:46:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
-| xml_etree.py:46:19:46:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
-| xml_etree.py:46:19:46:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
-| xml_etree.py:49:45:49:55 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content |
-| xml_etree.py:53:19:53:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
-| xml_etree.py:53:19:53:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
-| xml_etree.py:53:19:53:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
-| xml_etree.py:56:45:56:55 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content |
-| xml_etree.py:60:19:60:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
-| xml_etree.py:60:19:60:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
-| xml_etree.py:60:19:60:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
-| xml_etree.py:64:45:64:55 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content |
-| xml_sax.py:31:19:31:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
-| xml_sax.py:31:19:31:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
-| xml_sax.py:31:19:31:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
-| xml_sax.py:36:18:36:38 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() |
-| xml_sax.py:42:19:42:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
-| xml_sax.py:42:19:42:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
-| xml_sax.py:42:19:42:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
-| xml_sax.py:49:18:49:38 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() |
-| xml_sax.py:63:19:63:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
-| xml_sax.py:63:19:63:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
-| xml_sax.py:63:19:63:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
-| xml_sax.py:69:18:69:38 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() |
-| xml_sax.py:75:19:75:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
-| xml_sax.py:75:19:75:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
-| xml_sax.py:75:19:75:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
-| xml_sax.py:79:33:79:53 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() |
-| xml_sax.py:86:19:86:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
-| xml_sax.py:86:19:86:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
-| xml_sax.py:86:19:86:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
-| xml_sax.py:91:22:91:42 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() |
-| xml_sax.py:93:22:93:42 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() |
-| xml_sax.py:98:19:98:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
-| xml_sax.py:98:19:98:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
-| xml_sax.py:98:19:98:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
-| xml_sax.py:103:18:103:38 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() |
-| xml_to_dict.py:9:19:9:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
-| xml_to_dict.py:9:19:9:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
-| xml_to_dict.py:9:19:9:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
-| xml_to_dict.py:11:28:11:38 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content |
-| xml_to_dict.py:15:19:15:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
-| xml_to_dict.py:15:19:15:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
-| xml_to_dict.py:15:19:15:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
-| xml_to_dict.py:17:28:17:38 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content |
+| test.py:8:19:8:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
+| test.py:8:19:8:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
+| test.py:8:19:8:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
+| test.py:9:34:9:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content |
+| test.py:13:19:13:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
+| test.py:13:19:13:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
+| test.py:13:19:13:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
+| test.py:15:34:15:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content |
+| test.py:19:19:19:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
+| test.py:19:19:19:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
+| test.py:19:19:19:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
+| test.py:30:34:30:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content |
subpaths
#select
-| lxml_etree.py:13:34:13:44 | ControlFlowNode for xml_content | lxml_etree.py:11:19:11:25 | ControlFlowNode for request | lxml_etree.py:13:34:13:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: XXE. | lxml_etree.py:13:34:13:44 | ControlFlowNode for xml_content | This | lxml_etree.py:11:19:11:25 | ControlFlowNode for request | user-provided value |
-| lxml_etree.py:19:38:19:50 | ControlFlowNode for List | lxml_etree.py:17:19:17:25 | ControlFlowNode for request | lxml_etree.py:19:38:19:50 | ControlFlowNode for List | $@ XML input is constructed from a $@ and is vulnerable to: XXE. | lxml_etree.py:19:38:19:50 | ControlFlowNode for List | This | lxml_etree.py:17:19:17:25 | ControlFlowNode for request | user-provided value |
-| lxml_etree.py:25:27:25:37 | ControlFlowNode for xml_content | lxml_etree.py:23:19:23:25 | ControlFlowNode for request | lxml_etree.py:25:27:25:37 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: XXE. | lxml_etree.py:25:27:25:37 | ControlFlowNode for xml_content | This | lxml_etree.py:23:19:23:25 | ControlFlowNode for request | user-provided value |
-| lxml_etree.py:31:29:31:49 | ControlFlowNode for StringIO() | lxml_etree.py:29:19:29:25 | ControlFlowNode for request | lxml_etree.py:31:29:31:49 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: XXE. | lxml_etree.py:31:29:31:49 | ControlFlowNode for StringIO() | This | lxml_etree.py:29:19:29:25 | ControlFlowNode for request | user-provided value |
-| lxml_etree.py:40:34:40:44 | ControlFlowNode for xml_content | lxml_etree.py:37:19:37:25 | ControlFlowNode for request | lxml_etree.py:40:34:40:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: XXE. | lxml_etree.py:40:34:40:44 | ControlFlowNode for xml_content | This | lxml_etree.py:37:19:37:25 | ControlFlowNode for request | user-provided value |
-| lxml_etree.py:47:34:47:44 | ControlFlowNode for xml_content | lxml_etree.py:44:19:44:25 | ControlFlowNode for request | lxml_etree.py:47:34:47:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: XXE. | lxml_etree.py:47:34:47:44 | ControlFlowNode for xml_content | This | lxml_etree.py:44:19:44:25 | ControlFlowNode for request | user-provided value |
-| lxml_etree.py:65:34:65:44 | ControlFlowNode for xml_content | lxml_etree.py:62:19:62:25 | ControlFlowNode for request | lxml_etree.py:65:34:65:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: XXE. | lxml_etree.py:65:34:65:44 | ControlFlowNode for xml_content | This | lxml_etree.py:62:19:62:25 | ControlFlowNode for request | user-provided value |
-| lxml_etree.py:74:34:74:44 | ControlFlowNode for xml_content | lxml_etree.py:71:19:71:25 | ControlFlowNode for request | lxml_etree.py:74:34:74:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | lxml_etree.py:74:34:74:44 | ControlFlowNode for xml_content | This | lxml_etree.py:71:19:71:25 | ControlFlowNode for request | user-provided value |
-| lxml_etree.py:81:34:81:44 | ControlFlowNode for xml_content | lxml_etree.py:78:19:78:25 | ControlFlowNode for request | lxml_etree.py:81:34:81:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup, XXE. | lxml_etree.py:81:34:81:44 | ControlFlowNode for xml_content | This | lxml_etree.py:78:19:78:25 | ControlFlowNode for request | user-provided value |
-| lxml_etree.py:90:34:90:44 | ControlFlowNode for xml_content | lxml_etree.py:87:19:87:25 | ControlFlowNode for request | lxml_etree.py:90:34:90:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: DTD retrieval. | lxml_etree.py:90:34:90:44 | ControlFlowNode for xml_content | This | lxml_etree.py:87:19:87:25 | ControlFlowNode for request | user-provided value |
-| xml_dom.py:15:34:15:54 | ControlFlowNode for StringIO() | xml_dom.py:13:19:13:25 | ControlFlowNode for request | xml_dom.py:15:34:15:54 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_dom.py:15:34:15:54 | ControlFlowNode for StringIO() | This | xml_dom.py:13:19:13:25 | ControlFlowNode for request | user-provided value |
-| xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | xml_dom.py:19:19:19:25 | ControlFlowNode for request | xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | This | xml_dom.py:19:19:19:25 | ControlFlowNode for request | user-provided value |
-| xml_dom.py:27:34:27:54 | ControlFlowNode for StringIO() | xml_dom.py:25:19:25:25 | ControlFlowNode for request | xml_dom.py:27:34:27:54 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_dom.py:27:34:27:54 | ControlFlowNode for StringIO() | This | xml_dom.py:25:19:25:25 | ControlFlowNode for request | user-provided value |
-| xml_dom.py:33:40:33:50 | ControlFlowNode for xml_content | xml_dom.py:31:19:31:25 | ControlFlowNode for request | xml_dom.py:33:40:33:50 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_dom.py:33:40:33:50 | ControlFlowNode for xml_content | This | xml_dom.py:31:19:31:25 | ControlFlowNode for request | user-provided value |
-| xml_dom.py:43:34:43:54 | ControlFlowNode for StringIO() | xml_dom.py:39:19:39:25 | ControlFlowNode for request | xml_dom.py:43:34:43:54 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, DTD retrieval, Quadratic Blowup, XXE. | xml_dom.py:43:34:43:54 | ControlFlowNode for StringIO() | This | xml_dom.py:39:19:39:25 | ControlFlowNode for request | user-provided value |
-| xml_etree.py:15:45:15:55 | ControlFlowNode for xml_content | xml_etree.py:13:19:13:25 | ControlFlowNode for request | xml_etree.py:15:45:15:55 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_etree.py:15:45:15:55 | ControlFlowNode for xml_content | This | xml_etree.py:13:19:13:25 | ControlFlowNode for request | user-provided value |
-| xml_etree.py:21:49:21:59 | ControlFlowNode for xml_content | xml_etree.py:19:19:19:25 | ControlFlowNode for request | xml_etree.py:21:49:21:59 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_etree.py:21:49:21:59 | ControlFlowNode for xml_content | This | xml_etree.py:19:19:19:25 | ControlFlowNode for request | user-provided value |
-| xml_etree.py:27:38:27:48 | ControlFlowNode for xml_content | xml_etree.py:25:19:25:25 | ControlFlowNode for request | xml_etree.py:27:38:27:48 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_etree.py:27:38:27:48 | ControlFlowNode for xml_content | This | xml_etree.py:25:19:25:25 | ControlFlowNode for request | user-provided value |
-| xml_etree.py:33:40:33:60 | ControlFlowNode for StringIO() | xml_etree.py:31:19:31:25 | ControlFlowNode for request | xml_etree.py:33:40:33:60 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_etree.py:33:40:33:60 | ControlFlowNode for StringIO() | This | xml_etree.py:31:19:31:25 | ControlFlowNode for request | user-provided value |
-| xml_etree.py:42:45:42:55 | ControlFlowNode for xml_content | xml_etree.py:39:19:39:25 | ControlFlowNode for request | xml_etree.py:42:45:42:55 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_etree.py:42:45:42:55 | ControlFlowNode for xml_content | This | xml_etree.py:39:19:39:25 | ControlFlowNode for request | user-provided value |
-| xml_etree.py:49:45:49:55 | ControlFlowNode for xml_content | xml_etree.py:46:19:46:25 | ControlFlowNode for request | xml_etree.py:49:45:49:55 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: XXE. | xml_etree.py:49:45:49:55 | ControlFlowNode for xml_content | This | xml_etree.py:46:19:46:25 | ControlFlowNode for request | user-provided value |
-| xml_etree.py:56:45:56:55 | ControlFlowNode for xml_content | xml_etree.py:53:19:53:25 | ControlFlowNode for request | xml_etree.py:56:45:56:55 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: XXE. | xml_etree.py:56:45:56:55 | ControlFlowNode for xml_content | This | xml_etree.py:53:19:53:25 | ControlFlowNode for request | user-provided value |
-| xml_sax.py:36:18:36:38 | ControlFlowNode for StringIO() | xml_sax.py:31:19:31:25 | ControlFlowNode for request | xml_sax.py:36:18:36:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_sax.py:36:18:36:38 | ControlFlowNode for StringIO() | This | xml_sax.py:31:19:31:25 | ControlFlowNode for request | user-provided value |
-| xml_sax.py:49:18:49:38 | ControlFlowNode for StringIO() | xml_sax.py:42:19:42:25 | ControlFlowNode for request | xml_sax.py:49:18:49:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_sax.py:49:18:49:38 | ControlFlowNode for StringIO() | This | xml_sax.py:42:19:42:25 | ControlFlowNode for request | user-provided value |
-| xml_sax.py:69:18:69:38 | ControlFlowNode for StringIO() | xml_sax.py:63:19:63:25 | ControlFlowNode for request | xml_sax.py:69:18:69:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, DTD retrieval, Quadratic Blowup, XXE. | xml_sax.py:69:18:69:38 | ControlFlowNode for StringIO() | This | xml_sax.py:63:19:63:25 | ControlFlowNode for request | user-provided value |
-| xml_sax.py:79:33:79:53 | ControlFlowNode for StringIO() | xml_sax.py:75:19:75:25 | ControlFlowNode for request | xml_sax.py:79:33:79:53 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, DTD retrieval, Quadratic Blowup, XXE. | xml_sax.py:79:33:79:53 | ControlFlowNode for StringIO() | This | xml_sax.py:75:19:75:25 | ControlFlowNode for request | user-provided value |
-| xml_sax.py:91:22:91:42 | ControlFlowNode for StringIO() | xml_sax.py:86:19:86:25 | ControlFlowNode for request | xml_sax.py:91:22:91:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, DTD retrieval, Quadratic Blowup, XXE. | xml_sax.py:91:22:91:42 | ControlFlowNode for StringIO() | This | xml_sax.py:86:19:86:25 | ControlFlowNode for request | user-provided value |
-| xml_sax.py:93:22:93:42 | ControlFlowNode for StringIO() | xml_sax.py:86:19:86:25 | ControlFlowNode for request | xml_sax.py:93:22:93:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_sax.py:93:22:93:42 | ControlFlowNode for StringIO() | This | xml_sax.py:86:19:86:25 | ControlFlowNode for request | user-provided value |
-| xml_sax.py:103:18:103:38 | ControlFlowNode for StringIO() | xml_sax.py:98:19:98:25 | ControlFlowNode for request | xml_sax.py:103:18:103:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_sax.py:103:18:103:38 | ControlFlowNode for StringIO() | This | xml_sax.py:98:19:98:25 | ControlFlowNode for request | user-provided value |
-| xml_to_dict.py:17:28:17:38 | ControlFlowNode for xml_content | xml_to_dict.py:15:19:15:25 | ControlFlowNode for request | xml_to_dict.py:17:28:17:38 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_to_dict.py:17:28:17:38 | ControlFlowNode for xml_content | This | xml_to_dict.py:15:19:15:25 | ControlFlowNode for request | user-provided value |
+| test.py:9:34:9:44 | ControlFlowNode for xml_content | test.py:8:19:8:25 | ControlFlowNode for request | test.py:9:34:9:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: XXE. | test.py:9:34:9:44 | ControlFlowNode for xml_content | This | test.py:8:19:8:25 | ControlFlowNode for request | user-provided value |
+| test.py:30:34:30:44 | ControlFlowNode for xml_content | test.py:19:19:19:25 | ControlFlowNode for request | test.py:30:34:30:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, DTD retrieval, Quadratic Blowup, XXE. | test.py:30:34:30:44 | ControlFlowNode for xml_content | This | test.py:19:19:19:25 | ControlFlowNode for request | user-provided value |
diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/test.py b/python/ql/test/experimental/query-tests/Security/CWE-611/test.py
new file mode 100644
index 000000000000..d9181c4cf346
--- /dev/null
+++ b/python/ql/test/experimental/query-tests/Security/CWE-611/test.py
@@ -0,0 +1,30 @@
+from flask import Flask, request
+import lxml.etree
+
+app = Flask(__name__)
+
+@app.route("/vuln-handler")
+def vuln_handler():
+ xml_content = request.args['xml_content']
+ return lxml.etree.fromstring(xml_content).text
+
+@app.route("/safe-handler")
+def safe_handler():
+ xml_content = request.args['xml_content']
+ parser = lxml.etree.XMLParser(resolve_entities=False)
+ return lxml.etree.fromstring(xml_content, parser=parser).text
+
+@app.route("/super-vuln-handler")
+def super_vuln_handler():
+ xml_content = request.args['xml_content']
+ parser = lxml.etree.XMLParser(
+ # allows XXE
+ resolve_entities=True,
+ # allows remote XXE
+ no_network=False,
+ # together with `no_network=False`, allows DTD-retrieval
+ load_dtd=True,
+ # allows DoS attacks
+ huge_tree=True,
+ )
+ return lxml.etree.fromstring(xml_content, parser=parser).text
From 33ebcdf43715f10d1deafc493e9fd568f7b78bea Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Thu, 3 Mar 2022 21:26:06 +0100
Subject: [PATCH 63/79] Python: Support feed method of lxml/xml.etree Parsers
---
.../semmle/python/frameworks/Xml.qll | 50 +++++++++++++++++++
.../frameworks/XML/lxml_etree.py | 6 +++
.../library-tests/frameworks/XML/xml_etree.py | 6 +++
3 files changed, 62 insertions(+)
diff --git a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll
index b0e7592c3936..caf5a3b434ab 100644
--- a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll
+++ b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll
@@ -79,6 +79,28 @@ private module Xml {
}
}
+ /**
+ * A call to the `feed` method of an `xml.etree` parser.
+ */
+ private class XMLEtreeParserFeedCall extends DataFlow::CallCfgNode, XML::XMLParsing::Range {
+ XMLEtreeParserFeedCall() {
+ this =
+ API::moduleImport("xml")
+ .getMember("etree")
+ .getMember("ElementTree")
+ .getMember("XMLParser")
+ .getReturn()
+ .getMember("feed")
+ .getACall()
+ }
+
+ override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("data")] }
+
+ override predicate vulnerable(XML::XMLVulnerabilityKind kind) {
+ kind.isBillionLaughs() or kind.isQuadraticBlowup()
+ }
+ }
+
/**
* A call to the `setFeature` method on a XML sax parser.
*
@@ -322,6 +344,7 @@ private module Xml {
}
override predicate vulnerable(XML::XMLVulnerabilityKind kind) {
+ // TODO: This should be done with type-tracking
exists(XML::XMLParser xmlParser |
xmlParser = this.getArgByName("parser").getALocalSource() and xmlParser.vulnerable(kind)
)
@@ -330,6 +353,33 @@ private module Xml {
}
}
+ /**
+ * A call to the `feed` method of an `lxml.etree` parser.
+ */
+ private class LXMLEtreeParserFeedCall extends DataFlow::MethodCallNode, XML::XMLParsing::Range {
+ LXMLEtreeParserFeedCall() {
+ exists(API::Node parserInstance |
+ parserInstance =
+ API::moduleImport("lxml").getMember("etree").getMember("XMLParser").getReturn()
+ or
+ parserInstance =
+ API::moduleImport("lxml").getMember("etree").getMember("get_default_parser").getReturn()
+ |
+ this = parserInstance.getMember("feed").getACall()
+ )
+ }
+
+ override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("data")] }
+
+ override predicate vulnerable(XML::XMLVulnerabilityKind kind) {
+ // TODO: This should be done with type-tracking
+ exists(XML::XMLParser xmlParser |
+ xmlParser = this.getObject().getALocalSource() and
+ xmlParser.vulnerable(kind)
+ )
+ }
+ }
+
/**
* Gets a call to `xmltodict.parse`.
*
diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/lxml_etree.py b/python/ql/test/experimental/library-tests/frameworks/XML/lxml_etree.py
index 3e6e6fb08e7f..db8b667e70a5 100644
--- a/python/ql/test/experimental/library-tests/frameworks/XML/lxml_etree.py
+++ b/python/ql/test/experimental/library-tests/frameworks/XML/lxml_etree.py
@@ -26,6 +26,12 @@
parser = lxml.etree.get_default_parser()
lxml.etree.fromstring(x, parser=parser) # $ input=x vuln='XXE'
+# manual use of feed method
+parser = lxml.etree.XMLParser()
+parser.feed(x) # $ input=x vuln='XXE'
+parser.feed(data=x) # $ input=x vuln='XXE'
+parser.close()
+
# XXE-safe
parser = lxml.etree.XMLParser(resolve_entities=False)
lxml.etree.fromstring(x, parser=parser) # $ input=x
diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/xml_etree.py b/python/ql/test/experimental/library-tests/frameworks/XML/xml_etree.py
index 23ac3784cbc3..3220d95c624c 100644
--- a/python/ql/test/experimental/library-tests/frameworks/XML/xml_etree.py
+++ b/python/ql/test/experimental/library-tests/frameworks/XML/xml_etree.py
@@ -27,6 +27,12 @@
parser = xml.etree.ElementTree.XMLParser()
xml.etree.ElementTree.fromstring(x, parser=parser) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup'
+# manual use of feed method
+parser = xml.etree.ElementTree.XMLParser()
+parser.feed(x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup'
+parser.feed(data=x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup'
+parser.close()
+
# note: it's technically possible to use the thing wrapper func `fromstring` with an
# `lxml` parser, and thereby change what vulnerabilities you are exposed to.. but it
# seems very unlikely that anyone would do this, so we have intentionally not added any
From 46238d5ea049e5b51f99f4b66366957852a649c8 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Thu, 3 Mar 2022 21:27:52 +0100
Subject: [PATCH 64/79] Python: Add test for XMLPullParser
But handling this in a nice way will require some restructuring
---
.../experimental/library-tests/frameworks/XML/xml_etree.py | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/xml_etree.py b/python/ql/test/experimental/library-tests/frameworks/XML/xml_etree.py
index 3220d95c624c..ee452c11853d 100644
--- a/python/ql/test/experimental/library-tests/frameworks/XML/xml_etree.py
+++ b/python/ql/test/experimental/library-tests/frameworks/XML/xml_etree.py
@@ -33,6 +33,12 @@
parser.feed(data=x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup'
parser.close()
+# manual use of feed method on XMLPullParser
+parser = xml.etree.ElementTree.XMLPullParser()
+parser.feed(x) # $ MISSING: input=x vuln='Billion Laughs' vuln='Quadratic Blowup'
+parser.feed(data=x) # $ MISSING: input=x vuln='Billion Laughs' vuln='Quadratic Blowup'
+parser.close()
+
# note: it's technically possible to use the thing wrapper func `fromstring` with an
# `lxml` parser, and thereby change what vulnerabilities you are exposed to.. but it
# seems very unlikely that anyone would do this, so we have intentionally not added any
From de0e67f327de078af5c6574445e82f7574f52984 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Thu, 3 Mar 2022 21:31:15 +0100
Subject: [PATCH 65/79] Python: Restructure overall XML modeling
---
.../semmle/python/frameworks/Xml.qll | 82 ++++++++++---------
1 file changed, 44 insertions(+), 38 deletions(-)
diff --git a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll
index caf5a3b434ab..55f45df99ca7 100644
--- a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll
+++ b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll
@@ -8,7 +8,7 @@ private import semmle.python.dataflow.new.DataFlow
private import experimental.semmle.python.Concepts
private import semmle.python.ApiGraphs
-private module Xml {
+private module XmlEtree {
/**
* Gets a call to `xml.etree.ElementTree.XMLParser`.
*/
@@ -100,7 +100,9 @@ private module Xml {
kind.isBillionLaughs() or kind.isQuadraticBlowup()
}
}
+}
+private module SaxBasedParsing {
/**
* A call to the `setFeature` method on a XML sax parser.
*
@@ -251,6 +253,45 @@ private module Xml {
}
}
+ /**
+ * A call to the `parse` or `parseString` methods from `xml.dom.minidom` or `xml.dom.pulldom`.
+ *
+ * Both of these modules are based on SAX parsers.
+ */
+ private class XMLDomParsing extends DataFlow::CallCfgNode, XML::XMLParsing::Range {
+ XMLDomParsing() {
+ this =
+ API::moduleImport("xml")
+ .getMember("dom")
+ .getMember(["minidom", "pulldom"])
+ .getMember(["parse", "parseString"])
+ .getACall()
+ }
+
+ override DataFlow::Node getAnInput() {
+ result in [
+ this.getArg(0),
+ // parseString
+ this.getArgByName("string"),
+ // minidom.parse
+ this.getArgByName("file"),
+ // pulldom.parse
+ this.getArgByName("stream_or_string"),
+ ]
+ }
+
+ DataFlow::Node getParserArg() { result in [this.getArg(1), this.getArgByName("parser")] }
+
+ override predicate vulnerable(XML::XMLVulnerabilityKind kind) {
+ this.getParserArg() = saxParserWithFeatureExternalGesTurnedOn() and
+ (kind.isXxe() or kind.isDtdRetrieval())
+ or
+ (kind.isBillionLaughs() or kind.isQuadraticBlowup())
+ }
+ }
+}
+
+private module Lxml {
/**
* A call to `lxml.etree.get_default_parser`.
*
@@ -379,7 +420,9 @@ private module Xml {
)
}
}
+}
+private module Xmltodict {
/**
* Gets a call to `xmltodict.parse`.
*
@@ -405,41 +448,4 @@ private module Xml {
this.getArgByName("disable_entities").getALocalSource().asExpr() = any(False f)
}
}
-
- /**
- * A call to the `parse` or `parseString` methods from `xml.dom.minidom` or `xml.dom.pulldom`.
- *
- * Both of these modules are based on SAX parsers.
- */
- private class XMLDomParsing extends DataFlow::CallCfgNode, XML::XMLParsing::Range {
- XMLDomParsing() {
- this =
- API::moduleImport("xml")
- .getMember("dom")
- .getMember(["minidom", "pulldom"])
- .getMember(["parse", "parseString"])
- .getACall()
- }
-
- override DataFlow::Node getAnInput() {
- result in [
- this.getArg(0),
- // parseString
- this.getArgByName("string"),
- // minidom.parse
- this.getArgByName("file"),
- // pulldom.parse
- this.getArgByName("stream_or_string"),
- ]
- }
-
- DataFlow::Node getParserArg() { result in [this.getArg(1), this.getArgByName("parser")] }
-
- override predicate vulnerable(XML::XMLVulnerabilityKind kind) {
- this.getParserArg() = saxParserWithFeatureExternalGesTurnedOn() and
- (kind.isXxe() or kind.isDtdRetrieval())
- or
- (kind.isBillionLaughs() or kind.isQuadraticBlowup())
- }
- }
}
From a033b71eaf16dce055696ca7f1485c7f079ad2ed Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Thu, 3 Mar 2022 21:34:46 +0100
Subject: [PATCH 66/79] Python: Align QLdocs of XML modeling
---
.../semmle/python/frameworks/Xml.qll | 72 +++++--------------
1 file changed, 18 insertions(+), 54 deletions(-)
diff --git a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll
index 55f45df99ca7..6c3b86f426e3 100644
--- a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll
+++ b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll
@@ -10,7 +10,7 @@ private import semmle.python.ApiGraphs
private module XmlEtree {
/**
- * Gets a call to `xml.etree.ElementTree.XMLParser`.
+ * A call to `xml.etree.ElementTree.XMLParser`.
*/
private class XMLEtreeParser extends DataFlow::CallCfgNode, XML::XMLParser::Range {
XMLEtreeParser() {
@@ -30,22 +30,13 @@ private module XmlEtree {
}
/**
- * Gets a call to:
- * * `xml.etree.ElementTree.fromstring`
- * * `xml.etree.ElementTree.fromstringlist`
- * * `xml.etree.ElementTree.XML`
- * * `xml.etree.ElementTree.parse`
- *
- * Given the following example:
- *
- * ```py
- * parser = lxml.etree.XMLParser()
- * xml.etree.ElementTree.fromstring(xml_content, parser=parser).text
- * ```
- *
- * * `this` would be `xml.etree.ElementTree.fromstring(xml_content, parser=parser)`.
- * * `getAnInput()`'s result would be `xml_content`.
- * * `vulnerable(kind)`'s `kind` would be `XXE`.
+ * A call to any of:
+ * - `xml.etree.ElementTree.fromstring`
+ * - `xml.etree.ElementTree.fromstringlist`
+ * - `xml.etree.ElementTree.XML`
+ * - `xml.etree.ElementTree.XMLID`
+ * - `xml.etree.ElementTree.parse`
+ * - `xml.etree.ElementTree.iterparse`
*/
private class XMLEtreeParsing extends DataFlow::CallCfgNode, XML::XMLParsing::Range {
XMLEtreeParsing() {
@@ -186,16 +177,7 @@ private module SaxBasedParsing {
}
/**
- * A XML parsing call with a sax parser.
- *
- * ```py
- * BadHandler = MainHandler()
- * parser = xml.sax.make_parser()
- * parser.setContentHandler(BadHandler)
- * parser.setFeature(xml.sax.handler.feature_external_ges, False)
- * parser.parse(StringIO(xml_content))
- * parsed_xml = BadHandler._result
- * ```
+ * A call to the `parse` method on a SAX XML parser.
*/
private class XMLSaxInstanceParsing extends DataFlow::MethodCallNode, XML::XMLParsing::Range {
XMLSaxInstanceParsing() {
@@ -346,22 +328,14 @@ private module Lxml {
}
/**
- * Gets a call to:
- * * `lxml.etree.fromstring`
- * * `xml.etree.fromstringlist`
- * * `xml.etree.XML`
- * * `xml.etree.parse`
- *
- * Given the following example:
+ * A call to any of:
+ * - `lxml.etree.fromstring`
+ * - `lxml.etree.fromstringlist`
+ * - `lxml.etree.XML`
+ * - `lxml.etree.parse`
+ * - `lxml.etree.parseid`
*
- * ```py
- * parser = lxml.etree.XMLParser()
- * lxml.etree.fromstring(xml_content, parser=parser).text
- * ```
- *
- * * `this` would be `lxml.etree.fromstring(xml_content, parser=parser)`.
- * * `getAnInput()`'s result would be `xml_content`.
- * * `vulnerable(kind)`'s `kind` would be `XXE`.
+ * See https://lxml.de/apidoc/lxml.etree.html?highlight=parseids#lxml.etree.fromstring
*/
private class LXMLParsing extends DataFlow::CallCfgNode, XML::XMLParsing::Range {
LXMLParsing() {
@@ -395,7 +369,7 @@ private module Lxml {
}
/**
- * A call to the `feed` method of an `lxml.etree` parser.
+ * A call to the `feed` method of an `lxml` parser.
*/
private class LXMLEtreeParserFeedCall extends DataFlow::MethodCallNode, XML::XMLParsing::Range {
LXMLEtreeParserFeedCall() {
@@ -424,17 +398,7 @@ private module Lxml {
private module Xmltodict {
/**
- * Gets a call to `xmltodict.parse`.
- *
- * Given the following example:
- *
- * ```py
- * xmltodict.parse(xml_content, disable_entities=False)
- * ```
- *
- * * `this` would be `xmltodict.parse(xml_content, disable_entities=False)`.
- * * `getAnInput()`'s result would be `xml_content`.
- * * `vulnerable(kind)`'s `kind` would be `Billion Laughs` and `Quadratic Blowup`.
+ * A call to `xmltodict.parse`.
*/
private class XMLtoDictParsing extends DataFlow::CallCfgNode, XML::XMLParsing::Range {
XMLtoDictParsing() { this = API::moduleImport("xmltodict").getMember("parse").getACall() }
From c0a2c25f5a712967ea5d067907e7c5be7b71a144 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Thu, 3 Mar 2022 21:38:52 +0100
Subject: [PATCH 67/79] Python: Restructure modeling of `xml.etree` parsers
---
.../semmle/python/frameworks/Xml.qll | 96 +++++++++++--------
.../library-tests/frameworks/XML/xml_etree.py | 4 +-
2 files changed, 59 insertions(+), 41 deletions(-)
diff --git a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll
index 6c3b86f426e3..e6a52fc19be6 100644
--- a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll
+++ b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll
@@ -10,22 +10,65 @@ private import semmle.python.ApiGraphs
private module XmlEtree {
/**
- * A call to `xml.etree.ElementTree.XMLParser`.
+ * Provides models for `xml.etree` parsers.
+ *
+ * See
+ * - https://docs.python.org/3.10/library/xml.etree.elementtree.html#xml.etree.ElementTree.XMLParser
+ * - https://docs.python.org/3.10/library/xml.etree.elementtree.html#xml.etree.ElementTree.XMLPullParser
*/
- private class XMLEtreeParser extends DataFlow::CallCfgNode, XML::XMLParser::Range {
- XMLEtreeParser() {
- this =
- API::moduleImport("xml")
- .getMember("etree")
- .getMember("ElementTree")
- .getMember("XMLParser")
- .getACall()
+ module XMLParser {
+ /**
+ * A source of instances of `xml.etree` parsers, extend this class to model new instances.
+ *
+ * This can include instantiations of the class, return values from function
+ * calls, or a special parameter that will be set when functions are called by an external
+ * library.
+ *
+ * Use the predicate `XMLParser::instance()` to get references to instances of `xml.etree` parsers.
+ */
+ abstract class InstanceSource extends DataFlow::LocalSourceNode { }
+
+ /** A direct instantiation of `xml.etree` parsers. */
+ private class ClassInstantiation extends InstanceSource, DataFlow::CallCfgNode {
+ ClassInstantiation() {
+ this =
+ API::moduleImport("xml")
+ .getMember("etree")
+ .getMember("ElementTree")
+ .getMember("XMLParser")
+ .getACall()
+ or
+ this =
+ API::moduleImport("xml")
+ .getMember("etree")
+ .getMember("ElementTree")
+ .getMember("XMLPullParser")
+ .getACall()
+ }
}
- override DataFlow::Node getAnInput() { none() }
+ /** Gets a reference to an `xml.etree` parser instance. */
+ private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) {
+ t.start() and
+ result instanceof InstanceSource
+ or
+ exists(DataFlow::TypeTracker t2 | result = instance(t2).track(t2, t))
+ }
- override predicate vulnerable(XML::XMLVulnerabilityKind kind) {
- kind.isBillionLaughs() or kind.isQuadraticBlowup()
+ /** Gets a reference to an `xml.etree` parser instance. */
+ DataFlow::Node instance() { instance(DataFlow::TypeTracker::end()).flowsTo(result) }
+
+ /**
+ * A call to the `feed` method of an `xml.etree` parser.
+ */
+ private class XMLEtreeParserFeedCall extends DataFlow::MethodCallNode, XML::XMLParsing::Range {
+ XMLEtreeParserFeedCall() { this.calls(instance(), "feed") }
+
+ override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("data")] }
+
+ override predicate vulnerable(XML::XMLVulnerabilityKind kind) {
+ kind.isBillionLaughs() or kind.isQuadraticBlowup()
+ }
}
}
@@ -61,33 +104,8 @@ private module XmlEtree {
}
override predicate vulnerable(XML::XMLVulnerabilityKind kind) {
- not exists(this.getArgByName("parser")) and
- (kind.isBillionLaughs() or kind.isQuadraticBlowup())
- or
- exists(XML::XMLParser xmlParser |
- xmlParser = this.getArgByName("parser").getALocalSource() and xmlParser.vulnerable(kind)
- )
- }
- }
-
- /**
- * A call to the `feed` method of an `xml.etree` parser.
- */
- private class XMLEtreeParserFeedCall extends DataFlow::CallCfgNode, XML::XMLParsing::Range {
- XMLEtreeParserFeedCall() {
- this =
- API::moduleImport("xml")
- .getMember("etree")
- .getMember("ElementTree")
- .getMember("XMLParser")
- .getReturn()
- .getMember("feed")
- .getACall()
- }
-
- override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("data")] }
-
- override predicate vulnerable(XML::XMLVulnerabilityKind kind) {
+ // note: it does not matter what `xml.etree` parser you are using, you cannot
+ // change the security features anyway :|
kind.isBillionLaughs() or kind.isQuadraticBlowup()
}
}
diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/xml_etree.py b/python/ql/test/experimental/library-tests/frameworks/XML/xml_etree.py
index ee452c11853d..df126e458e2d 100644
--- a/python/ql/test/experimental/library-tests/frameworks/XML/xml_etree.py
+++ b/python/ql/test/experimental/library-tests/frameworks/XML/xml_etree.py
@@ -35,8 +35,8 @@
# manual use of feed method on XMLPullParser
parser = xml.etree.ElementTree.XMLPullParser()
-parser.feed(x) # $ MISSING: input=x vuln='Billion Laughs' vuln='Quadratic Blowup'
-parser.feed(data=x) # $ MISSING: input=x vuln='Billion Laughs' vuln='Quadratic Blowup'
+parser.feed(x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup'
+parser.feed(data=x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup'
parser.close()
# note: it's technically possible to use the thing wrapper func `fromstring` with an
From c0a6f9f3fdcd7d3b52d4da7fb5657ad839686322 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Thu, 3 Mar 2022 22:00:55 +0100
Subject: [PATCH 68/79] Python: Restructure lxml modeling
and handle parser being passed as positional argument
---
.../semmle/python/frameworks/Xml.qll | 164 ++++++++++--------
.../frameworks/XML/lxml_etree.py | 1 +
2 files changed, 94 insertions(+), 71 deletions(-)
diff --git a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll
index e6a52fc19be6..4af068cad317 100644
--- a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll
+++ b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll
@@ -293,55 +293,104 @@ private module SaxBasedParsing {
private module Lxml {
/**
- * A call to `lxml.etree.get_default_parser`.
+ * Provides models for `lxml.etree` parsers
*
- * See https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.get_default_parser
+ * See https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.XMLParser
*/
- private class LXMLDefaultParser extends DataFlow::CallCfgNode, XML::XMLParser::Range {
- LXMLDefaultParser() {
- this = API::moduleImport("lxml").getMember("etree").getMember("get_default_parser").getACall()
+ module XMLParser {
+ /**
+ * A source of instances of `lxml.etree` parsers, extend this class to model new instances.
+ *
+ * This can include instantiations of the class, return values from function
+ * calls, or a special parameter that will be set when functions are called by an external
+ * library.
+ *
+ * Use the predicate `XMLParser::instance()` to get references to instances of `lxml.etree` parsers.
+ */
+ abstract class InstanceSource extends DataFlow::LocalSourceNode {
+ /** Holds if this instance is vulnerable to `kind`. */
+ abstract predicate vulnerable(XML::XMLVulnerabilityKind kind);
}
- override DataFlow::Node getAnInput() { none() }
+ /**
+ * A call to `lxml.etree.XMLParser`.
+ *
+ * See https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.XMLParser
+ */
+ private class LXMLParser extends InstanceSource, DataFlow::CallCfgNode {
+ LXMLParser() {
+ this = API::moduleImport("lxml").getMember("etree").getMember("XMLParser").getACall()
+ }
- override predicate vulnerable(XML::XMLVulnerabilityKind kind) {
- // as highlighted by
- // https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.XMLParser
- // by default XXE is allow. so as long as the default parser has not been
- // overridden, the result is also vuln to XXE.
- kind.isXxe()
- // TODO: take into account that you can override the default parser with `lxml.etree.get_default_parser`.
+ // NOTE: it's not possible to change settings of a parser after constructing it
+ override predicate vulnerable(XML::XMLVulnerabilityKind kind) {
+ kind.isXxe() and
+ (
+ // resolve_entities has default True
+ not exists(this.getArgByName("resolve_entities"))
+ or
+ this.getArgByName("resolve_entities").getALocalSource().asExpr() = any(True t)
+ )
+ or
+ (kind.isBillionLaughs() or kind.isQuadraticBlowup()) and
+ this.getArgByName("huge_tree").getALocalSource().asExpr() = any(True t)
+ or
+ kind.isDtdRetrieval() and
+ this.getArgByName("load_dtd").getALocalSource().asExpr() = any(True t) and
+ this.getArgByName("no_network").getALocalSource().asExpr() = any(False t)
+ }
}
- }
- /**
- * A call to `lxml.etree.XMLParser`.
- *
- * See https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.XMLParser
- */
- private class LXMLParser extends DataFlow::CallCfgNode, XML::XMLParser::Range {
- LXMLParser() {
- this = API::moduleImport("lxml").getMember("etree").getMember("XMLParser").getACall()
- }
+ /**
+ * A call to `lxml.etree.get_default_parser`.
+ *
+ * See https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.get_default_parser
+ */
+ private class LXMLDefaultParser extends InstanceSource, DataFlow::CallCfgNode {
+ LXMLDefaultParser() {
+ this =
+ API::moduleImport("lxml").getMember("etree").getMember("get_default_parser").getACall()
+ }
- override DataFlow::Node getAnInput() { none() }
+ override predicate vulnerable(XML::XMLVulnerabilityKind kind) {
+ // as highlighted by
+ // https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.XMLParser
+ // by default XXE is allow. so as long as the default parser has not been
+ // overridden, the result is also vuln to XXE.
+ kind.isXxe()
+ // TODO: take into account that you can override the default parser with `lxml.etree.get_default_parser`.
+ }
+ }
- // NOTE: it's not possible to change settings of a parser after constructing it
- override predicate vulnerable(XML::XMLVulnerabilityKind kind) {
- kind.isXxe() and
- (
- // resolve_entities has default True
- not exists(this.getArgByName("resolve_entities"))
- or
- this.getArgByName("resolve_entities").getALocalSource().asExpr() = any(True t)
- )
- or
- (kind.isBillionLaughs() or kind.isQuadraticBlowup()) and
- this.getArgByName("huge_tree").getALocalSource().asExpr() = any(True t)
+ /** Gets a reference to an `lxml.etree` parser instance, with origin in `origin`. */
+ private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t, InstanceSource origin) {
+ t.start() and
+ result = origin
or
- kind.isDtdRetrieval() and
- this.getArgByName("load_dtd").getALocalSource().asExpr() = any(True t) and
- this.getArgByName("no_network").getALocalSource().asExpr() = any(False t)
+ exists(DataFlow::TypeTracker t2 | result = instance(t2, origin).track(t2, t))
+ }
+
+ /** Gets a reference to an `lxml.etree` parser instance, with origin in `origin`. */
+ DataFlow::Node instance(InstanceSource origin) {
+ instance(DataFlow::TypeTracker::end(), origin).flowsTo(result)
+ }
+
+ /** Gets a reference to an `lxml.etree` parser instance, that is vulnerable to `kind`. */
+ DataFlow::Node instanceVulnerableTo(XML::XMLVulnerabilityKind kind) {
+ exists(InstanceSource origin | result = instance(origin) and origin.vulnerable(kind))
+ }
+
+ /**
+ * A call to the `feed` method of an `lxml` parser.
+ */
+ private class LXMLParserFeedCall extends DataFlow::MethodCallNode, XML::XMLParsing::Range {
+ LXMLParserFeedCall() { this.calls(instance(_), "feed") }
+
+ override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("data")] }
+
+ override predicate vulnerable(XML::XMLVulnerabilityKind kind) {
+ this.calls(instanceVulnerableTo(kind), "feed")
+ }
}
}
@@ -376,40 +425,13 @@ private module Lxml {
]
}
- override predicate vulnerable(XML::XMLVulnerabilityKind kind) {
- // TODO: This should be done with type-tracking
- exists(XML::XMLParser xmlParser |
- xmlParser = this.getArgByName("parser").getALocalSource() and xmlParser.vulnerable(kind)
- )
- or
- kind.isXxe() and not exists(this.getArgByName("parser"))
- }
- }
-
- /**
- * A call to the `feed` method of an `lxml` parser.
- */
- private class LXMLEtreeParserFeedCall extends DataFlow::MethodCallNode, XML::XMLParsing::Range {
- LXMLEtreeParserFeedCall() {
- exists(API::Node parserInstance |
- parserInstance =
- API::moduleImport("lxml").getMember("etree").getMember("XMLParser").getReturn()
- or
- parserInstance =
- API::moduleImport("lxml").getMember("etree").getMember("get_default_parser").getReturn()
- |
- this = parserInstance.getMember("feed").getACall()
- )
- }
-
- override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("data")] }
+ DataFlow::Node getParserArg() { result in [this.getArg(1), this.getArgByName("parser")] }
override predicate vulnerable(XML::XMLVulnerabilityKind kind) {
- // TODO: This should be done with type-tracking
- exists(XML::XMLParser xmlParser |
- xmlParser = this.getObject().getALocalSource() and
- xmlParser.vulnerable(kind)
- )
+ this.getParserArg() = XMLParser::instanceVulnerableTo(kind)
+ or
+ kind.isXxe() and
+ not exists(this.getParserArg())
}
}
}
diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/lxml_etree.py b/python/ql/test/experimental/library-tests/frameworks/XML/lxml_etree.py
index db8b667e70a5..47ade6431221 100644
--- a/python/ql/test/experimental/library-tests/frameworks/XML/lxml_etree.py
+++ b/python/ql/test/experimental/library-tests/frameworks/XML/lxml_etree.py
@@ -34,6 +34,7 @@
# XXE-safe
parser = lxml.etree.XMLParser(resolve_entities=False)
+lxml.etree.fromstring(x, parser) # $ input=x
lxml.etree.fromstring(x, parser=parser) # $ input=x
# XXE-vuln
From df8e0fce68c1ea11bacaf789caebfbd7e5391376 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Thu, 3 Mar 2022 22:02:48 +0100
Subject: [PATCH 69/79] Python: Minor fixup of qldoc
---
.../experimental/semmle/python/frameworks/Xml.qll | 12 ++++++++++--
1 file changed, 10 insertions(+), 2 deletions(-)
diff --git a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll
index 4af068cad317..a3f79967b67c 100644
--- a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll
+++ b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll
@@ -153,7 +153,11 @@ private module SaxBasedParsing {
result = saxParserSetFeatureStateArgBacktracker(DataFlow::TypeBackTracker::end(), arg)
}
- /** Gets a reference to a XML sax parser that has `feature_external_ges` turned on */
+ /**
+ * Gets a reference to a XML sax parser that has `feature_external_ges` turned on.class
+ *
+ * See https://docs.python.org/3/library/xml.sax.handler.html#xml.sax.handler.feature_external_ges
+ */
private DataFlow::Node saxParserWithFeatureExternalGesTurnedOn(DataFlow::TypeTracker t) {
t.start() and
exists(SaxParserSetFeatureCall call |
@@ -189,7 +193,11 @@ private module SaxBasedParsing {
)
}
- /** Gets a reference to a XML sax parser that has been made unsafe for `kind`. */
+ /**
+ * Gets a reference to a XML sax parser that has `feature_external_ges` turned on.class
+ *
+ * See https://docs.python.org/3/library/xml.sax.handler.html#xml.sax.handler.feature_external_ges
+ */
DataFlow::Node saxParserWithFeatureExternalGesTurnedOn() {
result = saxParserWithFeatureExternalGesTurnedOn(DataFlow::TypeTracker::end())
}
From 837daaae3b5f2fba2e405f8cf7900a9c51999769 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Thu, 3 Mar 2022 22:04:48 +0100
Subject: [PATCH 70/79] Python: Remove XMLParser concept
---
.../experimental/semmle/python/Concepts.qll | 39 -------------------
.../XmlEntityInjectionCustomizations.qll | 13 -------
2 files changed, 52 deletions(-)
diff --git a/python/ql/src/experimental/semmle/python/Concepts.qll b/python/ql/src/experimental/semmle/python/Concepts.qll
index 22616c0a5d2b..29ce05501ca9 100644
--- a/python/ql/src/experimental/semmle/python/Concepts.qll
+++ b/python/ql/src/experimental/semmle/python/Concepts.qll
@@ -76,45 +76,6 @@ module XML {
abstract predicate vulnerable(XMLVulnerabilityKind kind);
}
}
-
- /**
- * A data-flow node that constructs an XML parser.
- *
- * Extend this class to model new APIs. If you want to refine existing API models,
- * extend `XMLParser` instead.
- */
- class XMLParser extends DataFlow::Node instanceof XMLParser::Range {
- /**
- * Gets the argument containing the content to parse.
- */
- DataFlow::Node getAnInput() { result = super.getAnInput() }
-
- /**
- * Holds if the parser is vulnerable to `kind`.
- */
- predicate vulnerable(XMLVulnerabilityKind kind) { super.vulnerable(kind) }
- }
-
- /** Provides classes for modeling XML parsers. */
- module XMLParser {
- /**
- * A data-flow node that constructs an XML parser.
- *
- * Extend this class to model new APIs. If you want to refine existing API models,
- * extend `XMLParser` instead.
- */
- abstract class Range extends DataFlow::Node {
- /**
- * Gets the argument containing the content to parse.
- */
- abstract DataFlow::Node getAnInput();
-
- /**
- * Holds if the parser is vulnerable to `kind`.
- */
- abstract predicate vulnerable(XMLVulnerabilityKind kind);
- }
- }
}
/** Provides classes for modeling LDAP query execution-related APIs. */
diff --git a/python/ql/src/experimental/semmle/python/security/dataflow/XmlEntityInjectionCustomizations.qll b/python/ql/src/experimental/semmle/python/security/dataflow/XmlEntityInjectionCustomizations.qll
index 7de0c0c4b9c2..44c5da0bcea1 100644
--- a/python/ql/src/experimental/semmle/python/security/dataflow/XmlEntityInjectionCustomizations.qll
+++ b/python/ql/src/experimental/semmle/python/security/dataflow/XmlEntityInjectionCustomizations.qll
@@ -61,19 +61,6 @@ module XmlEntityInjection {
override string getVulnerableKind() { xmlParsing.vulnerable(result) }
}
- /**
- * An input to an XML parser, considered as a flow sink.
- *
- * See `XML::XMLParser`
- */
- class XMLParserInputAsSink extends Sink {
- XML::XMLParser xmlParser;
-
- XMLParserInputAsSink() { this = xmlParser.getAnInput() }
-
- override string getVulnerableKind() { xmlParser.vulnerable(result) }
- }
-
/**
* A source of remote user input, considered as a flow source.
*/
From 0d69dc854c47df55eddb11d500b8cbe9b04f1d75 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Thu, 3 Mar 2022 22:06:26 +0100
Subject: [PATCH 71/79] Python: Minor qldoc improvement
---
python/ql/src/experimental/semmle/python/Concepts.qll | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/python/ql/src/experimental/semmle/python/Concepts.qll b/python/ql/src/experimental/semmle/python/Concepts.qll
index 29ce05501ca9..e8837e233ebc 100644
--- a/python/ql/src/experimental/semmle/python/Concepts.qll
+++ b/python/ql/src/experimental/semmle/python/Concepts.qll
@@ -51,7 +51,7 @@ module XML {
DataFlow::Node getAnInput() { result = super.getAnInput() }
/**
- * Holds if the parsing method or the parser holding it is vulnerable to `kind`.
+ * Holds if this XML parsing is vulnerable to `kind`.
*/
predicate vulnerable(XMLVulnerabilityKind kind) { super.vulnerable(kind) }
}
@@ -71,7 +71,7 @@ module XML {
abstract DataFlow::Node getAnInput();
/**
- * Holds if the parsing method or the parser holding it is vulnerable to `kind`.
+ * Holds if this XML parsing is vulnerable to `kind`.
*/
abstract predicate vulnerable(XMLVulnerabilityKind kind);
}
From 3f6c55e8aeb3c930d730bb719b778811ffa6dbf1 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Thu, 3 Mar 2022 22:09:31 +0100
Subject: [PATCH 72/79] Python: Rename `vulnerable` predicate => `vulnerableTo`
---
.../experimental/semmle/python/Concepts.qll | 4 ++--
.../semmle/python/frameworks/Xml.qll | 24 +++++++++----------
.../XmlEntityInjectionCustomizations.qll | 2 +-
.../XML/ExperimentalXmlConceptsTests.ql | 2 +-
4 files changed, 16 insertions(+), 16 deletions(-)
diff --git a/python/ql/src/experimental/semmle/python/Concepts.qll b/python/ql/src/experimental/semmle/python/Concepts.qll
index e8837e233ebc..7ebe90969221 100644
--- a/python/ql/src/experimental/semmle/python/Concepts.qll
+++ b/python/ql/src/experimental/semmle/python/Concepts.qll
@@ -53,7 +53,7 @@ module XML {
/**
* Holds if this XML parsing is vulnerable to `kind`.
*/
- predicate vulnerable(XMLVulnerabilityKind kind) { super.vulnerable(kind) }
+ predicate vulnerableTo(XMLVulnerabilityKind kind) { super.vulnerableTo(kind) }
}
/** Provides classes for modeling XML parsing APIs. */
@@ -73,7 +73,7 @@ module XML {
/**
* Holds if this XML parsing is vulnerable to `kind`.
*/
- abstract predicate vulnerable(XMLVulnerabilityKind kind);
+ abstract predicate vulnerableTo(XMLVulnerabilityKind kind);
}
}
}
diff --git a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll
index a3f79967b67c..1d34e017f031 100644
--- a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll
+++ b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll
@@ -66,7 +66,7 @@ private module XmlEtree {
override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("data")] }
- override predicate vulnerable(XML::XMLVulnerabilityKind kind) {
+ override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) {
kind.isBillionLaughs() or kind.isQuadraticBlowup()
}
}
@@ -103,7 +103,7 @@ private module XmlEtree {
]
}
- override predicate vulnerable(XML::XMLVulnerabilityKind kind) {
+ override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) {
// note: it does not matter what `xml.etree` parser you are using, you cannot
// change the security features anyway :|
kind.isBillionLaughs() or kind.isQuadraticBlowup()
@@ -218,7 +218,7 @@ private module SaxBasedParsing {
override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("source")] }
- override predicate vulnerable(XML::XMLVulnerabilityKind kind) {
+ override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) {
// always vuln to these
(kind.isBillionLaughs() or kind.isQuadraticBlowup())
or
@@ -251,7 +251,7 @@ private module SaxBasedParsing {
]
}
- override predicate vulnerable(XML::XMLVulnerabilityKind kind) {
+ override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) {
// always vuln to these
(kind.isBillionLaughs() or kind.isQuadraticBlowup())
or
@@ -290,7 +290,7 @@ private module SaxBasedParsing {
DataFlow::Node getParserArg() { result in [this.getArg(1), this.getArgByName("parser")] }
- override predicate vulnerable(XML::XMLVulnerabilityKind kind) {
+ override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) {
this.getParserArg() = saxParserWithFeatureExternalGesTurnedOn() and
(kind.isXxe() or kind.isDtdRetrieval())
or
@@ -317,7 +317,7 @@ private module Lxml {
*/
abstract class InstanceSource extends DataFlow::LocalSourceNode {
/** Holds if this instance is vulnerable to `kind`. */
- abstract predicate vulnerable(XML::XMLVulnerabilityKind kind);
+ abstract predicate vulnerableTo(XML::XMLVulnerabilityKind kind);
}
/**
@@ -331,7 +331,7 @@ private module Lxml {
}
// NOTE: it's not possible to change settings of a parser after constructing it
- override predicate vulnerable(XML::XMLVulnerabilityKind kind) {
+ override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) {
kind.isXxe() and
(
// resolve_entities has default True
@@ -360,7 +360,7 @@ private module Lxml {
API::moduleImport("lxml").getMember("etree").getMember("get_default_parser").getACall()
}
- override predicate vulnerable(XML::XMLVulnerabilityKind kind) {
+ override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) {
// as highlighted by
// https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.XMLParser
// by default XXE is allow. so as long as the default parser has not been
@@ -385,7 +385,7 @@ private module Lxml {
/** Gets a reference to an `lxml.etree` parser instance, that is vulnerable to `kind`. */
DataFlow::Node instanceVulnerableTo(XML::XMLVulnerabilityKind kind) {
- exists(InstanceSource origin | result = instance(origin) and origin.vulnerable(kind))
+ exists(InstanceSource origin | result = instance(origin) and origin.vulnerableTo(kind))
}
/**
@@ -396,7 +396,7 @@ private module Lxml {
override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("data")] }
- override predicate vulnerable(XML::XMLVulnerabilityKind kind) {
+ override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) {
this.calls(instanceVulnerableTo(kind), "feed")
}
}
@@ -435,7 +435,7 @@ private module Lxml {
DataFlow::Node getParserArg() { result in [this.getArg(1), this.getArgByName("parser")] }
- override predicate vulnerable(XML::XMLVulnerabilityKind kind) {
+ override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) {
this.getParserArg() = XMLParser::instanceVulnerableTo(kind)
or
kind.isXxe() and
@@ -455,7 +455,7 @@ private module Xmltodict {
result in [this.getArg(0), this.getArgByName("xml_input")]
}
- override predicate vulnerable(XML::XMLVulnerabilityKind kind) {
+ override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) {
(kind.isBillionLaughs() or kind.isQuadraticBlowup()) and
this.getArgByName("disable_entities").getALocalSource().asExpr() = any(False f)
}
diff --git a/python/ql/src/experimental/semmle/python/security/dataflow/XmlEntityInjectionCustomizations.qll b/python/ql/src/experimental/semmle/python/security/dataflow/XmlEntityInjectionCustomizations.qll
index 44c5da0bcea1..745658bbce7b 100644
--- a/python/ql/src/experimental/semmle/python/security/dataflow/XmlEntityInjectionCustomizations.qll
+++ b/python/ql/src/experimental/semmle/python/security/dataflow/XmlEntityInjectionCustomizations.qll
@@ -58,7 +58,7 @@ module XmlEntityInjection {
XMLParsingInputAsSink() { this = xmlParsing.getAnInput() }
- override string getVulnerableKind() { xmlParsing.vulnerable(result) }
+ override string getVulnerableKind() { xmlParsing.vulnerableTo(result) }
}
/**
diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/ExperimentalXmlConceptsTests.ql b/python/ql/test/experimental/library-tests/frameworks/XML/ExperimentalXmlConceptsTests.ql
index 8ca33765d64f..81bc391d0e55 100644
--- a/python/ql/test/experimental/library-tests/frameworks/XML/ExperimentalXmlConceptsTests.ql
+++ b/python/ql/test/experimental/library-tests/frameworks/XML/ExperimentalXmlConceptsTests.ql
@@ -22,7 +22,7 @@ class XmlParsingTest extends InlineExpectationsTest {
)
or
exists(XML::XMLVulnerabilityKind kind |
- parsing.vulnerable(kind) and
+ parsing.vulnerableTo(kind) and
location = parsing.getLocation() and
element = parsing.toString() and
value = "'" + kind + "'" and
From 683c2fa8254ebd56ec04a8e0fadb7bdb129c29e7 Mon Sep 17 00:00:00 2001
From: Jorge <46056498+jorgectf@users.noreply.github.com>
Date: Fri, 4 Mar 2022 01:02:56 +0100
Subject: [PATCH 73/79] Apply suggestions from code review
---
python/ql/src/experimental/semmle/python/frameworks/Xml.qll | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll
index 1d34e017f031..86c17374e086 100644
--- a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll
+++ b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll
@@ -154,7 +154,7 @@ private module SaxBasedParsing {
}
/**
- * Gets a reference to a XML sax parser that has `feature_external_ges` turned on.class
+ * Gets a reference to a XML sax parser that has `feature_external_ges` turned on.
*
* See https://docs.python.org/3/library/xml.sax.handler.html#xml.sax.handler.feature_external_ges
*/
@@ -194,7 +194,7 @@ private module SaxBasedParsing {
}
/**
- * Gets a reference to a XML sax parser that has `feature_external_ges` turned on.class
+ * Gets a reference to a XML sax parser that has `feature_external_ges` turned on.
*
* See https://docs.python.org/3/library/xml.sax.handler.html#xml.sax.handler.feature_external_ges
*/
@@ -301,7 +301,7 @@ private module SaxBasedParsing {
private module Lxml {
/**
- * Provides models for `lxml.etree` parsers
+ * Provides models for `lxml.etree` parsers.
*
* See https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.XMLParser
*/
From 3cd165d5b757be7651f6f9ade20bca773b27e582 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Fri, 4 Mar 2022 10:15:50 +0100
Subject: [PATCH 74/79] Python: Apply suggestions from code review
Co-authored-by: Jorge <46056498+jorgectf@users.noreply.github.com>
---
.../src/experimental/Security/CWE-611/SimpleXmlRpcServer.ql | 6 ++----
python/ql/src/experimental/semmle/python/Concepts.qll | 2 +-
2 files changed, 3 insertions(+), 5 deletions(-)
diff --git a/python/ql/src/experimental/Security/CWE-611/SimpleXmlRpcServer.ql b/python/ql/src/experimental/Security/CWE-611/SimpleXmlRpcServer.ql
index 0e3deebf6016..4177daf29c10 100644
--- a/python/ql/src/experimental/Security/CWE-611/SimpleXmlRpcServer.ql
+++ b/python/ql/src/experimental/Security/CWE-611/SimpleXmlRpcServer.ql
@@ -1,17 +1,15 @@
/**
* @name SimpleXMLRPCServer DoS vulnerability
* @description SimpleXMLRPCServer is vulnerable to DoS attacks from untrusted user input
- * @kind path-problem
+ * @kind problem
* @problem.severity warning
* @precision high
- * @id py/simple-xml-rpc-server
+ * @id py/simple-xml-rpc-server-dos
* @tags security
* external/cwe/cwe-776
*/
private import python
-private import semmle.python.dataflow.new.DataFlow
-private import semmle.python.Concepts
private import experimental.semmle.python.Concepts
private import semmle.python.ApiGraphs
diff --git a/python/ql/src/experimental/semmle/python/Concepts.qll b/python/ql/src/experimental/semmle/python/Concepts.qll
index 7ebe90969221..491267d057f2 100644
--- a/python/ql/src/experimental/semmle/python/Concepts.qll
+++ b/python/ql/src/experimental/semmle/python/Concepts.qll
@@ -22,7 +22,7 @@ module XML {
*/
class XMLVulnerabilityKind extends string {
XMLVulnerabilityKind() {
- this in ["Billion Laughs", "Quadratic Blowup", "XXE", "DTD retrieval",]
+ this in ["Billion Laughs", "Quadratic Blowup", "XXE", "DTD retrieval"]
}
/** Holds for Billion Laughs vulnerability kind. */
From d6cbfec43435204bb5e7350d26d9c636073b6652 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Fri, 4 Mar 2022 09:46:49 +0100
Subject: [PATCH 75/79] Python: huge_tree tests were wrong
Nicely spotted, @jorgectf!
---
.../frameworks/XML/lxml_etree.py | 4 ++--
.../library-tests/frameworks/XML/poc/PoC.py | 20 ++++++++++++++-----
2 files changed, 17 insertions(+), 7 deletions(-)
diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/lxml_etree.py b/python/ql/test/experimental/library-tests/frameworks/XML/lxml_etree.py
index 47ade6431221..7c62ed1ac6ae 100644
--- a/python/ql/test/experimental/library-tests/frameworks/XML/lxml_etree.py
+++ b/python/ql/test/experimental/library-tests/frameworks/XML/lxml_etree.py
@@ -45,9 +45,9 @@
parser = lxml.etree.XMLParser(huge_tree=True)
lxml.etree.fromstring(x, parser=parser) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' vuln='XXE'
-# Billion laughs, but not XXE
+# Safe for both Billion laughs and XXE
parser = lxml.etree.XMLParser(resolve_entities=False, huge_tree=True)
-lxml.etree.fromstring(x, parser=parser) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup'
+lxml.etree.fromstring(x, parser=parser) # $ input=x SPURIOUS: vuln='Billion Laughs' vuln='Quadratic Blowup'
# DTD retrival vuln (also XXE)
parser = lxml.etree.XMLParser(load_dtd=True, no_network=False)
diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/poc/PoC.py b/python/ql/test/experimental/library-tests/frameworks/XML/poc/PoC.py
index b38ff9889e9d..adcace1aa0a6 100644
--- a/python/ql/test/experimental/library-tests/frameworks/XML/poc/PoC.py
+++ b/python/ql/test/experimental/library-tests/frameworks/XML/poc/PoC.py
@@ -318,11 +318,21 @@ def test_billion_laughs_manually_enabled():
@expects_timeout
def test_quadratic_blowup_manually_enabled():
parser = lxml.etree.XMLParser(huge_tree=True)
- try:
- _root = lxml.etree.fromstring(quadratic_blowup, parser=parser)
- assert False
- except lxml.etree.XMLSyntaxError as e:
- assert "Detected an entity reference loop" in str(e)
+ root = lxml.etree.fromstring(quadratic_blowup, parser=parser)
+
+ @staticmethod
+ def test_billion_laughs_huge_tree_not_enough():
+ parser = lxml.etree.XMLParser(huge_tree=True, resolve_entities=False)
+ root = lxml.etree.fromstring(billion_laughs, parser=parser)
+ assert root.tag == "lolz"
+ assert root.text == None
+
+ @staticmethod
+ def test_quadratic_blowup_huge_tree_not_enough():
+ parser = lxml.etree.XMLParser(huge_tree=True, resolve_entities=False)
+ root = lxml.etree.fromstring(quadratic_blowup, parser=parser)
+ assert root.tag == "foo"
+ assert root.text == None
@staticmethod
def test_ok_xml():
From f0131afc5449459f1562862c557ed537b0ab3a4c Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Fri, 4 Mar 2022 09:49:00 +0100
Subject: [PATCH 76/79] Python: Fix `huge_tree` modeling
---
python/ql/src/experimental/semmle/python/frameworks/Xml.qll | 3 ++-
.../experimental/library-tests/frameworks/XML/lxml_etree.py | 2 +-
2 files changed, 3 insertions(+), 2 deletions(-)
diff --git a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll
index 86c17374e086..533b97436512 100644
--- a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll
+++ b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll
@@ -341,7 +341,8 @@ private module Lxml {
)
or
(kind.isBillionLaughs() or kind.isQuadraticBlowup()) and
- this.getArgByName("huge_tree").getALocalSource().asExpr() = any(True t)
+ this.getArgByName("huge_tree").getALocalSource().asExpr() = any(True t) and
+ not this.getArgByName("resolve_entities").getALocalSource().asExpr() = any(False t)
or
kind.isDtdRetrieval() and
this.getArgByName("load_dtd").getALocalSource().asExpr() = any(True t) and
diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/lxml_etree.py b/python/ql/test/experimental/library-tests/frameworks/XML/lxml_etree.py
index 7c62ed1ac6ae..22930a58af37 100644
--- a/python/ql/test/experimental/library-tests/frameworks/XML/lxml_etree.py
+++ b/python/ql/test/experimental/library-tests/frameworks/XML/lxml_etree.py
@@ -47,7 +47,7 @@
# Safe for both Billion laughs and XXE
parser = lxml.etree.XMLParser(resolve_entities=False, huge_tree=True)
-lxml.etree.fromstring(x, parser=parser) # $ input=x SPURIOUS: vuln='Billion Laughs' vuln='Quadratic Blowup'
+lxml.etree.fromstring(x, parser=parser) # $ input=x
# DTD retrival vuln (also XXE)
parser = lxml.etree.XMLParser(load_dtd=True, no_network=False)
From 1a9620a87a4aa9ae406774681431bb2e3274cc88 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Fri, 4 Mar 2022 10:01:02 +0100
Subject: [PATCH 77/79] Python: Add conditional assignment check for sax parser
---
.../library-tests/frameworks/XML/xml_sax.py | 17 +++++++++++++++++
1 file changed, 17 insertions(+)
diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/xml_sax.py b/python/ql/test/experimental/library-tests/frameworks/XML/xml_sax.py
index 89bbec3f1f57..158e62ffae6b 100644
--- a/python/ql/test/experimental/library-tests/frameworks/XML/xml_sax.py
+++ b/python/ql/test/experimental/library-tests/frameworks/XML/xml_sax.py
@@ -45,3 +45,20 @@ def func(cond):
parser.setFeature(xml.sax.handler.feature_external_ges, True)
parser.setFeature(xml.sax.handler.feature_external_ges, False)
parser.parse(StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup'
+
+def check_conditional_assignment(cond):
+ parser = xml.sax.make_parser()
+ if cond:
+ parser.setFeature(xml.sax.handler.feature_external_ges, True)
+ else:
+ parser.setFeature(xml.sax.handler.feature_external_ges, False)
+ parser.parse(StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='DTD retrieval' vuln='Quadratic Blowup' vuln='XXE'
+
+def check_conditional_assignment2(cond):
+ parser = xml.sax.make_parser()
+ if cond:
+ flag_value = True
+ else:
+ flag_value = False
+ parser.setFeature(xml.sax.handler.feature_external_ges, flag_value)
+ parser.parse(StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='DTD retrieval' vuln='Quadratic Blowup' vuln='XXE'
From ef045a6789cd4c7cbe04fba0e15b40461ba9ea75 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Fri, 4 Mar 2022 10:18:30 +0100
Subject: [PATCH 78/79] Python: Fix typo in set_default_parser
---
python/ql/src/experimental/semmle/python/frameworks/Xml.qll | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll
index 533b97436512..18ba6c5a572c 100644
--- a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll
+++ b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll
@@ -367,7 +367,7 @@ private module Lxml {
// by default XXE is allow. so as long as the default parser has not been
// overridden, the result is also vuln to XXE.
kind.isXxe()
- // TODO: take into account that you can override the default parser with `lxml.etree.get_default_parser`.
+ // TODO: take into account that you can override the default parser with `lxml.etree.set_default_parser`.
}
}
From 0e9da4aadb420f1b327403f991e7891bb962bfb6 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Tue, 8 Mar 2022 11:25:10 +0100
Subject: [PATCH 79/79] Python: Resolve name conflict over `XML` module
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Not the prettiest solution... but it works ¯\_(ツ)_/¯
---
.../src/experimental/Security/CWE-611/SimpleXmlRpcServer.ql | 2 +-
python/ql/src/experimental/semmle/python/Concepts.qll | 6 +++++-
python/ql/src/experimental/semmle/python/frameworks/Xml.qll | 2 ++
.../security/dataflow/XmlEntityInjectionCustomizations.qll | 2 +-
4 files changed, 9 insertions(+), 3 deletions(-)
diff --git a/python/ql/src/experimental/Security/CWE-611/SimpleXmlRpcServer.ql b/python/ql/src/experimental/Security/CWE-611/SimpleXmlRpcServer.ql
index 4177daf29c10..cda0633690c5 100644
--- a/python/ql/src/experimental/Security/CWE-611/SimpleXmlRpcServer.ql
+++ b/python/ql/src/experimental/Security/CWE-611/SimpleXmlRpcServer.ql
@@ -17,7 +17,7 @@ from DataFlow::CallCfgNode call, string kinds
where
call = API::moduleImport("xmlrpc").getMember("server").getMember("SimpleXMLRPCServer").getACall() and
kinds =
- strictconcat(XML::XMLVulnerabilityKind kind |
+ strictconcat(ExperimentalXML::XMLVulnerabilityKind kind |
kind.isBillionLaughs() or kind.isQuadraticBlowup()
|
kind, ", "
diff --git a/python/ql/src/experimental/semmle/python/Concepts.qll b/python/ql/src/experimental/semmle/python/Concepts.qll
index 491267d057f2..ce5617071845 100644
--- a/python/ql/src/experimental/semmle/python/Concepts.qll
+++ b/python/ql/src/experimental/semmle/python/Concepts.qll
@@ -14,7 +14,11 @@ private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.dataflow.new.TaintTracking
private import experimental.semmle.python.Frameworks
-module XML {
+/**
+ * Since both the normal and the experimental Concepts define an `XML` module,
+ * the experimental one is named `ExperimentalXML` to avoid a name conflict.
+ */
+module ExperimentalXML {
/**
* A kind of XML vulnerability.
*
diff --git a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll
index 18ba6c5a572c..a2f36f66f2e3 100644
--- a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll
+++ b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll
@@ -8,6 +8,8 @@ private import semmle.python.dataflow.new.DataFlow
private import experimental.semmle.python.Concepts
private import semmle.python.ApiGraphs
+module XML = ExperimentalXML;
+
private module XmlEtree {
/**
* Provides models for `xml.etree` parsers
diff --git a/python/ql/src/experimental/semmle/python/security/dataflow/XmlEntityInjectionCustomizations.qll b/python/ql/src/experimental/semmle/python/security/dataflow/XmlEntityInjectionCustomizations.qll
index 745658bbce7b..e420c738a978 100644
--- a/python/ql/src/experimental/semmle/python/security/dataflow/XmlEntityInjectionCustomizations.qll
+++ b/python/ql/src/experimental/semmle/python/security/dataflow/XmlEntityInjectionCustomizations.qll
@@ -54,7 +54,7 @@ module XmlEntityInjection {
* See `XML::XMLParsing`.
*/
class XMLParsingInputAsSink extends Sink {
- XML::XMLParsing xmlParsing;
+ ExperimentalXML::XMLParsing xmlParsing;
XMLParsingInputAsSink() { this = xmlParsing.getAnInput() }