churchyard / rpms / python3

Forked from rpms/python3 5 years ago
Clone

Blame 00324-disallow-control-chars-in-http-urls.patch

d3803a6
From 7e200e0763f5b71c199aaf98bd5588f291585619 Mon Sep 17 00:00:00 2001
d3803a6
From: =?UTF-8?q?Miro=20Hron=C4=8Dok?= <miro@hroncok.cz>
d3803a6
Date: Tue, 7 May 2019 17:28:47 +0200
d3803a6
Subject: [PATCH] bpo-30458: Disallow control chars in http URLs. (GH-12755)
d3803a6
 (GH-13154)
d3803a6
MIME-Version: 1.0
d3803a6
Content-Type: text/plain; charset=UTF-8
d3803a6
Content-Transfer-Encoding: 8bit
d3803a6
d3803a6
Disallow control chars in http URLs in urllib.urlopen.  This addresses a potential security problem for applications that do not sanity check their URLs where http request headers could be injected.
d3803a6
d3803a6
Disable https related urllib tests on a build without ssl (GH-13032)
d3803a6
These tests require an SSL enabled build. Skip these tests when python is built without SSL to fix test failures.
d3803a6
d3803a6
Use http.client.InvalidURL instead of ValueError as the new error case's exception. (GH-13044)
d3803a6
d3803a6
Backport Co-Authored-By: Miro HronĨok <miro@hroncok.cz>
d3803a6
---
d3803a6
 Lib/http/client.py                            | 15 ++++++
d3803a6
 Lib/test/test_urllib.py                       | 53 +++++++++++++++++++
d3803a6
 Lib/test/test_xmlrpc.py                       |  7 ++-
d3803a6
 .../2019-04-10-08-53-30.bpo-30458.51E-DA.rst  |  1 +
d3803a6
 4 files changed, 75 insertions(+), 1 deletion(-)
d3803a6
 create mode 100644 Misc/NEWS.d/next/Security/2019-04-10-08-53-30.bpo-30458.51E-DA.rst
d3803a6
d3803a6
diff --git a/Lib/http/client.py b/Lib/http/client.py
d3803a6
index 1de151c38e..2afd452fe3 100644
d3803a6
--- a/Lib/http/client.py
d3803a6
+++ b/Lib/http/client.py
d3803a6
@@ -140,6 +140,16 @@ _MAXHEADERS = 100
d3803a6
 _is_legal_header_name = re.compile(rb'[^:\s][^:\r\n]*').fullmatch
d3803a6
 _is_illegal_header_value = re.compile(rb'\n(?![ \t])|\r(?![ \t\n])').search
d3803a6
 
d3803a6
+# These characters are not allowed within HTTP URL paths.
d3803a6
+#  See https://tools.ietf.org/html/rfc3986#section-3.3 and the
d3803a6
+#  https://tools.ietf.org/html/rfc3986#appendix-A pchar definition.
d3803a6
+# Prevents CVE-2019-9740.  Includes control characters such as \r\n.
d3803a6
+# We don't restrict chars above \x7f as putrequest() limits us to ASCII.
d3803a6
+_contains_disallowed_url_pchar_re = re.compile('[\x00-\x20\x7f]')
d3803a6
+# Arguably only these _should_ allowed:
d3803a6
+#  _is_allowed_url_pchars_re = re.compile(r"^[/!$&'()*+,;=:@%a-zA-Z0-9._~-]+$")
d3803a6
+# We are more lenient for assumed real world compatibility purposes.
d3803a6
+
d3803a6
 # We always set the Content-Length header for these methods because some
d3803a6
 # servers will otherwise respond with a 411
d3803a6
 _METHODS_EXPECTING_BODY = {'PATCH', 'POST', 'PUT'}
d3803a6
@@ -1101,6 +1111,11 @@ class HTTPConnection:
d3803a6
         self._method = method
d3803a6
         if not url:
d3803a6
             url = '/'
d3803a6
+        # Prevent CVE-2019-9740.
d3803a6
+        match = _contains_disallowed_url_pchar_re.search(url)
d3803a6
+        if match:
d3803a6
+            raise InvalidURL(f"URL can't contain control characters. {url!r} "
d3803a6
+                             f"(found at least {match.group()!r})")
d3803a6
         request = '%s %s %s' % (method, url, self._http_vsn_str)
d3803a6
 
d3803a6
         # Non-ASCII characters should have been eliminated earlier
d3803a6
diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py
d3803a6
index 2ac73b58d8..7214492eca 100644
d3803a6
--- a/Lib/test/test_urllib.py
d3803a6
+++ b/Lib/test/test_urllib.py
d3803a6
@@ -329,6 +329,59 @@ class urlopen_HttpTests(unittest.TestCase, FakeHTTPMixin, FakeFTPMixin):
d3803a6
         finally:
d3803a6
             self.unfakehttp()
d3803a6
 
d3803a6
+    @unittest.skipUnless(ssl, "ssl module required")
d3803a6
+    def test_url_with_control_char_rejected(self):
d3803a6
+        for char_no in list(range(0, 0x21)) + [0x7f]:
d3803a6
+            char = chr(char_no)
d3803a6
+            schemeless_url = f"//localhost:7777/test{char}/"
d3803a6
+            self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.")
d3803a6
+            try:
d3803a6
+                # We explicitly test urllib.request.urlopen() instead of the top
d3803a6
+                # level 'def urlopen()' function defined in this... (quite ugly)
d3803a6
+                # test suite.  They use different url opening codepaths.  Plain
d3803a6
+                # urlopen uses FancyURLOpener which goes via a codepath that
d3803a6
+                # calls urllib.parse.quote() on the URL which makes all of the
d3803a6
+                # above attempts at injection within the url _path_ safe.
d3803a6
+                escaped_char_repr = repr(char).replace('\\', r'\\')
d3803a6
+                InvalidURL = http.client.InvalidURL
d3803a6
+                with self.assertRaisesRegex(
d3803a6
+                    InvalidURL, f"contain control.*{escaped_char_repr}"):
d3803a6
+                    urllib.request.urlopen(f"http:{schemeless_url}")
d3803a6
+                with self.assertRaisesRegex(
d3803a6
+                    InvalidURL, f"contain control.*{escaped_char_repr}"):
d3803a6
+                    urllib.request.urlopen(f"https:{schemeless_url}")
d3803a6
+                # This code path quotes the URL so there is no injection.
d3803a6
+                resp = urlopen(f"http:{schemeless_url}")
d3803a6
+                self.assertNotIn(char, resp.geturl())
d3803a6
+            finally:
d3803a6
+                self.unfakehttp()
d3803a6
+
d3803a6
+    @unittest.skipUnless(ssl, "ssl module required")
d3803a6
+    def test_url_with_newline_header_injection_rejected(self):
d3803a6
+        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.")
d3803a6
+        host = "localhost:7777?a=1 HTTP/1.1\r\nX-injected: header\r\nTEST: 123"
d3803a6
+        schemeless_url = "//" + host + ":8080/test/?test=a"
d3803a6
+        try:
d3803a6
+            # We explicitly test urllib.request.urlopen() instead of the top
d3803a6
+            # level 'def urlopen()' function defined in this... (quite ugly)
d3803a6
+            # test suite.  They use different url opening codepaths.  Plain
d3803a6
+            # urlopen uses FancyURLOpener which goes via a codepath that
d3803a6
+            # calls urllib.parse.quote() on the URL which makes all of the
d3803a6
+            # above attempts at injection within the url _path_ safe.
d3803a6
+            InvalidURL = http.client.InvalidURL
d3803a6
+            with self.assertRaisesRegex(
d3803a6
+                InvalidURL, r"contain control.*\\r.*(found at least . .)"):
d3803a6
+                urllib.request.urlopen(f"http:{schemeless_url}")
d3803a6
+            with self.assertRaisesRegex(InvalidURL, r"contain control.*\\n"):
d3803a6
+                urllib.request.urlopen(f"https:{schemeless_url}")
d3803a6
+            # This code path quotes the URL so there is no injection.
d3803a6
+            resp = urlopen(f"http:{schemeless_url}")
d3803a6
+            self.assertNotIn(' ', resp.geturl())
d3803a6
+            self.assertNotIn('\r', resp.geturl())
d3803a6
+            self.assertNotIn('\n', resp.geturl())
d3803a6
+        finally:
d3803a6
+            self.unfakehttp()
d3803a6
+
d3803a6
     def test_read_0_9(self):
d3803a6
         # "0.9" response accepted (but not "simple responses" without
d3803a6
         # a status line)
d3803a6
diff --git a/Lib/test/test_xmlrpc.py b/Lib/test/test_xmlrpc.py
d3803a6
index 32263f7f0b..0e002ec4ef 100644
d3803a6
--- a/Lib/test/test_xmlrpc.py
d3803a6
+++ b/Lib/test/test_xmlrpc.py
d3803a6
@@ -945,7 +945,12 @@ class SimpleServerTestCase(BaseServerTestCase):
d3803a6
     def test_partial_post(self):
d3803a6
         # Check that a partial POST doesn't make the server loop: issue #14001.
d3803a6
         conn = http.client.HTTPConnection(ADDR, PORT)
d3803a6
-        conn.request('POST', '/RPC2 HTTP/1.0\r\nContent-Length: 100\r\n\r\nbye')
d3803a6
+        conn.send('POST /RPC2 HTTP/1.0\r\n'
d3803a6
+                  'Content-Length: 100\r\n\r\n'
d3803a6
+                  'bye HTTP/1.1\r\n'
d3803a6
+                  f'Host: {ADDR}:{PORT}\r\n'
d3803a6
+                  'Accept-Encoding: identity\r\n'
d3803a6
+                  'Content-Length: 0\r\n\r\n'.encode('ascii'))
d3803a6
         conn.close()
d3803a6
 
d3803a6
     def test_context_manager(self):
d3803a6
diff --git a/Misc/NEWS.d/next/Security/2019-04-10-08-53-30.bpo-30458.51E-DA.rst b/Misc/NEWS.d/next/Security/2019-04-10-08-53-30.bpo-30458.51E-DA.rst
d3803a6
new file mode 100644
d3803a6
index 0000000000..ed8027fb4d
d3803a6
--- /dev/null
d3803a6
+++ b/Misc/NEWS.d/next/Security/2019-04-10-08-53-30.bpo-30458.51E-DA.rst
d3803a6
@@ -0,0 +1 @@
d3803a6
+Address CVE-2019-9740 by disallowing URL paths with embedded whitespace or control characters through into the underlying http client request.  Such potentially malicious header injection URLs now cause an http.client.InvalidURL exception to be raised.
d3803a6
-- 
d3803a6
2.21.0
d3803a6