#109 Fix handling of pre-normalization characters in urlsplit
Closed 5 months ago by churchyard. Opened 5 months ago by cstratak.
rpms/ cstratak/python3 master  into  master

@@ -0,0 +1,42 @@ 

+ diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py

+ index 0faf2bb..d0365ec 100644

+ --- a/Lib/test/test_urlparse.py

+ +++ b/Lib/test/test_urlparse.py

+ @@ -1011,6 +1011,12 @@ class UrlParseTestCase(unittest.TestCase):

+          self.assertIn('\u2100', denorm_chars)

+          self.assertIn('\uFF03', denorm_chars)

+  

+ +        # bpo-36742: Verify port separators are ignored when they

+ +        # existed prior to decomposition

+ +        urllib.parse.urlsplit('http://\u30d5\u309a:80')

+ +        with self.assertRaises(ValueError):

+ +            urllib.parse.urlsplit('http://\u30d5\u309a\ufe1380')

+ +

+          for scheme in ["http", "https", "ftp"]:

+              for c in denorm_chars:

+                  url = "{}://netloc{}false.netloc/path".format(scheme, c)

+ diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py

+ index 8b6c9b1..e2f7b69 100644

+ --- a/Lib/urllib/parse.py

+ +++ b/Lib/urllib/parse.py

+ @@ -402,13 +402,16 @@ def _checknetloc(netloc):

+      # looking for characters like \u2100 that expand to 'a/c'

+      # IDNA uses NFKC equivalence, so normalize for this check

+      import unicodedata

+ -    netloc2 = unicodedata.normalize('NFKC', netloc)

+ -    if netloc == netloc2:

+ +    n = netloc.rpartition('@')[2] # ignore anything to the left of '@'

+ +    n = n.replace(':', '')        # ignore characters already included

+ +    n = n.replace('#', '')        # but not the surrounding text

+ +    n = n.replace('?', '')

+ +    netloc2 = unicodedata.normalize('NFKC', n)

+ +    if n == netloc2:

+          return

+ -    _, _, netloc = netloc.rpartition('@') # anything to the left of '@' is okay

+      for c in '/?#@:':

+          if c in netloc2:

+ -            raise ValueError("netloc '" + netloc2 + "' contains invalid " +

+ +            raise ValueError("netloc '" + netloc + "' contains invalid " +

+                               "characters under NFKC normalization")

+  

+  def urlsplit(url, scheme='', allow_fragments=True):

file modified
+11 -1

@@ -17,7 +17,7 @@ 

  #global prerel ...

  %global upstream_version %{general_version}%{?prerel}

  Version: %{general_version}%{?prerel:~%{prerel}}

- Release: 2%{?dist}

+ Release: 3%{?dist}

  License: Python

  

  

@@ -282,6 +282,12 @@ 

  # So we mark the command as unsupported - and the tests are skipped

  Patch316: 00316-mark-bdist_wininst-unsupported.patch

  

+ # 00320 #

+ # Fix handling of pre-normalization characters in urlsplit()

+ # This fixes a regression introduced by the fix for CVE-2019-9636

+ # Fixed upstream: https://bugs.python.org/issue36742

+ Patch320: 00320-fix-pre-normalization-chars-in-urlsplit.patch

+ 

  # (New patches go here ^^^)

  #

  # When adding new patches to "python" and "python3" in Fedora, EL, etc.,

@@ -575,6 +581,7 @@ 

  %patch251 -p1

  %patch274 -p1

  %patch316 -p1

+ %patch320 -p1

  

  

  # Remove files that should be generated by the build

@@ -1494,6 +1501,9 @@ 

  # ======================================================

  

  %changelog

+ * Tue May 07 2019 Charalampos Stratakis <cstratak@redhat.com> - 3.7.3-3

+ - Fix handling of pre-normalization characters in urlsplit

+ 

  * Wed Apr 17 2019 Patrik Kopkan <pkopkan@redhat.com> - 3.7.3-2

  - Makes man python3.7m show python3.7 man pages (#1612241)

  

no initial comment

Note: Patch number 320 is allocated to the actual CVE fix. I don't mind having it used here as well, but is it on purpose?

Yes, it's on purpose. This is essentially a fixup for the CVE fix, so in interpreters where the CVE is not fixed yet, I add the CVE fix and this patch together as one.

Metadata Update from @churchyard:
- Pull-request tagged with: merge - rebase - CI

5 months ago

It seems that the CI failed because the process was killed while running findleaks.

That is a known and tracked issue. If everything else passed, you should be good.

rebased onto 1b92cc7

5 months ago

Pull-Request has been closed by churchyard

5 months ago