From b0e29fbdf31bb94b11cb8a7cc830b4a76467afa3 Mon Sep 17 00:00:00 2001
From: William S Fulton <wsf@fultondesigns.co.uk>
Date: Mon, 4 Dec 2017 18:41:55 +0000
Subject: [PATCH] Add missing checks for failures in calls to
 PyUnicode_AsUTF8String.

Previously a segfault could occur when passing strings that cannot be encoded
as UTF-8 (lone low surrogates), e.g. passing u"\udcff" to the C layer (Python 3).
---
 CHANGES.current                               |  8 ++++++-
 Doc/Manual/Python.html                        | 22 ++++++++++++++++---
 Doc/Manual/Varargs.html                       |  5 ++++-
 Examples/python/multimap/example.i            | 12 +++++++++-
 .../python/unicode_strings_runme.py           | 10 +++++++++
 Examples/test-suite/python_varargs_typemap.i  |  5 ++++-
 Examples/test-suite/unicode_strings.i         |  2 ++
 Lib/python/pyerrors.swg                       | 11 ++++++----
 Lib/python/pyhead.swg                         | 16 ++++++++------
 Lib/python/pyinit.swg                         |  4 ++--
 Lib/python/pyrun.swg                          | 10 ++++++---
 Lib/python/pystrings.swg                      | 12 ++++++++--
 12 files changed, 92 insertions(+), 25 deletions(-)

#diff --git a/CHANGES.current b/CHANGES.current
#index 5cab80172..06b958f18 100644
#--- a/CHANGES.current
#+++ b/CHANGES.current
#@@ -6,8 +6,14 @@ the issue number to the end of the URL: https://github.com/swig/swig/issues/
# 
# Version 4.0.0 (in progress)
# ===========================
#+
#+2017-12-04: wsfulton
#+            [Python] Add missing checks for failures in calls to PyUnicode_AsUTF8String. Previously a 
#+            seg fault could occur when passing invalid UTF8 strings (low surrogates), eg passing
#+            u"\udcff" to the C layer (Python 3).
#+
# 2017-11-24: joequant
#-	    Fix github #1124 and return R_NilValue for null pointers
#+	    Fix #1124 and return R_NilValue for null pointers
# 
# 2017-11-29: wsfulton
#             [Java] director exception handling improvements.
#diff --git a/Doc/Manual/Python.html b/Doc/Manual/Python.html
#index 0c0023dea..27ce084bd 100644
#--- a/Doc/Manual/Python.html
#+++ b/Doc/Manual/Python.html
#@@ -6521,14 +6521,16 @@ string that cannot be completely decoded as UTF-8:
# <div class="code"><pre>
# %module example
# 
#-%include &lt;std_string.i&gt;
#-
# %inline %{
# 
#-const char* non_utf8_c_str(void) {
#+const char * non_utf8_c_str(void) {
#   return "h\xe9llo w\xc3\xb6rld";
# }
# 
#+void instring(const char *s) {
#+  ...
#+}
#+
# %}
# </pre></div>
# 
#@@ -6590,6 +6592,20 @@ For more details about the <tt>surrogateescape</tt> error handler, please see
# <a href="https://www.python.org/dev/peps/pep-0383/">PEP 383</a>.
# </p>
# 
#+<p>
#+When Python 3 strings are passed to the C/C++ layer, they are expected to be valid UTF8 Unicode strings too.
#+For example, when the <tt>instring</tt> method above is wrapped and called, any invalid UTF8 Unicode code strings
#+will result in a TypeError because the attempted conversion fails:
#+</p>
#+
#+<div class="targetlang"><pre>
#+&gt;&gt;&gt; example.instring('h\xe9llo')
#+&gt;&gt;&gt; example.instring('h\udce9llo')
#+Traceback (most recent call last):
#+  File "&lt;stdin&gt;", line 1, in &lt;module&gt;
#+TypeError: in method 'instring', argument 1 of type 'char const *'
#+</pre></div>
#+
# <p>
# In some cases, users may wish to instead handle all byte strings as bytes
# objects in Python 3. This can be accomplished by adding
#diff --git a/Doc/Manual/Varargs.html b/Doc/Manual/Varargs.html
#index eba816382..014a38cae 100644
#--- a/Doc/Manual/Varargs.html
#+++ b/Doc/Manual/Varargs.html
#@@ -529,8 +529,11 @@ like this:
#       SWIG_fail;
#     }
#     pystr = PyUnicode_AsUTF8String(pyobj);
#+    if (!pystr) {
#+      SWIG_fail;
#+    }
#     str = strdup(PyBytes_AsString(pystr));
#-    Py_XDECREF(pystr);
#+    Py_DECREF(pystr);
# %#else  
#     if (!PyString_Check(pyobj)) {
#       PyErr_SetString(PyExc_ValueError, "Expected a string");
diff --git a/Examples/python/multimap/example.i b/Examples/python/multimap/example.i
index 66c0f74c6..3ff5d52c0 100644
--- a/Examples/python/multimap/example.i
+++ b/Examples/python/multimap/example.i
@@ -39,7 +39,11 @@ extern int    gcd(int x, int y);
 %#if PY_VERSION_HEX >= 0x03000000
     {
       PyObject *utf8str = PyUnicode_AsUTF8String(s);
-      const char *cstr = PyBytes_AsString(utf8str);
+      const char *cstr;
+      if (!utf8str) {
+        SWIG_fail;
+      }
+      cstr = PyBytes_AsString(utf8str);
       $2[i] = strdup(cstr);
       Py_DECREF(utf8str);
     }
@@ -72,6 +76,9 @@ extern int gcdmain(int argc, char *argv[]);
     SWIG_fail;
   }
   utf8str = PyUnicode_AsUTF8String($input);
+  if (!utf8str) {
+    SWIG_fail;
+  }
   PyBytes_AsStringAndSize(utf8str, &cstr, &len);
   $1 = strncpy((char *)malloc(len+1), cstr, (size_t)len);
   $2 = (int)len;
@@ -105,6 +112,9 @@ extern int count(char *bytes, int len, char c);
   char *cstr;
   Py_ssize_t len;
   PyObject *utf8str = PyUnicode_AsUTF8String($input);
+  if (!utf8str) {
+    SWIG_fail;
+  }
   PyBytes_AsStringAndSize(utf8str, &cstr, &len);
   $1 = strncpy((char *)malloc(len+1), cstr, (size_t)len);
   $2 = (int)len;
diff --git a/Examples/test-suite/python/unicode_strings_runme.py b/Examples/test-suite/python/unicode_strings_runme.py
index fa9c51437..39e93b0fc 100644
--- a/Examples/test-suite/python/unicode_strings_runme.py
+++ b/Examples/test-suite/python/unicode_strings_runme.py
@@ -25,3 +25,13 @@ if sys.version_info[0:2] < (3, 0):
     check(unicode_strings.charstring(unicode("hello4")), "hello4")
     unicode_strings.charstring(u"hell\xb05")
     unicode_strings.charstring(u"hell\u00f66")
+
+low_surrogate_string = u"\udcff"
+try:
+    unicode_strings.instring(low_surrogate_string)
+    # Will succeed with Python 2
+except TypeError, e:
+    # Python 3 will fail the PyUnicode_AsUTF8String conversion resulting in a TypeError.
+    # The real error is actually:
+    # UnicodeEncodeError: 'utf-8' codec can't encode character '\udcff' in position 0: surrogates not allowed
+    pass
diff --git a/Examples/test-suite/python_varargs_typemap.i b/Examples/test-suite/python_varargs_typemap.i
index f05fb98eb..d809bf1fa 100644
--- a/Examples/test-suite/python_varargs_typemap.i
+++ b/Examples/test-suite/python_varargs_typemap.i
@@ -23,8 +23,11 @@
        SWIG_fail;
     }
     pystr = PyUnicode_AsUTF8String(pyobj);
+    if (!pystr) {
+      SWIG_fail;
+    }
     str = strdup(PyBytes_AsString(pystr));
-    Py_XDECREF(pystr);
+    Py_DECREF(pystr);
 %#else  
     if (!PyString_Check(pyobj)) {
        PyErr_SetString(PyExc_ValueError, "Expected a string");
diff --git a/Examples/test-suite/unicode_strings.i b/Examples/test-suite/unicode_strings.i
index 9be3748e6..e7266266e 100644
--- a/Examples/test-suite/unicode_strings.i
+++ b/Examples/test-suite/unicode_strings.i
@@ -20,4 +20,6 @@ char *charstring(char *s) {
   return s;
 }
 
+void instring(const char *s) {
+}
 %}
diff --git a/Lib/python/pyerrors.swg b/Lib/python/pyerrors.swg
index fe7313554..463afae15 100644
--- a/Lib/python/pyerrors.swg
+++ b/Lib/python/pyerrors.swg
@@ -53,14 +53,17 @@ SWIG_Python_AddErrorMsg(const char* mesg)
   PyObject *value = 0;
   PyObject *traceback = 0;
 
-  if (PyErr_Occurred()) PyErr_Fetch(&type, &value, &traceback);
+  if (PyErr_Occurred())
+    PyErr_Fetch(&type, &value, &traceback);
   if (value) {
-    char *tmp;
     PyObject *old_str = PyObject_Str(value);
+    const char *tmp = SWIG_Python_str_AsChar(old_str);
     PyErr_Clear();
     Py_XINCREF(type);
-
-    PyErr_Format(type, "%s %s", tmp = SWIG_Python_str_AsChar(old_str), mesg);
+    if (tmp)
+      PyErr_Format(type, "%s %s", tmp, mesg);
+    else
+      PyErr_Format(type, "%s", mesg);
     SWIG_Python_str_DelForPy3(tmp);
     Py_DECREF(old_str);
     Py_DECREF(value);
diff --git a/Lib/python/pyhead.swg b/Lib/python/pyhead.swg
index 55eb95a6d..2fa8b5b4c 100644
--- a/Lib/python/pyhead.swg
+++ b/Lib/python/pyhead.swg
@@ -38,14 +38,16 @@ SWIGINTERN char*
 SWIG_Python_str_AsChar(PyObject *str)
 {
 #if PY_VERSION_HEX >= 0x03000000
-  char *cstr;
-  char *newstr;
-  Py_ssize_t len;
+  char *newstr = 0;
   str = PyUnicode_AsUTF8String(str);
-  PyBytes_AsStringAndSize(str, &cstr, &len);
-  newstr = (char *) malloc(len+1);
-  memcpy(newstr, cstr, len+1);
-  Py_XDECREF(str);
+  if (str) {
+    char *cstr;
+    Py_ssize_t len;
+    PyBytes_AsStringAndSize(str, &cstr, &len);
+    newstr = (char *) malloc(len+1);
+    memcpy(newstr, cstr, len+1);
+    Py_XDECREF(str);
+  }
   return newstr;
 #else
   return PyString_AsString(str);
diff --git a/Lib/python/pyinit.swg b/Lib/python/pyinit.swg
index fe45ac941..826f8411b 100644
--- a/Lib/python/pyinit.swg
+++ b/Lib/python/pyinit.swg
@@ -84,10 +84,10 @@ swig_varlink_str(swig_varlinkobject *v) {
 
 SWIGINTERN int
 swig_varlink_print(swig_varlinkobject *v, FILE *fp, int SWIGUNUSEDPARM(flags)) {
-  char *tmp;
   PyObject *str = swig_varlink_str(v);
+  const char *tmp = SWIG_Python_str_AsChar(str);
   fprintf(fp,"Swig global variables ");
-  fprintf(fp,"%s\n", tmp = SWIG_Python_str_AsChar(str));
+  fprintf(fp,"%s\n", tmp ? tmp : "Invalid global variable");
   SWIG_Python_str_DelForPy3(tmp);
   Py_DECREF(str);
   return 0;
diff --git a/Lib/python/pyrun.swg b/Lib/python/pyrun.swg
index efc476613..430d3af18 100644
--- a/Lib/python/pyrun.swg
+++ b/Lib/python/pyrun.swg
@@ -1672,14 +1672,16 @@ SWIG_Python_AddErrMesg(const char* mesg, int infront)
     PyObject *traceback = 0;
     PyErr_Fetch(&type, &value, &traceback);
     if (value) {
-      char *tmp;
       PyObject *old_str = PyObject_Str(value);
+      const char *tmp = SWIG_Python_str_AsChar(old_str);
+      if (!tmp)
+        tmp = "Invalid error message";
       Py_XINCREF(type);
       PyErr_Clear();
       if (infront) {
-	PyErr_Format(type, "%s %s", mesg, tmp = SWIG_Python_str_AsChar(old_str));
+	PyErr_Format(type, "%s %s", mesg, tmp);
       } else {
-	PyErr_Format(type, "%s %s", tmp = SWIG_Python_str_AsChar(old_str), mesg);
+	PyErr_Format(type, "%s %s", tmp, mesg);
       }
       SWIG_Python_str_DelForPy3(tmp);
       Py_DECREF(old_str);
@@ -1805,6 +1807,8 @@ SWIG_Python_NonDynamicSetAttr(PyObject *obj, PyObject *name, PyObject *value) {
       Py_INCREF(name);
     } else {
       encoded_name = PyUnicode_AsUTF8String(name);
+      if (!encoded_name)
+        return -1;
     }
     PyErr_Format(PyExc_AttributeError, "'%.100s' object has no attribute '%.200s'", tp->tp_name, PyString_AsString(encoded_name));
     Py_DECREF(encoded_name);
diff --git a/Lib/python/pystrings.swg b/Lib/python/pystrings.swg
index fd37855eb..301e0f3e1 100644
--- a/Lib/python/pystrings.swg
+++ b/Lib/python/pystrings.swg
@@ -16,6 +16,7 @@ SWIG_AsCharPtrAndSize(PyObject *obj, char** cptr, size_t* psize, int *alloc)
 %#endif
   {
     char *cstr; Py_ssize_t len;
+    int ret = SWIG_OK;
 %#if PY_VERSION_HEX>=0x03000000
 %#if !defined(SWIG_PYTHON_STRICT_BYTE_CHAR)
     if (!alloc && cptr) {
@@ -26,7 +27,10 @@ SWIG_AsCharPtrAndSize(PyObject *obj, char** cptr, size_t* psize, int *alloc)
         return SWIG_RuntimeError;
     }
     obj = PyUnicode_AsUTF8String(obj);
-    if(alloc) *alloc = SWIG_NEWOBJ;
+    if (!obj)
+      return SWIG_TypeError;
+    if (alloc)
+      *alloc = SWIG_NEWOBJ;
 %#endif
     PyBytes_AsStringAndSize(obj, &cstr, &len);
 %#else
@@ -64,6 +68,8 @@ SWIG_AsCharPtrAndSize(PyObject *obj, char** cptr, size_t* psize, int *alloc)
 %#endif
 %#else
 	*cptr = SWIG_Python_str_AsChar(obj);
+        if (!*cptr)
+          ret = SWIG_TypeError;
 %#endif
       }
     }
@@ -71,7 +77,7 @@ SWIG_AsCharPtrAndSize(PyObject *obj, char** cptr, size_t* psize, int *alloc)
 %#if PY_VERSION_HEX>=0x03000000 && !defined(SWIG_PYTHON_STRICT_BYTE_CHAR)
     Py_XDECREF(obj);
 %#endif
-    return SWIG_OK;
+    return ret;
   } else {
 %#if defined(SWIG_PYTHON_2_UNICODE)
 %#if defined(SWIG_PYTHON_STRICT_BYTE_CHAR)
@@ -84,6 +90,8 @@ SWIG_AsCharPtrAndSize(PyObject *obj, char** cptr, size_t* psize, int *alloc)
         return SWIG_RuntimeError;
       }
       obj = PyUnicode_AsUTF8String(obj);
+      if (!obj)
+        return SWIG_TypeError;
       if (PyString_AsStringAndSize(obj, &cstr, &len) != -1) {
         if (cptr) {
           if (alloc) *alloc = SWIG_NEWOBJ;
-- 
2.21.1