From 6fa9aa172ba262356502aec2a3047610251c6093 Mon Sep 17 00:00:00 2001
From: Tim Rühsen <tim.ruehsen@gmx.de>
Date: Fri, 16 Mar 2018 10:59:39 +0100
Subject: [PATCH] Return error (IDN2_ICONV_FAIL) on charset conversion errors

---
 lib/decode.c           | 36 +++++++++++++++++++++++++++---------
 lib/lookup.c           |  7 +++++--
 lib/register.c         |  8 ++++++--
 src/idn2.c             |  3 ++-
 tests/test-tounicode.c | 21 ++++++++++++---------
 5 files changed, 52 insertions(+), 23 deletions(-)

diff --git a/lib/decode.c b/lib/decode.c
index f55d029..f15d39e 100644
--- a/lib/decode.c
+++ b/lib/decode.c
@@ -356,20 +356,34 @@ idn2_to_unicode_8zlz (const char * input, char ** output, int flags)
 {
   int rc;
   uint8_t *output_u8, *output_l8;
+  const char *encoding;
 
   rc = idn2_to_unicode_8z8z (input, (char **) &output_u8, flags);
   if (rc != IDN2_OK || !input)
     return rc;
 
-  output_l8 = (uint8_t*)u8_strconv_to_locale (output_u8);
-  free (output_u8);
+  encoding = locale_charset ();
+  output_l8 = (uint8_t*) u8_strconv_to_encoding (output_u8, encoding, iconveh_error);
 
-  if (output)
-    *output = (char *) output_l8;
+  if (!output_l8)
+    {
+      if (errno == ENOMEM)
+        rc = IDN2_MALLOC;
+      else
+        rc = IDN2_ENCODING_ERROR;
+
+      free(output_l8);
+    }
   else
-    free (output_l8);
+    {
+      if (output)
+        *output = (char *) output_l8;
+      rc = IDN2_OK;
+    }
 
-  return IDN2_OK;
+  free (output_u8);
+
+  return rc;
 }
 
 /**
@@ -389,7 +403,8 @@ idn2_to_unicode_8zlz (const char * input, char ** output, int flags)
  *   %IDN2_OK: The conversion was successful.
  *   %IDN2_TOO_BIG_DOMAIN: The domain is too long.
  *   %IDN2_TOO_BIG_LABEL: A label is would have been too long.
- *   %IDN2_ENCODING_ERROR: Character conversion failed.
+ *   %IDN2_ENCODING_ERROR: Output character conversion failed.
+ *   %IDN2_ICONV_FAIL: Input character conversion failed.
  *   %IDN2_MALLOC: Memory allocation failed.
  *
  * Since: 2.0.0
@@ -398,6 +413,7 @@ int
 idn2_to_unicode_lzlz (const char * input, char ** output, int flags)
 {
   uint8_t *input_l8;
+  const char *encoding;
   int rc;
 
   if (!input)
@@ -407,12 +423,14 @@ idn2_to_unicode_lzlz (const char * input, char ** output, int flags)
       return IDN2_OK;
     }
 
-  input_l8 = u8_strconv_from_locale (input);
+  encoding = locale_charset ();
+  input_l8 = u8_strconv_from_encoding (input, encoding, iconveh_error);
+
   if (!input_l8)
     {
       if (errno == ENOMEM)
 	return IDN2_MALLOC;
-      return IDN2_ENCODING_ERROR;
+      return IDN2_ICONV_FAIL;
     }
 
   rc = idn2_to_unicode_8zlz ((char*)input_l8, output, flags);
diff --git a/lib/lookup.c b/lib/lookup.c
index 9094aeb..e64182a 100644
--- a/lib/lookup.c
+++ b/lib/lookup.c
@@ -514,8 +514,11 @@ idn2_lookup_ul (const char * src, char ** lookupname, int flags)
 
   if (src)
     {
-      utf8src = u8_strconv_from_locale (src);
-      if (utf8src == NULL)
+      const char *encoding = locale_charset ();
+
+      utf8src = u8_strconv_from_encoding (src, encoding, iconveh_error);
+
+      if (!utf8src)
 	{
 	  if (errno == ENOMEM)
 	    return IDN2_MALLOC;
diff --git a/lib/register.c b/lib/register.c
index 910ff18..5407ea2 100644
--- a/lib/register.c
+++ b/lib/register.c
@@ -231,7 +231,8 @@ idn2_register_u8 (const uint8_t * ulabel, const uint8_t * alabel,
  *   %IDN2_UALABEL_MISMATCH is returned, when either of the input
  *   labels are too long %IDN2_TOO_BIG_LABEL is returned, when @alabel
  *   does does not appear to be a proper A-label %IDN2_INVALID_ALABEL
- *   is returned, or another error code is returned.
+ *   is returned, when the locale to UTF-8 conversion of @ulabel fails
+ *   %IDN2_ICONV_FAIL is returned, or another error code is returned.
  **/
 int
 idn2_register_ul (const char *ulabel, const char *alabel,
@@ -242,7 +243,10 @@ idn2_register_ul (const char *ulabel, const char *alabel,
 
   if (ulabel)
     {
-      utf8ulabel = u8_strconv_from_locale (ulabel);
+      const char *encoding = locale_charset ();
+
+      utf8ulabel = u8_strconv_from_encoding (ulabel, encoding, iconveh_error);
+
       if (utf8ulabel == NULL)
 	{
 	  if (errno == ENOMEM)
diff --git a/src/idn2.c b/src/idn2.c
index 4a13a82..906ae01 100644
--- a/src/idn2.c
+++ b/src/idn2.c
@@ -108,8 +108,9 @@ hexdump (const char *prefix, const char *str)
   uint32_t *u32;
   size_t u32len;
   size_t i;
+  const char *encoding = locale_charset ();
 
-  u8 = u8_strconv_from_locale (str);
+  u8 = u8_strconv_from_encoding (str, encoding, iconveh_error);
   if (u8)
     u32 = u8_to_u32 (u8, strlen ((char *) u8), NULL, &u32len);
 
diff --git a/tests/test-tounicode.c b/tests/test-tounicode.c
index a0ede86..9f77779 100644
--- a/tests/test-tounicode.c
+++ b/tests/test-tounicode.c
@@ -262,7 +262,7 @@ const test_t test[] = {
     {
       0
     },
-    IDN2_ENCODING_ERROR
+    IDN2_ENCODING_ERROR /* or IDN2_ICONV_FAIL with idn2_to_unicode_lzlz() due to bad UTF-8 input */
   },
 
   /* Test vectors from https://bugs.debian.org/610617 */
@@ -352,7 +352,7 @@ const test_t test[] = {
   },
 };
 
-static int debug = 0;
+static int debug = 1;
 static int error_count = 0;
 static int break_on_error = 0;
 
@@ -406,10 +406,11 @@ _u32_strcmp(const uint32_t *s1, const uint32_t *s2)
 static void
 _check_4z(const test_t *t, int rc, uint32_t *ucs4, const char *funcname)
 {
-  if (rc != t->rc_expected)
+  if (rc != t->rc_expected && !(rc == IDN2_ICONV_FAIL && t->rc_expected == IDN2_ENCODING_ERROR))
     {
-      fail ("%s() entry %u failed: %s\n",
-	funcname, (unsigned) (t - test), idn2_strerror (rc));
+      fprintf (stderr, "Test[%u] '%s' failed (got %d, expected %d):\n",
+        (unsigned) (t - test), t->name, rc, t->rc_expected);
+      fail ("  %s(): %s\n", funcname, idn2_strerror (rc));
     }
   else if (rc == IDN2_OK)
     {
@@ -443,6 +444,7 @@ main (void)
   uint32_t *ucs4, *punycode_u32;
   uint8_t *utf8;
   char *utf8_lz;
+  const char *encoding;
   size_t outlen, outlen2;
   int rc, skip_lz = 0;
   unsigned i;
@@ -451,11 +453,12 @@ main (void)
    * At least on Debian with libunistring 0.9.6+really0.9.3-0.1 and LC_ALL=C valgrind
    * reports Conditional jump or move depends on uninitialised value */
   setlocale (LC_ALL, "C.UTF-8");
+  encoding = locale_charset();
 
   if (debug)
-    printf("charset=%s\n", locale_charset());
+    printf("charset=%s\n", encoding);
 
-  if (strcmp(locale_charset(), "UTF-8") != 0)
+  if (strcmp(encoding, "UTF-8") != 0)
     skip_lz = 1;
 
   for (i = 0; i < sizeof (test) / sizeof (test[0]); i++)
@@ -503,7 +506,7 @@ main (void)
       rc = idn2_to_unicode_8zlz (t->punycode, &utf8_lz, 0);
       if (rc == IDN2_OK)
 	{
-	  utf8 = u8_strconv_from_locale (utf8_lz);
+          utf8 = u8_strconv_from_encoding (utf8_lz, encoding, iconveh_error);
 	  free (utf8_lz);
 	  ucs4 = u8_to_u32 (utf8, u8_strlen (utf8) + 1, NULL, &outlen);
 	  free (utf8);
@@ -517,7 +520,7 @@ main (void)
       rc = idn2_to_unicode_lzlz (t->punycode, (char **) &utf8_lz, 0);
       if (rc == IDN2_OK)
 	{
-	  utf8 = u8_strconv_from_locale (utf8_lz);
+          utf8 = u8_strconv_from_encoding (utf8_lz, encoding, iconveh_error);
 	  free (utf8_lz);
 	  ucs4 = u8_to_u32 (utf8, u8_strlen (utf8) + 1, NULL, &outlen);
 	  free (utf8);
--
libgit2 0.26.0

From dd11133faa276744fb2b11424778c62820bbd547 Mon Sep 17 00:00:00 2001
From: Tim Rühsen <tim.ruehsen@gmx.de>
Date: Fri, 16 Mar 2018 11:18:02 +0100
Subject: [PATCH] Fix memleak in idn2_to_unicode_8zlz()

---
 lib/decode.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/lib/decode.c b/lib/decode.c
index f15d39e..db80d65 100644
--- a/lib/decode.c
+++ b/lib/decode.c
@@ -378,6 +378,9 @@ idn2_to_unicode_8zlz (const char * input, char ** output, int flags)
     {
       if (output)
         *output = (char *) output_l8;
+      else
+        free (output_l8);
+
       rc = IDN2_OK;
     }
 
--
libgit2 0.26.0