Blob Blame History Raw
From bbc51fd471fd92a40458e4ba667799b9dfaa67f0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= <zbyszek@in.waw.pl>
Date: Fri, 17 Jan 2014 21:28:41 -0500
Subject: [PATCH] core: do not print invalid utf-8 in error messages

(cherry picked from commit 550a40eceb7d1917152fc9317bf2696708d52bc2)

Conflicts:
	TODO
---
 src/shared/fileio.c  | 11 +++++++----
 src/shared/utf8.c    | 26 ++++++++++++++++++++++++++
 src/shared/utf8.h    |  3 +++
 src/test/test-utf8.c | 17 +++++++++++++++++
 4 files changed, 53 insertions(+), 4 deletions(-)

diff --git a/src/shared/fileio.c b/src/shared/fileio.c
index d28e38a..121cd57 100644
--- a/src/shared/fileio.c
+++ b/src/shared/fileio.c
@@ -467,15 +467,18 @@ static int parse_env_file_push(const char *filename, unsigned line,
         va_list aq, *ap = userdata;
 
         if (!utf8_is_valid(key)) {
-                log_error("%s:%u: invalid UTF-8 for key '%s', ignoring.",
-                          filename, line, key);
+                _cleanup_free_ char *p = utf8_escape_invalid(key);
+
+                log_error("%s:%u: invalid UTF-8 in key '%s', ignoring.",
+                          filename, line, p);
                 return -EINVAL;
         }
 
         if (value && !utf8_is_valid(value)) {
-                /* FIXME: filter UTF-8 */
+                _cleanup_free_ char *p = utf8_escape_invalid(value);
+
                 log_error("%s:%u: invalid UTF-8 value for key %s: '%s', ignoring.",
-                          filename, line, key, value);
+                          filename, line, key, p);
                 return -EINVAL;
         }
 
diff --git a/src/shared/utf8.c b/src/shared/utf8.c
index 31120af..2b70d45 100644
--- a/src/shared/utf8.c
+++ b/src/shared/utf8.c
@@ -172,6 +172,32 @@ const char *utf8_is_valid(const char *str) {
         return str;
 }
 
+char *utf8_escape_invalid(const char *str) {
+        char *p, *s;
+
+        assert(str);
+
+        p = s = malloc(strlen(str) * 4 + 1);
+        if (!p)
+                return NULL;
+
+        while (*str) {
+                int len;
+
+                len = utf8_encoded_valid_unichar(str);
+                if (len > 0) {
+                        s = mempcpy(s, str, len);
+                        str += len;
+                } else {
+                        s = mempcpy(s, UTF8_REPLACEMENT_CHARACTER, strlen(UTF8_REPLACEMENT_CHARACTER));
+                        str += 1;
+                }
+        }
+        *s = '\0';
+
+        return p;
+}
+
 char *ascii_is_valid(const char *str) {
         const char *p;
 
diff --git a/src/shared/utf8.h b/src/shared/utf8.h
index 96a03ea..f93dfb8 100644
--- a/src/shared/utf8.h
+++ b/src/shared/utf8.h
@@ -25,8 +25,11 @@
 
 #include "macro.h"
 
+#define UTF8_REPLACEMENT_CHARACTER "\xef\xbf\xbd"
+
 const char *utf8_is_valid(const char *s) _pure_;
 char *ascii_is_valid(const char *s) _pure_;
+char *utf8_escape_invalid(const char *s);
 
 bool utf8_is_printable(const char* str, size_t length) _pure_;
 
diff --git a/src/test/test-utf8.c b/src/test/test-utf8.c
index f0182ee..53c1d47 100644
--- a/src/test/test-utf8.c
+++ b/src/test/test-utf8.c
@@ -66,12 +66,29 @@ static void test_utf8_encoded_valid_unichar(void) {
 
 }
 
+static void test_utf8_escaping(void) {
+        _cleanup_free_ char *p1, *p2, *p3;
+
+        p1 = utf8_escape_invalid("goo goo goo");
+        puts(p1);
+        assert_se(utf8_is_valid(p1));
+
+        p2 = utf8_escape_invalid("\341\204\341\204");
+        puts(p2);
+        assert_se(utf8_is_valid(p2));
+
+        p3 = utf8_escape_invalid("\341\204");
+        puts(p3);
+        assert_se(utf8_is_valid(p3));
+}
+
 int main(int argc, char *argv[]) {
         test_utf8_is_valid();
         test_utf8_is_printable();
         test_ascii_is_valid();
         test_ascii_filter();
         test_utf8_encoded_valid_unichar();
+        test_utf8_escaping();
 
         return 0;
 }