a52f674
From bbc51fd471fd92a40458e4ba667799b9dfaa67f0 Mon Sep 17 00:00:00 2001
ca73a27
From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= <zbyszek@in.waw.pl>
ca73a27
Date: Fri, 17 Jan 2014 21:28:41 -0500
ca73a27
Subject: [PATCH] core: do not print invalid utf-8 in error messages
ca73a27
a52f674
(cherry picked from commit 550a40eceb7d1917152fc9317bf2696708d52bc2)
a52f674
ca73a27
Conflicts:
ca73a27
	TODO
ca73a27
---
ca73a27
 src/shared/fileio.c  | 11 +++++++----
ca73a27
 src/shared/utf8.c    | 26 ++++++++++++++++++++++++++
ca73a27
 src/shared/utf8.h    |  3 +++
ca73a27
 src/test/test-utf8.c | 17 +++++++++++++++++
ca73a27
 4 files changed, 53 insertions(+), 4 deletions(-)
ca73a27
ca73a27
diff --git a/src/shared/fileio.c b/src/shared/fileio.c
1cc3df3
index d28e38a621..121cd571aa 100644
ca73a27
--- a/src/shared/fileio.c
ca73a27
+++ b/src/shared/fileio.c
ca73a27
@@ -467,15 +467,18 @@ static int parse_env_file_push(const char *filename, unsigned line,
ca73a27
         va_list aq, *ap = userdata;
ca73a27
 
ca73a27
         if (!utf8_is_valid(key)) {
ca73a27
-                log_error("%s:%u: invalid UTF-8 for key '%s', ignoring.",
ca73a27
-                          filename, line, key);
ca73a27
+                _cleanup_free_ char *p = utf8_escape_invalid(key);
ca73a27
+
ca73a27
+                log_error("%s:%u: invalid UTF-8 in key '%s', ignoring.",
ca73a27
+                          filename, line, p);
ca73a27
                 return -EINVAL;
ca73a27
         }
ca73a27
 
ca73a27
         if (value && !utf8_is_valid(value)) {
ca73a27
-                /* FIXME: filter UTF-8 */
ca73a27
+                _cleanup_free_ char *p = utf8_escape_invalid(value);
ca73a27
+
ca73a27
                 log_error("%s:%u: invalid UTF-8 value for key %s: '%s', ignoring.",
ca73a27
-                          filename, line, key, value);
ca73a27
+                          filename, line, key, p);
ca73a27
                 return -EINVAL;
ca73a27
         }
ca73a27
 
ca73a27
diff --git a/src/shared/utf8.c b/src/shared/utf8.c
1cc3df3
index 31120af046..2b70d45e97 100644
ca73a27
--- a/src/shared/utf8.c
ca73a27
+++ b/src/shared/utf8.c
ca73a27
@@ -172,6 +172,32 @@ const char *utf8_is_valid(const char *str) {
ca73a27
         return str;
ca73a27
 }
ca73a27
 
ca73a27
+char *utf8_escape_invalid(const char *str) {
ca73a27
+        char *p, *s;
ca73a27
+
ca73a27
+        assert(str);
ca73a27
+
ca73a27
+        p = s = malloc(strlen(str) * 4 + 1);
ca73a27
+        if (!p)
ca73a27
+                return NULL;
ca73a27
+
ca73a27
+        while (*str) {
ca73a27
+                int len;
ca73a27
+
ca73a27
+                len = utf8_encoded_valid_unichar(str);
ca73a27
+                if (len > 0) {
ca73a27
+                        s = mempcpy(s, str, len);
ca73a27
+                        str += len;
ca73a27
+                } else {
ca73a27
+                        s = mempcpy(s, UTF8_REPLACEMENT_CHARACTER, strlen(UTF8_REPLACEMENT_CHARACTER));
ca73a27
+                        str += 1;
ca73a27
+                }
ca73a27
+        }
ca73a27
+        *s = '\0';
ca73a27
+
ca73a27
+        return p;
ca73a27
+}
ca73a27
+
ca73a27
 char *ascii_is_valid(const char *str) {
ca73a27
         const char *p;
ca73a27
 
ca73a27
diff --git a/src/shared/utf8.h b/src/shared/utf8.h
1cc3df3
index 96a03ea7cb..f93dfb8676 100644
ca73a27
--- a/src/shared/utf8.h
ca73a27
+++ b/src/shared/utf8.h
ca73a27
@@ -25,8 +25,11 @@
ca73a27
 
ca73a27
 #include "macro.h"
ca73a27
 
ca73a27
+#define UTF8_REPLACEMENT_CHARACTER "\xef\xbf\xbd"
ca73a27
+
ca73a27
 const char *utf8_is_valid(const char *s) _pure_;
ca73a27
 char *ascii_is_valid(const char *s) _pure_;
ca73a27
+char *utf8_escape_invalid(const char *s);
ca73a27
 
ca73a27
 bool utf8_is_printable(const char* str, size_t length) _pure_;
ca73a27
 
ca73a27
diff --git a/src/test/test-utf8.c b/src/test/test-utf8.c
1cc3df3
index f0182ee9af..53c1d475b7 100644
ca73a27
--- a/src/test/test-utf8.c
ca73a27
+++ b/src/test/test-utf8.c
ca73a27
@@ -66,12 +66,29 @@ static void test_utf8_encoded_valid_unichar(void) {
ca73a27
 
ca73a27
 }
ca73a27
 
ca73a27
+static void test_utf8_escaping(void) {
ca73a27
+        _cleanup_free_ char *p1, *p2, *p3;
ca73a27
+
ca73a27
+        p1 = utf8_escape_invalid("goo goo goo");
ca73a27
+        puts(p1);
ca73a27
+        assert_se(utf8_is_valid(p1));
ca73a27
+
ca73a27
+        p2 = utf8_escape_invalid("\341\204\341\204");
ca73a27
+        puts(p2);
ca73a27
+        assert_se(utf8_is_valid(p2));
ca73a27
+
ca73a27
+        p3 = utf8_escape_invalid("\341\204");
ca73a27
+        puts(p3);
ca73a27
+        assert_se(utf8_is_valid(p3));
ca73a27
+}
ca73a27
+
ca73a27
 int main(int argc, char *argv[]) {
ca73a27
         test_utf8_is_valid();
ca73a27
         test_utf8_is_printable();
ca73a27
         test_ascii_is_valid();
ca73a27
         test_ascii_filter();
ca73a27
         test_utf8_encoded_valid_unichar();
ca73a27
+        test_utf8_escaping();
ca73a27
 
ca73a27
         return 0;
ca73a27
 }