Blob Blame History Raw
From c0619bd0c5eeb3d2f8af8b37575e11847664272c Mon Sep 17 00:00:00 2001
From: Vincent Untz <vuntz@suse.com>
Date: Thu, 21 Jun 2012 14:37:41 +0200
Subject: [PATCH] Do not use pickle for serialization in memcache, but JSON

We don't want to use pickle as it can execute arbitrary code. JSON is
safer. However, note that it supports serialization for only some
specific subset of object types; this should be enough for what we need,
though.

To avoid issues on upgrades (unability to read pickled values, and cache
poisoning for old servers not understanding JSON), we add a
memcache_serialization_support configuration option, with the following
values:

 0 = older, insecure pickle serialization (compatible, default in this release)
 1 = json serialization but pickles can still be read (still insecure)
 2 = json serialization only (secure, suggested, and the future default)

To avoid an instant full cache flush, existing installations should
upgrade with 0, then set to 1 and reload, then after some time (24
hours) set to 2 and reload. Support for 0 and 1 will be removed in
future versions.

Part of bug 1006414.

Patch Set 2: Added Vincent Untz <vuntz@suse.com> to AUTHORS

Change-Id: Id7d6d547b103b4f23ebf5be98b88f09ec6027ce4
---
 doc/manpages/proxy-server.conf.5             |   15 ++++++++
 etc/memcache.conf-sample                     |   10 +++++
 etc/proxy-server.conf-sample                 |   12 ++++++
 swift/common/memcached.py                    |   48 +++++++++++++++++++++-----
 swift/common/middleware/memcache.py          |   30 ++++++++++++----
 test/unit/common/middleware/test_memcache.py |    5 ++-
 test/unit/common/test_memcached.py           |   22 ++++++++++++
 7 files changed, 125 insertions(+), 17 deletions(-)

diff --git a/doc/manpages/proxy-server.conf.5 b/doc/manpages/proxy-server.conf.5
index 4979e4d..5cf5a7e 100644
--- a/doc/manpages/proxy-server.conf.5
+++ b/doc/manpages/proxy-server.conf.5
@@ -205,6 +205,21 @@ Enables the ability to log request headers. The default is False.
 .IP \fBmemcache_servers\fR
 The memcache servers that are available. This can be a list separated by commas. The default 
 is 127.0.0.1:11211.
+.IP \fBmemcache_serialization_support\fR
+This sets how memcache values are serialized and deserialized:
+.RE
+
+.PD 0
+.RS 10
+.IP "0 = older, insecure pickle serialization (default)"
+.IP "1 = json serialization but pickles can still be read (still insecure)"
+.IP "2 = json serialization only (secure)"
+.RE
+
+.RS 10
+To avoid an instant full cache flush, existing installations should upgrade with 0, then set to 1 and reload, then after some time (24 hours) set to 2 and reload. In the future, the ability to use pickle serialization will be removed.
+
+If not set in the configuration file, the value for memcache_serialization_support will be read from /etc/swift/memcache.conf if it exists (see memcache.conf-sample). Otherwise, the default value as indicated above will be used.
 .RE
 
 
diff --git a/etc/memcache.conf-sample b/etc/memcache.conf-sample
index 580d94a..cedfc19 100644
--- a/etc/memcache.conf-sample
+++ b/etc/memcache.conf-sample
@@ -3,3 +3,13 @@
 # several other conf files under [filter:cache] for example. You can specify
 # multiple servers separated with commas, as in: 10.1.2.3:11211,10.1.2.4:11211
 # memcache_servers = 127.0.0.1:11211
+#
+# Sets how memcache values are serialized and deserialized:
+# 0 = older, insecure pickle serialization (compatible, default in this release)
+# 1 = json serialization but pickles can still be read (still insecure)
+# 2 = json serialization only (secure, suggested, and the future default)
+# To avoid an instant full cache flush, existing installations should
+# upgrade with 0, then set to 1 and reload, then after some time (24 hours)
+# set to 2 and reload.
+# In the future, the ability to use pickle serialization will be removed.
+# memcache_serialization_support = 0
diff --git a/etc/proxy-server.conf-sample b/etc/proxy-server.conf-sample
index 148616b..18f711a 100644
--- a/etc/proxy-server.conf-sample
+++ b/etc/proxy-server.conf-sample
@@ -122,6 +122,18 @@ use = egg:swift#memcache
 # default to the value below. You can specify multiple servers separated with
 # commas, as in: 10.1.2.3:11211,10.1.2.4:11211
 # memcache_servers = 127.0.0.1:11211
+#
+# Sets how memcache values are serialized and deserialized:
+# 0 = older, insecure pickle serialization (compatible, default in this release)
+# 1 = json serialization but pickles can still be read (still insecure)
+# 2 = json serialization only (secure, suggested, and the future default)
+# If not set here, the value for memcache_serialization_support will be read
+# from /etc/swift/memcache.conf (see memcache.conf-sample).
+# To avoid an instant full cache flush, existing installations should
+# upgrade with 0, then set to 1 and reload, then after some time (24 hours)
+# set to 2 and reload.
+# In the future, the ability to use pickle serialization will be removed.
+# memcache_serialization_support = 0
 
 [filter:ratelimit]
 use = egg:swift#ratelimit
diff --git a/swift/common/memcached.py b/swift/common/memcached.py
index ecd9332..82ebb7a 100644
--- a/swift/common/memcached.py
+++ b/swift/common/memcached.py
@@ -27,11 +27,17 @@ import time
 from bisect import bisect
 from hashlib import md5
 
+try:
+    import simplejson as json
+except ImportError:
+    import json
+
 DEFAULT_MEMCACHED_PORT = 11211
 
 CONN_TIMEOUT = 0.3
 IO_TIMEOUT = 2.0
 PICKLE_FLAG = 1
+JSON_FLAG = 2
 NODE_WEIGHT = 50
 PICKLE_PROTOCOL = 2
 TRY_COUNT = 3
@@ -57,7 +63,8 @@ class MemcacheRing(object):
     """
 
     def __init__(self, servers, connect_timeout=CONN_TIMEOUT,
-                 io_timeout=IO_TIMEOUT, tries=TRY_COUNT):
+                 io_timeout=IO_TIMEOUT, tries=TRY_COUNT,
+                 allow_pickle=False, allow_unpickle=False):
         self._ring = {}
         self._errors = dict(((serv, []) for serv in servers))
         self._error_limited = dict(((serv, 0) for serv in servers))
@@ -69,6 +76,8 @@ class MemcacheRing(object):
         self._client_cache = dict(((server, []) for server in servers))
         self._connect_timeout = connect_timeout
         self._io_timeout = io_timeout
+        self._allow_pickle = allow_pickle
+        self._allow_unpickle = allow_unpickle or allow_pickle
 
     def _exception_occurred(self, server, e, action='talking'):
         if isinstance(e, socket.timeout):
@@ -130,16 +139,21 @@ class MemcacheRing(object):
 
         :param key: key
         :param value: value
-        :param serialize: if True, value is pickled before sending to memcache
+        :param serialize: if True, value is serialized with JSON before sending
+                          to memcache, or with pickle if configured to use
+                          pickle instead of JSON (to avoid cache poisoning)
         :param timeout: ttl in memcache
         """
         key = md5hash(key)
         if timeout > 0:
             timeout += time.time()
         flags = 0
-        if serialize:
+        if serialize and self._allow_pickle:
             value = pickle.dumps(value, PICKLE_PROTOCOL)
             flags |= PICKLE_FLAG
+        elif serialize:
+            value = json.dumps(value)
+            flags |= JSON_FLAG
         for (server, fp, sock) in self._get_conns(key):
             try:
                 sock.sendall('set %s %d %d %s noreply\r\n%s\r\n' % \
@@ -151,8 +165,9 @@ class MemcacheRing(object):
 
     def get(self, key):
         """
-        Gets the object specified by key.  It will also unpickle the object
-        before returning if it is pickled in memcache.
+        Gets the object specified by key.  It will also unserialize the object
+        before returning if it is serialized in memcache with JSON, or if it
+        is pickled and unpickling is allowed.
 
         :param key: key
         :returns: value of the key in memcache
@@ -168,7 +183,12 @@ class MemcacheRing(object):
                         size = int(line[3])
                         value = fp.read(size)
                         if int(line[2]) & PICKLE_FLAG:
-                            value = pickle.loads(value)
+                            if self._allow_unpickle:
+                                value = pickle.loads(value)
+                            else:
+                                value = None
+                        elif int(line[2]) & JSON_FLAG:
+                            value = json.loads(value)
                         fp.readline()
                     line = fp.readline().strip().split()
                 self._return_conn(server, fp, sock)
@@ -258,7 +278,9 @@ class MemcacheRing(object):
         :param mapping: dictonary of keys and values to be set in memcache
         :param servery_key: key to use in determining which server in the ring
                             is used
-        :param serialize: if True, value is pickled before sending to memcache
+        :param serialize: if True, value is serialized with JSON before sending
+                          to memcache, or with pickle if configured to use
+                          pickle instead of JSON (to avoid cache poisoning)
         :param timeout: ttl for memcache
         """
         server_key = md5hash(server_key)
@@ -268,9 +290,12 @@ class MemcacheRing(object):
         for key, value in mapping.iteritems():
             key = md5hash(key)
             flags = 0
-            if serialize:
+            if serialize and self._allow_pickle:
                 value = pickle.dumps(value, PICKLE_PROTOCOL)
                 flags |= PICKLE_FLAG
+            elif serialize:
+                value = json.dumps(value)
+                flags |= JSON_FLAG
             msg += ('set %s %d %d %s noreply\r\n%s\r\n' %
                     (key, flags, timeout, len(value), value))
         for (server, fp, sock) in self._get_conns(server_key):
@@ -302,7 +327,12 @@ class MemcacheRing(object):
                         size = int(line[3])
                         value = fp.read(size)
                         if int(line[2]) & PICKLE_FLAG:
-                            value = pickle.loads(value)
+                            if self._allow_unpickle:
+                                value = pickle.loads(value)
+                            else:
+                                value = None
+                        elif int(line[2]) & JSON_FLAG:
+                            value = json.loads(value)
                         responses[line[1]] = value
                         fp.readline()
                     line = fp.readline().strip().split()
diff --git a/swift/common/middleware/memcache.py b/swift/common/middleware/memcache.py
index eb988bd..20121c9 100644
--- a/swift/common/middleware/memcache.py
+++ b/swift/common/middleware/memcache.py
@@ -27,20 +27,36 @@ class MemcacheMiddleware(object):
     def __init__(self, app, conf):
         self.app = app
         self.memcache_servers = conf.get('memcache_servers')
-        if not self.memcache_servers:
+        serialization_format = conf.get('memcache_serialization_support')
+
+        if not self.memcache_servers or serialization_format is None:
             path = os.path.join(conf.get('swift_dir', '/etc/swift'),
                                 'memcache.conf')
             memcache_conf = ConfigParser()
             if memcache_conf.read(path):
-                try:
-                    self.memcache_servers = \
-                        memcache_conf.get('memcache', 'memcache_servers')
-                except (NoSectionError, NoOptionError):
-                    pass
+                if not self.memcache_servers:
+                    try:
+                        self.memcache_servers = \
+                            memcache_conf.get('memcache', 'memcache_servers')
+                    except (NoSectionError, NoOptionError):
+                        pass
+                if serialization_format is None:
+                    try:
+                        serialization_format = \
+                            memcache_conf.get('memcache',
+                                              'memcache_serialization_support')
+                    except (NoSectionError, NoOptionError):
+                        pass
+
         if not self.memcache_servers:
             self.memcache_servers = '127.0.0.1:11211'
+        if serialization_format is None:
+            serialization_format = 0
+
         self.memcache = MemcacheRing(
-            [s.strip() for s in self.memcache_servers.split(',') if s.strip()])
+            [s.strip() for s in self.memcache_servers.split(',') if s.strip()],
+            allow_pickle=(serialization_format == 0),
+            allow_unpickle=(serialization_format <= 1))
 
     def __call__(self, env, start_response):
         env['swift.cache'] = self.memcache
diff --git a/test/unit/common/middleware/test_memcache.py b/test/unit/common/middleware/test_memcache.py
index 6b94bd1..e217a96 100644
--- a/test/unit/common/middleware/test_memcache.py
+++ b/test/unit/common/middleware/test_memcache.py
@@ -47,6 +47,8 @@ class SetConfigParser(object):
         if section == 'memcache':
             if option == 'memcache_servers':
                 return '1.2.3.4:5'
+            elif option == 'memcache_serialization_support':
+                return '2'
             else:
                 raise NoOptionError(option)
         else:
@@ -86,7 +88,8 @@ class TestCacheMiddleware(unittest.TestCase):
         exc = None
         try:
             app = memcache.MemcacheMiddleware(
-                    FakeApp(), {'memcache_servers': '1.2.3.4:5'})
+                    FakeApp(), {'memcache_servers': '1.2.3.4:5',
+                                'memcache_serialization_support': '2'})
         except Exception, err:
             exc = err
         finally:
diff --git a/test/unit/common/test_memcached.py b/test/unit/common/test_memcached.py
index dff6e80..3016d10 100644
--- a/test/unit/common/test_memcached.py
+++ b/test/unit/common/test_memcached.py
@@ -1,3 +1,4 @@
+ # -*- coding: utf8 -*-
 # Copyright (c) 2010-2012 OpenStack, LLC.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -166,6 +167,9 @@ class TestMemcached(unittest.TestCase):
         self.assertEquals(memcache_client.get('some_key'), [1, 2, 3])
         memcache_client.set('some_key', [4, 5, 6])
         self.assertEquals(memcache_client.get('some_key'), [4, 5, 6])
+        memcache_client.set('some_key', ['simple str', 'utf8 str éà'])
+        # As per http://wiki.openstack.org/encoding, we should expect to have unicode
+        self.assertEquals(memcache_client.get('some_key'), ['simple str', u'utf8 str éà'])
         self.assert_(float(mock.cache.values()[0][1]) == 0)
         esttimeout = time.time() + 10
         memcache_client.set('some_key', [1, 2, 3], timeout=10)
@@ -244,6 +248,24 @@ class TestMemcached(unittest.TestCase):
         self.assertEquals(memcache_client.get_multi(('some_key2', 'some_key1',
             'not_exists'), 'multi_key'), [[4, 5, 6], [1, 2, 3], None])
 
+    def test_serialization(self):
+        memcache_client = memcached.MemcacheRing(['1.2.3.4:11211'],
+                                                 allow_pickle=True)
+        mock = MockMemcached()
+        memcache_client._client_cache['1.2.3.4:11211'] = [(mock, mock)] * 2
+        memcache_client.set('some_key', [1, 2, 3])
+        self.assertEquals(memcache_client.get('some_key'), [1, 2, 3])
+        memcache_client._allow_pickle = False
+        memcache_client._allow_unpickle = True
+        self.assertEquals(memcache_client.get('some_key'), [1, 2, 3])
+        memcache_client._allow_unpickle = False
+        self.assertEquals(memcache_client.get('some_key'), None)
+        memcache_client.set('some_key', [1, 2, 3])
+        self.assertEquals(memcache_client.get('some_key'), [1, 2, 3])
+        memcache_client._allow_unpickle = True
+        self.assertEquals(memcache_client.get('some_key'), [1, 2, 3])
+        memcache_client._allow_pickle = True
+        self.assertEquals(memcache_client.get('some_key'), [1, 2, 3])
 
 if __name__ == '__main__':
     unittest.main()