--- Python-3.4.0a4/Lib/hashlib.py.hashlib-fips 2013-11-07 13:29:43.046881440 +0100 +++ Python-3.4.0a4/Lib/hashlib.py 2013-11-07 13:42:04.438486289 +0100 @@ -23,6 +23,16 @@ Choose your hash function wisely. Some have known collision weaknesses. sha384 and sha512 will be slow on 32 bit platforms. +If the underlying implementation supports "FIPS mode", and this is enabled, it +may restrict the available hashes to only those that are compliant with FIPS +regulations. For example, it may deny the use of MD5, on the grounds that this +is not secure for uses such as authentication, system integrity checking, or +digital signatures. If you need to use such a hash for non-security purposes +(such as indexing into a data structure for speed), you can override the keyword +argument "usedforsecurity" from True to False to signify that your code is not +relying on the hash for security purposes, and this will allow the hash to be +usable even in FIPS mode. + Hash objects have these methods: - update(arg): Update the hash object with the bytes in arg. Repeated calls are equivalent to a single call with the concatenation of all @@ -64,6 +74,19 @@ 'algorithms_available', 'pbkdf2_hmac') +import functools +def __ignore_usedforsecurity(func): + """Used for sha3_* functions. Until OpenSSL implements them, we want + to use them from Python _sha3 module, but we want them to accept + usedforsecurity argument too.""" + # TODO: remove this function when OpenSSL implements sha3 + @functools.wraps(func) + def inner(*args, **kwargs): + if 'usedforsecurity' in kwargs: + kwargs.pop('usedforsecurity') + return func(*args, **kwargs) + return inner + def __get_builtin_constructor(name): try: if name in ('SHA1', 'sha1'): @@ -109,34 +132,41 @@ f = getattr(_hashlib, 'openssl_' + name) # Allow the C module to raise ValueError. The function will be # defined but the hash not actually available thanks to OpenSSL. 
- f() + # We pass "usedforsecurity=False" to disable FIPS-based restrictions: + # at this stage we're merely seeing if the function is callable, + # rather than using it for actual work. + f(usedforsecurity=False) # Use the C function directly (very fast) return f except (AttributeError, ValueError): + # TODO: We want to just raise here when OpenSSL implements sha3 + # because we want to make sure that Fedora uses everything from OpenSSL return __get_builtin_constructor(name) -def __py_new(name, data=b''): - """new(name, data=b'') - Return a new hashing object using the named algorithm; - optionally initialized with data (which must be bytes). +def __py_new(name, data=b'', usedforsecurity=True): + """new(name, data=b'', usedforsecurity=True) - Return a new hashing object using + the named algorithm; optionally initialized with data (which must be bytes). + The 'usedforsecurity' keyword argument does nothing, and is for compatibility + with the OpenSSL implementation """ return __get_builtin_constructor(name)(data) -def __hash_new(name, data=b''): - """new(name, data=b'') - Return a new hashing object using the named algorithm; - optionally initialized with data (which must be bytes). +def __hash_new(name, data=b'', usedforsecurity=True): + """new(name, data=b'', usedforsecurity=True) - Return a new hashing object using + the named algorithm; optionally initialized with data (which must be bytes). + + Override 'usedforsecurity' to False when using for non-security purposes in + a FIPS environment """ try: - return _hashlib.new(name, data) + return _hashlib.new(name, data, usedforsecurity) except ValueError: - # If the _hashlib module (OpenSSL) doesn't support the named - # hash, try using our builtin implementations. - # This allows for SHA224/256 and SHA384/512 support even though - # the OpenSSL library prior to 0.9.8 doesn't provide them. 
+ # TODO: We want to just raise here when OpenSSL implements sha3 + # because we want to make sure that Fedora uses everything from OpenSSL return __get_builtin_constructor(name)(data) - try: import _hashlib new = __hash_new @@ -216,7 +246,10 @@ # try them all, some may not work due to the OpenSSL # version not supporting that algorithm. try: - globals()[__func_name] = __get_hash(__func_name) + func = __get_hash(__func_name) + if 'sha3_' in __func_name: + func = __ignore_usedforsecurity(func) + globals()[__func_name] = func except ValueError: import logging logging.exception('code for hash %s was not found.', __func_name) @@ -224,3 +257,4 @@ # Cleanup locals() del __always_supported, __func_name, __get_hash del __py_new, __hash_new, __get_openssl_constructor +del __ignore_usedforsecurity --- Python-3.4.0a4/Lib/test/test_hashlib.py.hashlib-fips 2013-11-07 13:43:08.763454594 +0100 +++ Python-3.4.0a4/Lib/test/test_hashlib.py 2013-11-07 13:55:23.233038101 +0100 @@ -26,6 +26,20 @@ c_hashlib = import_fresh_module('hashlib', fresh=['_hashlib']) py_hashlib = import_fresh_module('hashlib', blocked=['_hashlib']) +def openssl_enforces_fips(): + # Use the "openssl" command (if present) to try to determine if the local + # OpenSSL is configured to enforce FIPS + from subprocess import Popen, PIPE + try: + p = Popen(['openssl', 'md5'], + stdin=PIPE, stdout=PIPE, stderr=PIPE) + except OSError: + # "openssl" command not found + return False + stdout, stderr = p.communicate(input=b'abc') + return b'unknown cipher' in stderr +OPENSSL_ENFORCES_FIPS = openssl_enforces_fips() + def hexstr(s): assert isinstance(s, bytes), repr(s) h = "0123456789abcdef" @@ -34,6 +48,16 @@ r += h[(i >> 4) & 0xF] + h[i & 0xF] return r +# hashlib and _hashlib-based functions support a "usedforsecurity" keyword +# argument, and FIPS mode requires that it be overridden with a False +# value for these selftests to work. Other cryptographic code within Python +# doesn't support this keyword. 
+# Modify a function to one in which "usedforsecurity=False" is added to the +# keyword arguments: +def suppress_fips(f): + def g(*args, **kwargs): + return f(*args, usedforsecurity=False, **kwargs) + return g class HashLibTestCase(unittest.TestCase): supported_hash_names = ( 'md5', 'MD5', 'sha1', 'SHA1', @@ -66,11 +90,11 @@ # For each algorithm, test the direct constructor and the use # of hashlib.new given the algorithm name. for algorithm, constructors in self.constructors_to_test.items(): - constructors.add(getattr(hashlib, algorithm)) + constructors.add(suppress_fips(getattr(hashlib, algorithm))) def _test_algorithm_via_hashlib_new(data=None, _alg=algorithm): if data is None: - return hashlib.new(_alg) - return hashlib.new(_alg, data) + return suppress_fips(hashlib.new)(_alg) + return suppress_fips(hashlib.new)(_alg, data) constructors.add(_test_algorithm_via_hashlib_new) _hashlib = self._conditional_import_module('_hashlib') @@ -82,22 +106,9 @@ for algorithm, constructors in self.constructors_to_test.items(): constructor = getattr(_hashlib, 'openssl_'+algorithm, None) if constructor: - constructors.add(constructor) + constructors.add(suppress_fips(constructor)) - _md5 = self._conditional_import_module('_md5') - if _md5: - self.constructors_to_test['md5'].add(_md5.md5) - _sha1 = self._conditional_import_module('_sha1') - if _sha1: - self.constructors_to_test['sha1'].add(_sha1.sha1) - _sha256 = self._conditional_import_module('_sha256') - if _sha256: - self.constructors_to_test['sha224'].add(_sha256.sha224) - self.constructors_to_test['sha256'].add(_sha256.sha256) - _sha512 = self._conditional_import_module('_sha512') - if _sha512: - self.constructors_to_test['sha384'].add(_sha512.sha384) - self.constructors_to_test['sha512'].add(_sha512.sha512) + # TODO: remove this after sha3 is available through OpenSSL _sha3 = self._conditional_import_module('_sha3') if _sha3: self.constructors_to_test['sha3_224'].add(_sha3.sha3_224) @@ -547,6 +558,65 @@ 
self.assertEqual(expected_hash, hasher.hexdigest()) + def test_issue9146(self): + # Ensure that various ways to use "MD5" from "hashlib" don't segfault: + m = hashlib.md5(usedforsecurity=False) + m.update(b'abc\n') + self.assertEquals(m.hexdigest(), "0bee89b07a248e27c83fc3d5951213c1") + + m = hashlib.new('md5', usedforsecurity=False) + m.update(b'abc\n') + self.assertEquals(m.hexdigest(), "0bee89b07a248e27c83fc3d5951213c1") + + m = hashlib.md5(b'abc\n', usedforsecurity=False) + self.assertEquals(m.hexdigest(), "0bee89b07a248e27c83fc3d5951213c1") + + m = hashlib.new('md5', b'abc\n', usedforsecurity=False) + self.assertEquals(m.hexdigest(), "0bee89b07a248e27c83fc3d5951213c1") + + @unittest.skipUnless(OPENSSL_ENFORCES_FIPS, + 'FIPS enforcement required for this test.') + def test_hashlib_fips_mode(self): + # Ensure that we raise a ValueError on vanilla attempts to use MD5 + # in hashlib in a FIPS-enforced setting: + with self.assertRaisesRegexp(ValueError, '.*unknown cipher'): + m = hashlib.md5() + + if not self._conditional_import_module('_md5'): + with self.assertRaisesRegexp(ValueError, '.*unknown cipher'): + m = hashlib.new('md5') + + @unittest.skipUnless(OPENSSL_ENFORCES_FIPS, + 'FIPS enforcement required for this test.') + def test_hashopenssl_fips_mode(self): + # Verify the _hashlib module's handling of md5: + _hashlib = self._conditional_import_module('_hashlib') + if _hashlib: + assert hasattr(_hashlib, 'openssl_md5') + + # Ensure that _hashlib raises a ValueError on vanilla attempts to + # use MD5 in a FIPS-enforced setting: + with self.assertRaisesRegexp(ValueError, '.*unknown cipher'): + m = _hashlib.openssl_md5() + with self.assertRaisesRegexp(ValueError, '.*unknown cipher'): + m = _hashlib.new('md5') + + # Ensure that in such a setting we can whitelist a callsite with + # usedforsecurity=False and have it succeed: + m = _hashlib.openssl_md5(usedforsecurity=False) + m.update(b'abc\n') + self.assertEquals(m.hexdigest(), "0bee89b07a248e27c83fc3d5951213c1") 
+ + m = _hashlib.new('md5', usedforsecurity=False) + m.update(b'abc\n') + self.assertEquals(m.hexdigest(), "0bee89b07a248e27c83fc3d5951213c1") + + m = _hashlib.openssl_md5(b'abc\n', usedforsecurity=False) + self.assertEquals(m.hexdigest(), "0bee89b07a248e27c83fc3d5951213c1") + + m = _hashlib.new('md5', b'abc\n', usedforsecurity=False) + self.assertEquals(m.hexdigest(), "0bee89b07a248e27c83fc3d5951213c1") + class KDFTests(unittest.TestCase): @@ -628,6 +698,7 @@ with self.assertRaisesRegex(ValueError, 'unsupported hash type'): pbkdf2('unknown', b'pass', b'salt', 1) + @unittest.skip('skipped on Fedora, as we always use OpenSSL pbkdf2_hmac') def test_pbkdf2_hmac_py(self): self._test_pbkdf2_hmac(py_hashlib.pbkdf2_hmac) --- Python-3.4.0a4/Modules/_hashopenssl.c.hashlib-fips 2013-11-07 13:55:47.466025086 +0100 +++ Python-3.4.0a4/Modules/_hashopenssl.c 2013-11-07 14:14:32.745272791 +0100 @@ -19,6 +19,8 @@ /* EVP is the preferred interface to hashing in OpenSSL */ +#include <openssl/ssl.h> +#include <openssl/err.h> #include <openssl/evp.h> #include <openssl/hmac.h> /* We use the object interface to discover what hashes OpenSSL supports. */ @@ -48,11 +50,19 @@ static PyTypeObject EVPtype; +/* Struct to hold all the cached information we need on a specific algorithm. 
+ We have one of these per algorithm */ +typedef struct { + PyObject *name_obj; + EVP_MD_CTX ctxs[2]; + /* ctx_ptrs will point to ctxs unless an error occurred, when it will + be NULL: */ + EVP_MD_CTX *ctx_ptrs[2]; + PyObject *error_msgs[2]; +} EVPCachedInfo; -#define DEFINE_CONSTS_FOR_NEW(Name) \ - static PyObject *CONST_ ## Name ## _name_obj = NULL; \ - static EVP_MD_CTX CONST_new_ ## Name ## _ctx; \ - static EVP_MD_CTX *CONST_new_ ## Name ## _ctx_p = NULL; +#define DEFINE_CONSTS_FOR_NEW(Name) \ + static EVPCachedInfo cached_info_ ##Name; DEFINE_CONSTS_FOR_NEW(md5) DEFINE_CONSTS_FOR_NEW(sha1) @@ -125,6 +135,48 @@ } } +static void +mc_ctx_init(EVP_MD_CTX *ctx, int usedforsecurity) +{ + EVP_MD_CTX_init(ctx); + + /* + If the user has declared that this digest is being used in a + non-security role (e.g. indexing into a data structure), set + the exception flag for openssl to allow it + */ + if (!usedforsecurity) { +#ifdef EVP_MD_CTX_FLAG_NON_FIPS_ALLOW + EVP_MD_CTX_set_flags(ctx, + EVP_MD_CTX_FLAG_NON_FIPS_ALLOW); +#endif + } +} + +/* Get an error msg for the last error as a PyObject */ +static PyObject * +error_msg_for_last_error(void) +{ + char *errstr; + + errstr = ERR_error_string(ERR_peek_last_error(), NULL); + ERR_clear_error(); + + return PyUnicode_FromString(errstr); /* Can be NULL */ +} + +static void +set_evp_exception(void) +{ + char *errstr; + + errstr = ERR_error_string(ERR_peek_last_error(), NULL); + ERR_clear_error(); + + PyErr_SetString(PyExc_ValueError, errstr); +} + + /* Internal methods for a hash object */ static void @@ -309,15 +361,16 @@ static int EVP_tp_init(EVPobject *self, PyObject *args, PyObject *kwds) { - static char *kwlist[] = {"name", "string", NULL}; + static char *kwlist[] = {"name", "string", "usedforsecurity", NULL}; PyObject *name_obj = NULL; PyObject *data_obj = NULL; + int usedforsecurity = 1; Py_buffer view; char *nameStr; const EVP_MD *digest; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:HASH", kwlist, - &name_obj, 
&data_obj)) { + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|Oi:HASH", kwlist, + &name_obj, &data_obj, &usedforsecurity)) { return -1; } @@ -338,7 +391,12 @@ PyBuffer_Release(&view); return -1; } - EVP_DigestInit(&self->ctx, digest); + mc_ctx_init(&self->ctx, usedforsecurity); + if (!EVP_DigestInit_ex(&self->ctx, digest, NULL)) { + set_evp_exception(); + PyBuffer_Release(&view); + return -1; + } self->name = name_obj; Py_INCREF(self->name); @@ -422,7 +480,8 @@ static PyObject * EVPnew(PyObject *name_obj, const EVP_MD *digest, const EVP_MD_CTX *initial_ctx, - const unsigned char *cp, Py_ssize_t len) + const unsigned char *cp, Py_ssize_t len, + int usedforsecurity) { EVPobject *self; @@ -437,7 +495,12 @@ if (initial_ctx) { EVP_MD_CTX_copy(&self->ctx, initial_ctx); } else { - EVP_DigestInit(&self->ctx, digest); + mc_ctx_init(&self->ctx, usedforsecurity); + if (!EVP_DigestInit_ex(&self->ctx, digest, NULL)) { + set_evp_exception(); + Py_DECREF(self); + return NULL; + } } if (cp && len) { @@ -461,21 +524,29 @@ An optional string argument may be provided and will be\n\ automatically hashed.\n\ \n\ -The MD5 and SHA1 algorithms are always supported.\n"); +The MD5 and SHA1 algorithms are always supported.\n\ +\n\ +An optional \"usedforsecurity=True\" keyword argument is provided for use in\n\ +environments that enforce FIPS-based restrictions. Some implementations of\n\ +OpenSSL can be configured to prevent the usage of non-secure algorithms (such\n\ +as MD5). If you have a non-security use for these algorithms (e.g. 
a hash\n\ +table), you can override this argument by marking the callsite as\n\ +\"usedforsecurity=False\"."); static PyObject * EVP_new(PyObject *self, PyObject *args, PyObject *kwdict) { - static char *kwlist[] = {"name", "string", NULL}; + static char *kwlist[] = {"name", "string", "usedforsecurity", NULL}; PyObject *name_obj = NULL; PyObject *data_obj = NULL; + int usedforsecurity = 1; Py_buffer view = { 0 }; PyObject *ret_obj; char *name; const EVP_MD *digest; - if (!PyArg_ParseTupleAndKeywords(args, kwdict, "O|O:new", kwlist, - &name_obj, &data_obj)) { + if (!PyArg_ParseTupleAndKeywords(args, kwdict, "O|Oi:new", kwlist, + &name_obj, &data_obj, &usedforsecurity)) { return NULL; } @@ -489,7 +560,8 @@ digest = EVP_get_digestbyname(name); - ret_obj = EVPnew(name_obj, digest, NULL, (unsigned char*)view.buf, view.len); + ret_obj = EVPnew(name_obj, digest, NULL, (unsigned char*)view.buf, view.len, + usedforsecurity); if (data_obj) PyBuffer_Release(&view); @@ -744,57 +816,115 @@ /* - * This macro generates constructor function definitions for specific - * hash algorithms. These constructors are much faster than calling - * the generic one passing it a python string and are noticably - * faster than calling a python new() wrapper. Thats important for + * This macro and function generate a family of constructor function + * definitions for specific hash algorithms. These constructors are much + * faster than calling the generic one passing it a python string and are + * noticeably faster than calling a python new() wrapper. That's important for * code that wants to make hashes of a bunch of small strings. 
*/ #define GEN_CONSTRUCTOR(NAME) \ static PyObject * \ - EVP_new_ ## NAME (PyObject *self, PyObject *args) \ + EVP_new_ ## NAME (PyObject *self, PyObject *args, PyObject *kwdict) \ { \ - PyObject *data_obj = NULL; \ - Py_buffer view = { 0 }; \ - PyObject *ret_obj; \ - \ - if (!PyArg_ParseTuple(args, "|O:" #NAME , &data_obj)) { \ - return NULL; \ - } \ - \ - if (data_obj) \ - GET_BUFFER_VIEW_OR_ERROUT(data_obj, &view); \ - \ - ret_obj = EVPnew( \ - CONST_ ## NAME ## _name_obj, \ - NULL, \ - CONST_new_ ## NAME ## _ctx_p, \ - (unsigned char*)view.buf, \ - view.len); \ - \ - if (data_obj) \ - PyBuffer_Release(&view); \ - return ret_obj; \ + return implement_specific_EVP_new(self, args, kwdict, \ + "|Oi:" #NAME, \ + &cached_info_ ## NAME ); \ + } + +static PyObject * +implement_specific_EVP_new(PyObject *self, PyObject *args, PyObject *kwdict, + const char *format, + EVPCachedInfo *cached_info) +{ + static char *kwlist[] = {"string", "usedforsecurity", NULL}; + PyObject *data_obj = NULL; + Py_buffer view = { 0 }; + int usedforsecurity = 1; + int idx; + PyObject *ret_obj = NULL; + + assert(cached_info); + + if (!PyArg_ParseTupleAndKeywords(args, kwdict, format, kwlist, + &data_obj, &usedforsecurity)) { + return NULL; + } + + if (data_obj) + GET_BUFFER_VIEW_OR_ERROUT(data_obj, &view); + + idx = usedforsecurity ? 1 : 0; + + /* + * If an error occurred during creation of the global content, the ctx_ptr + * will be NULL, and the error_msg will hopefully be non-NULL: + */ + if (cached_info->ctx_ptrs[idx]) { + /* We successfully initialized this context; copy it: */ + ret_obj = EVPnew(cached_info->name_obj, + NULL, + cached_info->ctx_ptrs[idx], + (unsigned char*)view.buf, view.len, + usedforsecurity); + } else { + /* Some kind of error happened initializing the global context for + this (digest, usedforsecurity) pair. 
+ Raise an exception with the saved error message: */ + if (cached_info->error_msgs[idx]) { + PyErr_SetObject(PyExc_ValueError, cached_info->error_msgs[idx]); + } else { + PyErr_SetString(PyExc_ValueError, "Error initializing hash"); + } } + if (data_obj) + PyBuffer_Release(&view); + + return ret_obj; +} + /* a PyMethodDef structure for the constructor */ #define CONSTRUCTOR_METH_DEF(NAME) \ - {"openssl_" #NAME, (PyCFunction)EVP_new_ ## NAME, METH_VARARGS, \ + {"openssl_" #NAME, (PyCFunction)EVP_new_ ## NAME, \ + METH_VARARGS|METH_KEYWORDS, \ PyDoc_STR("Returns a " #NAME \ " hash object; optionally initialized with a string") \ } -/* used in the init function to setup a constructor: initialize OpenSSL - constructor constants if they haven't been initialized already. */ -#define INIT_CONSTRUCTOR_CONSTANTS(NAME) do { \ - if (CONST_ ## NAME ## _name_obj == NULL) { \ - CONST_ ## NAME ## _name_obj = PyUnicode_FromString(#NAME); \ - if (EVP_get_digestbyname(#NAME)) { \ - CONST_new_ ## NAME ## _ctx_p = &CONST_new_ ## NAME ## _ctx; \ - EVP_DigestInit(CONST_new_ ## NAME ## _ctx_p, EVP_get_digestbyname(#NAME)); \ - } \ - } \ +/* + Macro/function pair to set up the constructors. + + Try to initialize a context for each hash twice, once with + EVP_MD_CTX_FLAG_NON_FIPS_ALLOW and once without. 
+ + Any that have errors during initialization will end up with a NULL ctx_ptrs + entry, and error_msgs will be set (unless we're very low on memory) +*/ +#define INIT_CONSTRUCTOR_CONSTANTS(NAME) do { \ + init_constructor_constant(&cached_info_ ## NAME, #NAME); \ } while (0); +static void +init_constructor_constant(EVPCachedInfo *cached_info, const char *name) +{ + assert(cached_info); + cached_info->name_obj = PyUnicode_FromString(name); + if (EVP_get_digestbyname(name)) { + int i; + for (i=0; i<2; i++) { + mc_ctx_init(&cached_info->ctxs[i], i); + if (EVP_DigestInit_ex(&cached_info->ctxs[i], + EVP_get_digestbyname(name), NULL)) { + /* Success: */ + cached_info->ctx_ptrs[i] = &cached_info->ctxs[i]; + } else { + /* Failure: */ + cached_info->ctx_ptrs[i] = NULL; + cached_info->error_msgs[i] = error_msg_for_last_error(); + } + } + } +} + GEN_CONSTRUCTOR(md5) GEN_CONSTRUCTOR(sha1) @@ -845,12 +974,10 @@ { PyObject *m, *openssl_md_meth_names; - OpenSSL_add_all_digests(); + SSL_load_error_strings(); + SSL_library_init(); - /* TODO build EVP_functions openssl_* entries dynamically based - * on what hashes are supported rather than listing many - * but having some be unsupported. Only init appropriate - * constants. */ + OpenSSL_add_all_digests(); Py_TYPE(&EVPtype) = &PyType_Type; if (PyType_Ready(&EVPtype) < 0)