Bugzilla: 1035000
Upstream-status: 3.13 and submitted for 3.13

From adb466891c981db26df5b23ae5a7062e47dfd323 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 30 Oct 2013 11:15:24 +0000
Subject: [PATCH 01/10] KEYS: Fix a race between negating a key and reading the
 error set

In key_reject_and_link(), marking a key as negative and setting the error
with which it was negated races with keyring searches and other code that
reads that error.

The fix is to switch the order in which the assignments are done in
key_reject_and_link() and to use memory barriers.
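
Condensed from the hunks below, the idea is that the writer publishes the
error before setting the NEGATIVE flag and readers issue a paired read
barrier after seeing the flag:

	/* negation side (key_reject_and_link) */
	key->type_data.reject_error = -error;
	smp_wmb();		/* make the error visible before the flag */
	set_bit(KEY_FLAG_NEGATIVE, &key->flags);

	/* reader side (e.g. wait_for_key_construction) */
	if (test_bit(KEY_FLAG_NEGATIVE, &key->flags)) {
		smp_rmb();	/* pairs with the smp_wmb() above */
		return key->type_data.reject_error;
	}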

Kudos to Dave Wysochanski <dwysocha@redhat.com> and Scott Mayhew
<smayhew@redhat.com> for tracking this down.

This may be the cause of:

BUG: unable to handle kernel NULL pointer dereference at 0000000000000070
IP: [<ffffffff81219011>] wait_for_key_construction+0x31/0x80
PGD c6b2c3067 PUD c59879067 PMD 0
Oops: 0000 [#1] SMP
last sysfs file: /sys/devices/system/cpu/cpu3/cache/index2/shared_cpu_map
CPU 0
Modules linked in: ...

Pid: 13359, comm: amqzxma0 Not tainted 2.6.32-358.20.1.el6.x86_64 #1 IBM System x3650 M3 -[7945PSJ]-/00J6159
RIP: 0010:[<ffffffff81219011>] wait_for_key_construction+0x31/0x80
RSP: 0018:ffff880c6ab33758  EFLAGS: 00010246
RAX: ffffffff81219080 RBX: 0000000000000000 RCX: 0000000000000002
RDX: ffffffff81219060 RSI: 0000000000000000 RDI: 0000000000000000
RBP: ffff880c6ab33768 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000001 R11: 0000000000000000 R12: ffff880adfcbce40
R13: ffffffffa03afb84 R14: ffff880adfcbce40 R15: ffff880adfcbce43
FS:  00007f29b8042700(0000) GS:ffff880028200000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000000000000070 CR3: 0000000c613dc000 CR4: 00000000000007f0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
Process amqzxma0 (pid: 13359, threadinfo ffff880c6ab32000, task ffff880c610deae0)
Stack:
 ffff880adfcbce40 0000000000000000 ffff880c6ab337b8 ffffffff81219695
<d> 0000000000000000 ffff880a000000d0 ffff880c6ab337a8 000000000000000f
<d> ffffffffa03afb93 000000000000000f ffff88186c7882c0 0000000000000014
Call Trace:
 [<ffffffff81219695>] request_key+0x65/0xa0
 [<ffffffffa03a0885>] nfs_idmap_request_key+0xc5/0x170 [nfs]
 [<ffffffffa03a0eb4>] nfs_idmap_lookup_id+0x34/0x80 [nfs]
 [<ffffffffa03a1255>] nfs_map_group_to_gid+0x75/0xa0 [nfs]
 [<ffffffffa039a9ad>] decode_getfattr_attrs+0xbdd/0xfb0 [nfs]
 [<ffffffff81057310>] ? __dequeue_entity+0x30/0x50
 [<ffffffff8100988e>] ? __switch_to+0x26e/0x320
 [<ffffffffa039ae03>] decode_getfattr+0x83/0xe0 [nfs]
 [<ffffffffa039b610>] ? nfs4_xdr_dec_getattr+0x0/0xa0 [nfs]
 [<ffffffffa039b69f>] nfs4_xdr_dec_getattr+0x8f/0xa0 [nfs]
 [<ffffffffa02dada4>] rpcauth_unwrap_resp+0x84/0xb0 [sunrpc]
 [<ffffffffa039b610>] ? nfs4_xdr_dec_getattr+0x0/0xa0 [nfs]
 [<ffffffffa02cf923>] call_decode+0x1b3/0x800 [sunrpc]
 [<ffffffff81096de0>] ? wake_bit_function+0x0/0x50
 [<ffffffffa02cf770>] ? call_decode+0x0/0x800 [sunrpc]
 [<ffffffffa02d99a7>] __rpc_execute+0x77/0x350 [sunrpc]
 [<ffffffff81096c67>] ? bit_waitqueue+0x17/0xd0
 [<ffffffffa02d9ce1>] rpc_execute+0x61/0xa0 [sunrpc]
 [<ffffffffa02d03a5>] rpc_run_task+0x75/0x90 [sunrpc]
 [<ffffffffa02d04c2>] rpc_call_sync+0x42/0x70 [sunrpc]
 [<ffffffffa038ff80>] _nfs4_call_sync+0x30/0x40 [nfs]
 [<ffffffffa038836c>] _nfs4_proc_getattr+0xac/0xc0 [nfs]
 [<ffffffff810aac87>] ? futex_wait+0x227/0x380
 [<ffffffffa038b856>] nfs4_proc_getattr+0x56/0x80 [nfs]
 [<ffffffffa0371403>] __nfs_revalidate_inode+0xe3/0x220 [nfs]
 [<ffffffffa037158e>] nfs_revalidate_mapping+0x4e/0x170 [nfs]
 [<ffffffffa036f147>] nfs_file_read+0x77/0x130 [nfs]
 [<ffffffff811811aa>] do_sync_read+0xfa/0x140
 [<ffffffff81096da0>] ? autoremove_wake_function+0x0/0x40
 [<ffffffff8100bb8e>] ? apic_timer_interrupt+0xe/0x20
 [<ffffffff8100b9ce>] ? common_interrupt+0xe/0x13
 [<ffffffff81228ffb>] ? selinux_file_permission+0xfb/0x150
 [<ffffffff8121bed6>] ? security_file_permission+0x16/0x20
 [<ffffffff81181a95>] vfs_read+0xb5/0x1a0
 [<ffffffff81181bd1>] sys_read+0x51/0x90
 [<ffffffff810dc685>] ? __audit_syscall_exit+0x265/0x290
 [<ffffffff8100b072>] system_call_fastpath+0x16/0x1b

Signed-off-by: David Howells <dhowells@redhat.com>
cc: Dave Wysochanski <dwysocha@redhat.com>
cc: Scott Mayhew <smayhew@redhat.com>
---
 security/keys/key.c         | 3 ++-
 security/keys/keyring.c     | 1 +
 security/keys/request_key.c | 4 +++-
 3 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/security/keys/key.c b/security/keys/key.c
index d331ea9..55d110f 100644
--- a/security/keys/key.c
+++ b/security/keys/key.c
@@ -557,9 +557,10 @@ int key_reject_and_link(struct key *key,
 	if (!test_bit(KEY_FLAG_INSTANTIATED, &key->flags)) {
 		/* mark the key as being negatively instantiated */
 		atomic_inc(&key->user->nikeys);
+		key->type_data.reject_error = -error;
+		smp_wmb();
 		set_bit(KEY_FLAG_NEGATIVE, &key->flags);
 		set_bit(KEY_FLAG_INSTANTIATED, &key->flags);
-		key->type_data.reject_error = -error;
 		now = current_kernel_time();
 		key->expiry = now.tv_sec + timeout;
 		key_schedule_gc(key->expiry + key_gc_delay);
diff --git a/security/keys/keyring.c b/security/keys/keyring.c
index 9b6f6e0..8c05ebd 100644
--- a/security/keys/keyring.c
+++ b/security/keys/keyring.c
@@ -551,6 +551,7 @@ static int keyring_search_iterator(const void *object, void *iterator_data)
 	if (ctx->flags & KEYRING_SEARCH_DO_STATE_CHECK) {
 		/* we set a different error code if we pass a negative key */
 		if (kflags & (1 << KEY_FLAG_NEGATIVE)) {
+			smp_rmb();
 			ctx->result = ERR_PTR(key->type_data.reject_error);
 			kleave(" = %d [neg]", ctx->skipped_ret);
 			goto skipped;
diff --git a/security/keys/request_key.c b/security/keys/request_key.c
index df94827..3814119 100644
--- a/security/keys/request_key.c
+++ b/security/keys/request_key.c
@@ -596,8 +596,10 @@ int wait_for_key_construction(struct key *key, bool intr)
 			  intr ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE);
 	if (ret < 0)
 		return ret;
-	if (test_bit(KEY_FLAG_NEGATIVE, &key->flags))
+	if (test_bit(KEY_FLAG_NEGATIVE, &key->flags)) {
+		smp_rmb();
 		return key->type_data.reject_error;
+	}
 	return key_validate(key);
 }
 EXPORT_SYMBOL(wait_for_key_construction);
-- 
1.8.3.1


From 3a35b12cb5167463dd6061bb29da9116fc08625b Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 30 Oct 2013 11:15:24 +0000
Subject: [PATCH 02/10] KEYS: Fix keyring quota misaccounting on key
 replacement and unlink

If a key is displaced from a keyring by a matching one, then four more bytes
of quota are allocated to the keyring - despite the fact that the keyring does
not change in size.

Further, when a key is unlinked from a keyring, the four bytes of quota
allocated to the link aren't recovered and returned to the user's pool.

The first can be tested by repeating:

	keyctl add big_key a fred @s
	cat /proc/key-users

(Don't put it in a shell loop, otherwise the garbage collector won't have
time to clear the displaced keys, which would affect the result.)

This was causing the kerberos keyring to run out of room fairly quickly.

The second can be tested by:

	cat /proc/key-users
	a=`keyctl add user a a @s`
	cat /proc/key-users
	keyctl unlink $a
	sleep 1 # Give RCU a chance to delete the key
	cat /proc/key-users

Assuming no other system activity adds or removes keys, the amount of key
data allocated should go up (say 40/20000 -> 47/20000) and then return to
the original value at the end.
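
In outline (condensed from the hunks below), quota is now reserved only when
the edit will genuinely add a new link rather than replace one in place, and
it is given back when a link is removed:

	/* __key_link_begin(): charge quota only for a new link */
	if (!edit->dead_leaf) {
		ret = key_payload_reserve(keyring,
					  keyring->datalen + KEYQUOTA_LINK_BYTES);
		if (ret < 0)
			goto error_cancel;
	}

	/* key_unlink(): return the link's quota once the edit is applied */
	assoc_array_apply_edit(edit);
	key_payload_reserve(keyring, keyring->datalen - KEYQUOTA_LINK_BYTES);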

Reported-by: Stephen Gallagher <sgallagh@redhat.com>
Signed-off-by: David Howells <dhowells@redhat.com>
---
 security/keys/keyring.c | 27 +++++++++++++++------------
 1 file changed, 15 insertions(+), 12 deletions(-)

diff --git a/security/keys/keyring.c b/security/keys/keyring.c
index 8c05ebd..d80311e 100644
--- a/security/keys/keyring.c
+++ b/security/keys/keyring.c
@@ -1063,12 +1063,6 @@ int __key_link_begin(struct key *keyring,
 	if (index_key->type == &key_type_keyring)
 		down_write(&keyring_serialise_link_sem);
 
-	/* check that we aren't going to overrun the user's quota */
-	ret = key_payload_reserve(keyring,
-				  keyring->datalen + KEYQUOTA_LINK_BYTES);
-	if (ret < 0)
-		goto error_sem;
-
 	/* Create an edit script that will insert/replace the key in the
 	 * keyring tree.
 	 */
@@ -1078,17 +1072,25 @@ int __key_link_begin(struct key *keyring,
 				  NULL);
 	if (IS_ERR(edit)) {
 		ret = PTR_ERR(edit);
-		goto error_quota;
+		goto error_sem;
+	}
+
+	/* If we're not replacing a link in-place then we're going to need some
+	 * extra quota.
+	 */
+	if (!edit->dead_leaf) {
+		ret = key_payload_reserve(keyring,
+					  keyring->datalen + KEYQUOTA_LINK_BYTES);
+		if (ret < 0)
+			goto error_cancel;
 	}
 
 	*_edit = edit;
 	kleave(" = 0");
 	return 0;
 
-error_quota:
-	/* undo the quota changes */
-	key_payload_reserve(keyring,
-			    keyring->datalen - KEYQUOTA_LINK_BYTES);
+error_cancel:
+	assoc_array_cancel_edit(edit);
 error_sem:
 	if (index_key->type == &key_type_keyring)
 		up_write(&keyring_serialise_link_sem);
@@ -1146,7 +1148,7 @@ void __key_link_end(struct key *keyring,
 	if (index_key->type == &key_type_keyring)
 		up_write(&keyring_serialise_link_sem);
 
-	if (edit) {
+	if (edit && !edit->dead_leaf) {
 		key_payload_reserve(keyring,
 				    keyring->datalen - KEYQUOTA_LINK_BYTES);
 		assoc_array_cancel_edit(edit);
@@ -1243,6 +1245,7 @@ int key_unlink(struct key *keyring, struct key *key)
 		goto error;
 
 	assoc_array_apply_edit(edit);
+	key_payload_reserve(keyring, keyring->datalen - KEYQUOTA_LINK_BYTES);
 	ret = 0;
 
 error:
-- 
1.8.3.1


From 196d3798421b8e331a538a5ea9b4ce7789c0f048 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 14 Nov 2013 13:02:31 +0000
Subject: [PATCH 03/10] KEYS: Fix keyring content gc scanner

Key pointers stored in the keyring are marked in bit 1 to indicate if they
point to a keyring.  We need to strip off this bit before using the pointer
when iterating over the keyring for the purpose of looking for links to garbage
collect.

This means that expirable keyrings aren't correctly expiring because the
checker is seeing their key pointer with 2 added to it.

Since the fix for this involves knowing about the internals of the keyring,
key_gc_keyring() is moved to keyring.c and merged into keyring_gc().
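
The crux of the change (taken from the hunks below) is that the check
iterator now converts the stored pointer back to a key pointer, stripping
the keyring marker bit, before testing it:

	static int keyring_gc_check_iterator(const void *object, void *iterator_data)
	{
		const struct key *key = keyring_ptr_to_key(object);
		time_t *limit = iterator_data;

		key_check(key);
		return key_is_dead(key, *limit);
	}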

This can be tested by:

	echo 2 >/proc/sys/kernel/keys/gc_delay
	keyctl timeout `keyctl add keyring qwerty "" @s` 2
	cat /proc/keys
	sleep 5; cat /proc/keys

which should see a keyring called "qwerty" appear in the session keyring and
then disappear after it expires, and:

	echo 2 >/proc/sys/kernel/keys/gc_delay
	a=`keyctl get_persistent @s`
	b=`keyctl add keyring 0 "" $a`
	keyctl add user a a $b
	keyctl timeout $b 2
	cat /proc/keys
	sleep 5; cat /proc/keys

which should see a keyring called "0" with a key called "a" in it appear in the
user's persistent keyring (which will be attached to the session keyring) and
then both the "0" keyring and the "a" key should disappear when the "0" keyring
expires.

Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Simo Sorce <simo@redhat.com>
---
 security/keys/gc.c      | 42 +-----------------------------------------
 security/keys/keyring.c | 45 +++++++++++++++++++++++++++++++++++----------
 2 files changed, 36 insertions(+), 51 deletions(-)

diff --git a/security/keys/gc.c b/security/keys/gc.c
index cce621c..d3222b6 100644
--- a/security/keys/gc.c
+++ b/security/keys/gc.c
@@ -130,46 +130,6 @@ void key_gc_keytype(struct key_type *ktype)
 	kleave("");
 }
 
-static int key_gc_keyring_func(const void *object, void *iterator_data)
-{
-	const struct key *key = object;
-	time_t *limit = iterator_data;
-	return key_is_dead(key, *limit);
-}
-
-/*
- * Garbage collect pointers from a keyring.
- *
- * Not called with any locks held.  The keyring's key struct will not be
- * deallocated under us as only our caller may deallocate it.
- */
-static void key_gc_keyring(struct key *keyring, time_t limit)
-{
-	int result;
-
-	kenter("%x{%s}", keyring->serial, keyring->description ?: "");
-
-	if (keyring->flags & ((1 << KEY_FLAG_INVALIDATED) |
-			      (1 << KEY_FLAG_REVOKED)))
-		goto dont_gc;
-
-	/* scan the keyring looking for dead keys */
-	rcu_read_lock();
-	result = assoc_array_iterate(&keyring->keys,
-				     key_gc_keyring_func, &limit);
-	rcu_read_unlock();
-	if (result == true)
-		goto do_gc;
-
-dont_gc:
-	kleave(" [no gc]");
-	return;
-
-do_gc:
-	keyring_gc(keyring, limit);
-	kleave(" [gc]");
-}
-
 /*
  * Garbage collect a list of unreferenced, detached keys
  */
@@ -388,7 +348,7 @@ found_unreferenced_key:
 	 */
 found_keyring:
 	spin_unlock(&key_serial_lock);
-	key_gc_keyring(key, limit);
+	keyring_gc(key, limit);
 	goto maybe_resched;
 
 	/* We found a dead key that is still referenced.  Reset its type and
diff --git a/security/keys/keyring.c b/security/keys/keyring.c
index d80311e..69f0cb7 100644
--- a/security/keys/keyring.c
+++ b/security/keys/keyring.c
@@ -1304,7 +1304,7 @@ static void keyring_revoke(struct key *keyring)
 	}
 }
 
-static bool gc_iterator(void *object, void *iterator_data)
+static bool keyring_gc_select_iterator(void *object, void *iterator_data)
 {
 	struct key *key = keyring_ptr_to_key(object);
 	time_t *limit = iterator_data;
@@ -1315,22 +1315,47 @@ static bool gc_iterator(void *object, void *iterator_data)
 	return true;
 }
 
+static int keyring_gc_check_iterator(const void *object, void *iterator_data)
+{
+	const struct key *key = keyring_ptr_to_key(object);
+	time_t *limit = iterator_data;
+
+	key_check(key);
+	return key_is_dead(key, *limit);
+}
+
 /*
- * Collect garbage from the contents of a keyring, replacing the old list with
- * a new one with the pointers all shuffled down.
+ * Garbage collect pointers from a keyring.
  *
- * Dead keys are classed as oned that are flagged as being dead or are revoked,
- * expired or negative keys that were revoked or expired before the specified
- * limit.
+ * Not called with any locks held.  The keyring's key struct will not be
+ * deallocated under us as only our caller may deallocate it.
  */
 void keyring_gc(struct key *keyring, time_t limit)
 {
-	kenter("{%x,%s}", key_serial(keyring), keyring->description);
+	int result;
+
+	kenter("%x{%s}", keyring->serial, keyring->description ?: "");
 
+	if (keyring->flags & ((1 << KEY_FLAG_INVALIDATED) |
+			      (1 << KEY_FLAG_REVOKED)))
+		goto dont_gc;
+
+	/* scan the keyring looking for dead keys */
+	rcu_read_lock();
+	result = assoc_array_iterate(&keyring->keys,
+				     keyring_gc_check_iterator, &limit);
+	rcu_read_unlock();
+	if (result == true)
+		goto do_gc;
+
+dont_gc:
+	kleave(" [no gc]");
+	return;
+
+do_gc:
 	down_write(&keyring->sem);
 	assoc_array_gc(&keyring->keys, &keyring_assoc_array_ops,
-		       gc_iterator, &limit);
+		       keyring_gc_select_iterator, &limit);
 	up_write(&keyring->sem);
-
-	kleave("");
+	kleave(" [gc]");
 }
-- 
1.8.3.1


From 49fbad9064d603b093ee3e101463ccf6756f5120 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <yongjun_wei@trendmicro.com.cn>
Date: Wed, 30 Oct 2013 11:23:02 +0800
Subject: [PATCH 04/10] KEYS: fix error return code in big_key_instantiate()

Fix to return a negative error code from the error handling
case instead of 0, as done elsewhere in this function.

Signed-off-by: Wei Yongjun <yongjun_wei@trendmicro.com.cn>
Signed-off-by: David Howells <dhowells@redhat.com>
---
 security/keys/big_key.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/security/keys/big_key.c b/security/keys/big_key.c
index 5f9defc..2cf5e62 100644
--- a/security/keys/big_key.c
+++ b/security/keys/big_key.c
@@ -71,8 +71,10 @@ int big_key_instantiate(struct key *key, struct key_preparsed_payload *prep)
 		 * TODO: Encrypt the stored data with a temporary key.
 		 */
 		file = shmem_file_setup("", datalen, 0);
-		if (IS_ERR(file))
+		if (IS_ERR(file)) {
+			ret = PTR_ERR(file);
 			goto err_quota;
+		}
 
 		written = kernel_write(file, prep->data, prep->datalen, 0);
 		if (written != datalen) {
-- 
1.8.3.1


From 2900f2b4200258a1be949a5e3644e7d4b55c4e82 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 13 Nov 2013 16:51:06 +0000
Subject: [PATCH 05/10] KEYS: Fix error handling in big_key instantiation

In the big_key_instantiate() function we return 0 rather than an error code
if kernel_write() fails.  This can potentially lead to
dentry_open() giving a BUG when called from big_key_read() with an unset
tmpfile path.

	------------[ cut here ]------------
	kernel BUG at fs/open.c:798!
	...
	RIP: 0010:[<ffffffff8119bbd1>] dentry_open+0xd1/0xe0
	...
	Call Trace:
	 [<ffffffff812350c5>] big_key_read+0x55/0x100
	 [<ffffffff81231084>] keyctl_read_key+0xb4/0xe0
	 [<ffffffff81231e58>] SyS_keyctl+0xf8/0x1d0
	 [<ffffffff815bb799>] system_call_fastpath+0x16/0x1b

Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Stephen Gallagher <sgallagh@redhat.com>
---
 security/keys/big_key.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/security/keys/big_key.c b/security/keys/big_key.c
index 2cf5e62..7f44c32 100644
--- a/security/keys/big_key.c
+++ b/security/keys/big_key.c
@@ -78,6 +78,7 @@ int big_key_instantiate(struct key *key, struct key_preparsed_payload *prep)
 
 		written = kernel_write(file, prep->data, prep->datalen, 0);
 		if (written != datalen) {
+			ret = written;
 			if (written >= 0)
 				ret = -ENOMEM;
 			goto err_fput;
-- 
1.8.3.1


From b6b0e230e3d26b31ab075455c2ebdde9b194f8f5 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Mon, 2 Dec 2013 11:24:18 +0000
Subject: [PATCH 06/10] KEYS: Pre-clear struct key on allocation

The second word of key->payload does not get initialised in key_alloc(), but
the big_key type is relying on it having been cleared.  The problem comes when
big_key fails to instantiate a large key and doesn't then set the payload.  The
big_key_destroy() op is called from the garbage collector and this assumes that
the dentry pointer stored in the second word will be NULL if instantiation did
not complete.

Therefore just pre-clear the entire struct key on allocation rather than
trying to be clever and initialising to 0 only those bits that aren't
otherwise initialised.
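
The whole of the change (see the hunk below) amounts to zeroing the object
at allocation time:

	key = kmem_cache_zalloc(key_jar, GFP_KERNEL);

which lets the individual zero-assignments (key->flags, key->expiry, etc.)
and the memset() of key->type_data be dropped.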

The lack of initialisation can lead to a bug report like the following if
big_key failed to initialise its file:

	general protection fault: 0000 [#1] SMP
	Modules linked in: ...
	CPU: 0 PID: 51 Comm: kworker/0:1 Not tainted 3.10.0-53.el7.x86_64 #1
	Hardware name: Dell Inc. PowerEdge 1955/0HC513, BIOS 1.4.4 12/09/2008
	Workqueue: events key_garbage_collector
	task: ffff8801294f5680 ti: ffff8801296e2000 task.ti: ffff8801296e2000
	RIP: 0010:[<ffffffff811b4a51>] dput+0x21/0x2d0
	...
	Call Trace:
	 [<ffffffff811a7b06>] path_put+0x16/0x30
	 [<ffffffff81235604>] big_key_destroy+0x44/0x60
	 [<ffffffff8122dc4b>] key_gc_unused_keys.constprop.2+0x5b/0xe0
	 [<ffffffff8122df2f>] key_garbage_collector+0x1df/0x3c0
	 [<ffffffff8107759b>] process_one_work+0x17b/0x460
	 [<ffffffff8107834b>] worker_thread+0x11b/0x400
	 [<ffffffff81078230>] ? rescuer_thread+0x3e0/0x3e0
	 [<ffffffff8107eb00>] kthread+0xc0/0xd0
	 [<ffffffff8107ea40>] ? kthread_create_on_node+0x110/0x110
	 [<ffffffff815c4bec>] ret_from_fork+0x7c/0xb0
	 [<ffffffff8107ea40>] ? kthread_create_on_node+0x110/0x110

Reported-by: Patrik Kis <pkis@redhat.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Stephen Gallagher <sgallagh@redhat.com>
---
 security/keys/key.c | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/security/keys/key.c b/security/keys/key.c
index 55d110f..6e21c11 100644
--- a/security/keys/key.c
+++ b/security/keys/key.c
@@ -272,7 +272,7 @@ struct key *key_alloc(struct key_type *type, const char *desc,
 	}
 
 	/* allocate and initialise the key and its description */
-	key = kmem_cache_alloc(key_jar, GFP_KERNEL);
+	key = kmem_cache_zalloc(key_jar, GFP_KERNEL);
 	if (!key)
 		goto no_memory_2;
 
@@ -293,18 +293,12 @@ struct key *key_alloc(struct key_type *type, const char *desc,
 	key->uid = uid;
 	key->gid = gid;
 	key->perm = perm;
-	key->flags = 0;
-	key->expiry = 0;
-	key->payload.data = NULL;
-	key->security = NULL;
 
 	if (!(flags & KEY_ALLOC_NOT_IN_QUOTA))
 		key->flags |= 1 << KEY_FLAG_IN_QUOTA;
 	if (flags & KEY_ALLOC_TRUSTED)
 		key->flags |= 1 << KEY_FLAG_TRUSTED;
 
-	memset(&key->type_data, 0, sizeof(key->type_data));
-
 #ifdef KEY_DEBUGGING
 	key->magic = KEY_DEBUG_MAGIC;
 #endif
-- 
1.8.3.1


From 505f63b47ecea475750c45ad3ba3e3ba73872509 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Mon, 2 Dec 2013 11:24:18 +0000
Subject: [PATCH 07/10] KEYS: Fix the keyring hash function

The keyring hash function (used by the associative array) is supposed to clear
the bottommost nibble of the index key (where the hash value resides) for
keyrings and make sure it is non-zero for non-keyrings.  This is done to make
keyrings cluster together on one branch of the tree separately to other keys.

Unfortunately, the wrong mask is used, so only the bottom two bits are
examined and cleared and not the whole bottom nibble.  This means that keys
and keyrings can still be successfully searched for under most circumstances
as the hash is consistent in its miscalculation, but if a keyring's
associative array bottom node gets filled up then approx 75% of the keyrings
will not be put into the 0 branch.

The consequence of this is that a key in a keyring linked to by another
keyring, ie.

	keyring A -> keyring B -> key

may not be found if the search starts at keyring A and then descends into
keyring B because search_nested_keyrings() only searches up the 0 branch (as it
"knows" all keyrings must be there and not elsewhere in the tree).

The fix is to use the right mask.
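
Assuming a fan-out of 16 (one nibble of the hash per tree level), the masks
in question work out roughly as sketched here:

	const unsigned long level_mask = ASSOC_ARRAY_LEVEL_STEP_MASK;	/* 0x3: only two bits */
	const unsigned long fan_mask   = ASSOC_ARRAY_FAN_MASK;		/* 0xf: the whole nibble */

	if (index_key->type == &key_type_keyring && (hash & fan_mask) != 0)
		return (hash + (hash << level_shift)) & ~fan_mask;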

This can be tested with:

	r=`keyctl newring sandbox @s`
	for ((i=0; i<=16; i++)); do keyctl newring ring$i $r; done
	for ((i=0; i<=16; i++)); do keyctl add user a$i a %:ring$i; done
	for ((i=0; i<=16; i++)); do keyctl search $r user a$i; done

This creates a sandbox keyring, then creates 17 keyrings therein (labelled
ring0..ring16).  This causes the root node of the sandbox's associative array
to overflow and extra nodes to be inserted into the tree.

Each keyring then is given a user key (labelled aN for ringN) for us to search
for.

We then search for the user keys we added, starting from the sandbox.  If
working correctly, this should return the same ordered list of key IDs as the
for ... keyctl add ... loop did.  Without this patch, it reports ENOKEY
"Required key not available" for some of the keys.  Just which keys get this
error varies, as the kernel pointer to the key type forms part of the hash
function.

Reported-by: Nalin Dahyabhai <nalin@redhat.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Tested-by: Stephen Gallagher <sgallagh@redhat.com>
---
 security/keys/keyring.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/security/keys/keyring.c b/security/keys/keyring.c
index 69f0cb7..0adbc77 100644
--- a/security/keys/keyring.c
+++ b/security/keys/keyring.c
@@ -160,7 +160,7 @@ static u64 mult_64x32_and_fold(u64 x, u32 y)
 static unsigned long hash_key_type_and_desc(const struct keyring_index_key *index_key)
 {
 	const unsigned level_shift = ASSOC_ARRAY_LEVEL_STEP;
-	const unsigned long level_mask = ASSOC_ARRAY_LEVEL_STEP_MASK;
+	const unsigned long fan_mask = ASSOC_ARRAY_FAN_MASK;
 	const char *description = index_key->description;
 	unsigned long hash, type;
 	u32 piece;
@@ -194,10 +194,10 @@ static unsigned long hash_key_type_and_desc(const struct keyring_index_key *inde
 	 * ordinary keys by making sure the lowest level segment in the hash is
 	 * zero for keyrings and non-zero otherwise.
 	 */
-	if (index_key->type != &key_type_keyring && (hash & level_mask) == 0)
+	if (index_key->type != &key_type_keyring && (hash & fan_mask) == 0)
 		return hash | (hash >> (ASSOC_ARRAY_KEY_CHUNK_SIZE - level_shift)) | 1;
-	if (index_key->type == &key_type_keyring && (hash & level_mask) != 0)
-		return (hash + (hash << level_shift)) & ~level_mask;
+	if (index_key->type == &key_type_keyring && (hash & fan_mask) != 0)
+		return (hash + (hash << level_shift)) & ~fan_mask;
 	return hash;
 }
 
-- 
1.8.3.1


From cc72a3bd65c6e4594669fea8ac94966e570ec6aa Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Mon, 2 Dec 2013 11:24:18 +0000
Subject: [PATCH 08/10] KEYS: Fix multiple key add into associative array

If sufficient keys (or keyrings) are added into a keyring such that a node in
the associative array's tree overflows (each node has a capacity N, currently
16) and such that all N+1 keys have the same index key segment for that level
of the tree (the level'th nibble of the index key), then assoc_array_insert()
calls ops->diff_objects() to indicate at which bit position the two index keys
vary.

However, __key_link_begin() passes a NULL object to assoc_array_insert() with
the intention of supplying the correct pointer later before we commit the
change.  This means that keyring_diff_objects() is given a NULL pointer as one
of its arguments which it does not expect.  This results in an oops like the
attached.

With the previous patch to fix the keyring hash function, this can be forced
much more easily by creating a keyring and only adding keyrings to it.  Add any
other sort of key and a different insertion path is taken - all 16+1 objects
must want to cluster in the same node slot.

This can be tested by:

	r=`keyctl newring sandbox @s`
	for ((i=0; i<=16; i++)); do keyctl newring ring$i $r; done

This should work fine, but oopses when the 17th keyring is added.

Since ops->diff_objects() is always called with the first pointer pointing to
the object to be inserted (ie. the NULL pointer), we can fix the problem by
passing the callback the index key given to assoc_array_insert() in place of
the to-be-inserted object pointer.

Whilst we're at it, we also switch the arguments so that they are the same as
for ->compare_object().
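
Condensed from the hunks below, the new convention looks like this: the
caller hands the callback an existing leaf plus the index key it was given,
rather than the possibly-NULL object being inserted:

	/* lib/assoc_array.c, all-leaves-cluster-together case */
	int x = ops->diff_objects(assoc_array_ptr_to_leaf(node->slots[i]),
				  index_key);

	/* and the op is declared as */
	int (*diff_objects)(const void *object, const void *index_key);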

BUG: unable to handle kernel NULL pointer dereference at 0000000000000088
IP: [<ffffffff81191ee4>] hash_key_type_and_desc+0x18/0xb0
...
RIP: 0010:[<ffffffff81191ee4>] hash_key_type_and_desc+0x18/0xb0
...
Call Trace:
 [<ffffffff81191f9d>] keyring_diff_objects+0x21/0xd2
 [<ffffffff811f09ef>] assoc_array_insert+0x3b6/0x908
 [<ffffffff811929a7>] __key_link_begin+0x78/0xe5
 [<ffffffff81191a2e>] key_create_or_update+0x17d/0x36a
 [<ffffffff81192e0a>] SyS_add_key+0x123/0x183
 [<ffffffff81400ddb>] tracesys+0xdd/0xe2

Signed-off-by: David Howells <dhowells@redhat.com>
Tested-by: Stephen Gallagher <sgallagh@redhat.com>
---
 Documentation/assoc_array.txt | 6 +++---
 include/linux/assoc_array.h   | 6 +++---
 lib/assoc_array.c             | 4 ++--
 security/keys/keyring.c       | 7 +++----
 4 files changed, 11 insertions(+), 12 deletions(-)

diff --git a/Documentation/assoc_array.txt b/Documentation/assoc_array.txt
index f4faec0..2f2c6cd 100644
--- a/Documentation/assoc_array.txt
+++ b/Documentation/assoc_array.txt
@@ -164,10 +164,10 @@ This points to a number of methods, all of which need to be provided:
 
  (4) Diff the index keys of two objects.
 
-	int (*diff_objects)(const void *a, const void *b);
+	int (*diff_objects)(const void *object, const void *index_key);
 
-     Return the bit position at which the index keys of two objects differ or
-     -1 if they are the same.
+     Return the bit position at which the index key of the specified object
+     differs from the given index key or -1 if they are the same.
 
 
  (5) Free an object.
diff --git a/include/linux/assoc_array.h b/include/linux/assoc_array.h
index 9a193b8..a89df3b 100644
--- a/include/linux/assoc_array.h
+++ b/include/linux/assoc_array.h
@@ -41,10 +41,10 @@ struct assoc_array_ops {
 	/* Is this the object we're looking for? */
 	bool (*compare_object)(const void *object, const void *index_key);
 
-	/* How different are two objects, to a bit position in their keys? (or
-	 * -1 if they're the same)
+	/* How different is an object from an index key, to a bit position in
+	 * their keys? (or -1 if they're the same)
 	 */
-	int (*diff_objects)(const void *a, const void *b);
+	int (*diff_objects)(const void *object, const void *index_key);
 
 	/* Method to free an object. */
 	void (*free_object)(void *object);
diff --git a/lib/assoc_array.c b/lib/assoc_array.c
index 17edeaf..1b6a44f 100644
--- a/lib/assoc_array.c
+++ b/lib/assoc_array.c
@@ -759,8 +759,8 @@ all_leaves_cluster_together:
 	pr_devel("all leaves cluster together\n");
 	diff = INT_MAX;
 	for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
-		int x = ops->diff_objects(assoc_array_ptr_to_leaf(edit->leaf),
-					  assoc_array_ptr_to_leaf(node->slots[i]));
+		int x = ops->diff_objects(assoc_array_ptr_to_leaf(node->slots[i]),
+					  index_key);
 		if (x < diff) {
 			BUG_ON(x < 0);
 			diff = x;
diff --git a/security/keys/keyring.c b/security/keys/keyring.c
index 0adbc77..3dd8445 100644
--- a/security/keys/keyring.c
+++ b/security/keys/keyring.c
@@ -279,12 +279,11 @@ static bool keyring_compare_object(const void *object, const void *data)
  * Compare the index keys of a pair of objects and determine the bit position
  * at which they differ - if they differ.
  */
-static int keyring_diff_objects(const void *_a, const void *_b)
+static int keyring_diff_objects(const void *object, const void *data)
 {
-	const struct key *key_a = keyring_ptr_to_key(_a);
-	const struct key *key_b = keyring_ptr_to_key(_b);
+	const struct key *key_a = keyring_ptr_to_key(object);
 	const struct keyring_index_key *a = &key_a->index_key;
-	const struct keyring_index_key *b = &key_b->index_key;
+	const struct keyring_index_key *b = data;
 	unsigned long seg_a, seg_b;
 	int level, i;
 
-- 
1.8.3.1


From 6d2303664c4dd852c49fe68140b308d5b0c4a082 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Mon, 2 Dec 2013 11:24:19 +0000
Subject: [PATCH 09/10] KEYS: Fix searching of nested keyrings

If a keyring contains more than 16 keyrings (the capacity of a single node in
the associative array) then those keyrings are split over multiple nodes
arranged as a tree.

If search_nested_keyrings() is called to search the keyring then it will
attempt to manually walk over just the 0 branch of the associative array tree
where all the keyring links are stored.  This works provided the key is found
before the algorithm steps from one node containing keyrings to a child node
or if there are sufficiently few keyring links that the keyrings are all in
one node.

However, if the algorithm does need to step from a node to a child node, it
doesn't change the node pointer unless a shortcut also gets transited.  This
means that the algorithm will keep scanning the same node over and over again
without terminating and without returning.

To fix this, move the internal-pointer-to-node translation out of the
shortcut transit handler so that it is also applied on ordinary node arrival.
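
The change itself (see the hunk below) simply hoists the translation out of
the shortcut-only branch, roughly:

	if (assoc_array_ptr_is_shortcut(ptr)) {
		/* ... shortcut transit ... */
		ptr = ACCESS_ONCE(shortcut->next_node);
		BUG_ON(!assoc_array_ptr_is_node(ptr));
	}
	node = assoc_array_ptr_to_node(ptr);	/* now done on every node arrival */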

This can be tested by:

	r=`keyctl newring sandbox @s`
	for ((i=0; i<=16; i++)); do keyctl newring ring$i $r; done
	for ((i=0; i<=16; i++)); do keyctl add user a$i a %:ring$i; done
	for ((i=0; i<=16; i++)); do keyctl search $r user a$i; done
	for ((i=17; i<=20; i++)); do keyctl search $r user a$i; done

The searches should all complete successfully (or fail with an error for
17-20), but without this patch one or more of them will hang instead.

Signed-off-by: David Howells <dhowells@redhat.com>
Tested-by: Stephen Gallagher <sgallagh@redhat.com>
---
 security/keys/keyring.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/security/keys/keyring.c b/security/keys/keyring.c
index 3dd8445..d46cbc5 100644
--- a/security/keys/keyring.c
+++ b/security/keys/keyring.c
@@ -690,8 +690,8 @@ descend_to_node:
 		smp_read_barrier_depends();
 		ptr = ACCESS_ONCE(shortcut->next_node);
 		BUG_ON(!assoc_array_ptr_is_node(ptr));
-		node = assoc_array_ptr_to_node(ptr);
 	}
+	node = assoc_array_ptr_to_node(ptr);
 
 begin_node:
 	kdebug("begin_node");
-- 
1.8.3.1


From 6a57d6a93f0d17b3e23134fec556aea585cb5392 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Mon, 2 Dec 2013 11:24:19 +0000
Subject: [PATCH 10/10] security: shmem: implement kernel private shmem inodes

We have a problem where the big_key key storage implementation uses a
shmem backed inode to hold the key contents.  Because of this implementation
detail, LSM checks are being done between processes trying to read the keys
and the tmpfs backed inode.  The LSM checks are already being handled at the
key interface level and should not be enforced at the inode level (since the
inode is an implementation detail, not a part of the security model).

This patch implements a new function, shmem_kernel_file_setup(), which
returns the equivalent of shmem_file_setup(), only the underlying inode
has S_PRIVATE set.  This means that all LSM checks for the inode in
question are skipped.  It should only be used for kernel internal
operations where the inode is not exposed to userspace without proper
LSM checking.  It is possible that some other users of
shmem_file_setup() should use the new interface, but this has not been
explored.
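
In outline (condensed from the hunks below), both setup functions funnel
through a common helper that can apply extra inode flags, and big_key
switches to the kernel-internal variant:

	/* mm/shmem.c */
	struct file *shmem_kernel_file_setup(const char *name, loff_t size,
					     unsigned long flags)
	{
		return __shmem_file_setup(name, size, flags, S_PRIVATE);
	}

	/* security/keys/big_key.c */
	file = shmem_kernel_file_setup("", datalen, 0);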

Reproducing this bug is a little bit difficult.  The steps I used on
Fedora are:

 (1) Turn off selinux enforcing:

	setenforce 0

 (2) Create a huge key

	k=`dd if=/dev/zero bs=8192 count=1 | keyctl padd big_key test-key @s`

 (3) Access the key in another context:

	runcon system_u:system_r:httpd_t:s0-s0:c0.c1023 keyctl print $k >/dev/null

 (4) Examine the audit logs:

	ausearch -m AVC -i --subject httpd_t | audit2allow

If the last command's output includes a line that looks like:

	allow httpd_t user_tmpfs_t:file { open read };

There was an inode check between httpd and the tmpfs filesystem.  With
this patch no such denial will be seen.  (NOTE! you should clear your
audit log if you have tested for this previously)

(Please return your box to enforcing.)

Signed-off-by: Eric Paris <eparis@redhat.com>
Signed-off-by: David Howells <dhowells@redhat.com>
cc: Hugh Dickins <hughd@google.com>
cc: linux-mm@kvack.org
---
 include/linux/shmem_fs.h |  2 ++
 mm/shmem.c               | 36 +++++++++++++++++++++++++++++-------
 security/keys/big_key.c  |  2 +-
 3 files changed, 32 insertions(+), 8 deletions(-)

diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index 30aa0dc..9d55438 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -47,6 +47,8 @@ extern int shmem_init(void);
 extern int shmem_fill_super(struct super_block *sb, void *data, int silent);
 extern struct file *shmem_file_setup(const char *name,
 					loff_t size, unsigned long flags);
+extern struct file *shmem_kernel_file_setup(const char *name, loff_t size,
+					    unsigned long flags);
 extern int shmem_zero_setup(struct vm_area_struct *);
 extern int shmem_lock(struct file *file, int lock, struct user_struct *user);
 extern void shmem_unlock_mapping(struct address_space *mapping);
diff --git a/mm/shmem.c b/mm/shmem.c
index e43dc55..1c4124e 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2913,13 +2913,8 @@ static struct dentry_operations anon_ops = {
 	.d_dname = simple_dname
 };
 
-/**
- * shmem_file_setup - get an unlinked file living in tmpfs
- * @name: name for dentry (to be seen in /proc/<pid>/maps
- * @size: size to be set for the file
- * @flags: VM_NORESERVE suppresses pre-accounting of the entire object size
- */
-struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags)
+static struct file *__shmem_file_setup(const char *name, loff_t size,
+				       unsigned long flags, unsigned int i_flags)
 {
 	struct file *res;
 	struct inode *inode;
@@ -2952,6 +2947,7 @@ struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags
 	if (!inode)
 		goto put_dentry;
 
+	inode->i_flags |= i_flags;
 	d_instantiate(path.dentry, inode);
 	inode->i_size = size;
 	clear_nlink(inode);	/* It is unlinked */
@@ -2972,6 +2968,32 @@ put_memory:
 	shmem_unacct_size(flags, size);
 	return res;
 }
+
+/**
+ * shmem_kernel_file_setup - get an unlinked file living in tmpfs which must be
+ * 	kernel internal.  There will be NO LSM permission checks against the
+ * 	underlying inode.  So users of this interface must do LSM checks at a
+ * 	higher layer.  The one user is the big_key implementation.  LSM checks
+ * 	are provided at the key level rather than the inode level.
+ * @name: name for dentry (to be seen in /proc/<pid>/maps
+ * @size: size to be set for the file
+ * @flags: VM_NORESERVE suppresses pre-accounting of the entire object size
+ */
+struct file *shmem_kernel_file_setup(const char *name, loff_t size, unsigned long flags)
+{
+	return __shmem_file_setup(name, size, flags, S_PRIVATE);
+}
+
+/**
+ * shmem_file_setup - get an unlinked file living in tmpfs
+ * @name: name for dentry (to be seen in /proc/<pid>/maps
+ * @size: size to be set for the file
+ * @flags: VM_NORESERVE suppresses pre-accounting of the entire object size
+ */
+struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags)
+{
+	return __shmem_file_setup(name, size, flags, 0);
+}
 EXPORT_SYMBOL_GPL(shmem_file_setup);
 
 /**
diff --git a/security/keys/big_key.c b/security/keys/big_key.c
index 7f44c32..8137b27 100644
--- a/security/keys/big_key.c
+++ b/security/keys/big_key.c
@@ -70,7 +70,7 @@ int big_key_instantiate(struct key *key, struct key_preparsed_payload *prep)
 		 *
 		 * TODO: Encrypt the stored data with a temporary key.
 		 */
-		file = shmem_file_setup("", datalen, 0);
+		file = shmem_kernel_file_setup("", datalen, 0);
 		if (IS_ERR(file)) {
 			ret = PTR_ERR(file);
 			goto err_quota;
-- 
1.8.3.1