Blob Blame History Raw
From 8f3e2dab9567ee8c4e104f484941a858f353ea02 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Fri, 10 Jul 2015 17:25:53 -0700
Subject: [PATCH 7/7] x86/nmi/64: Use DF to avoid userspace RSP confusing
 nested NMI detection

commit dc68c0f2ec634b2cfecf879235564da58d422cee upstream.

We have a tricky bug in the nested NMI code: if we see RSP pointing
to the NMI stack on NMI entry from kernel mode, we assume that we
are executing a nested NMI.

This isn't quite true.  A malicious userspace program can point RSP
at the NMI stack, issue SYSCALL, and arrange for an NMI to happen
while RSP is still pointing at the NMI stack.

Fix it with a sneaky trick.  Set DF in the region of code that the RSP
check is intended to detect.  IRET will clear DF atomically.

(Note: other than paravirt, there's little need for all this complexity.
 We could check RIP instead of RSP.)

Fixes CVE-2015-3291.

Cc: stable@vger.kernel.org
Reviewed-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Andy Lutomirski <luto@kernel.org>
[bwh: Backported to 4.0: adjust filename, context]
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
---
 arch/x86/kernel/entry_64.S | 29 +++++++++++++++++++++++++----
 1 file changed, 25 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index be2c4ba..ce4431c 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1580,7 +1580,14 @@ ENTRY(nmi)
 	/*
 	 * Now test if the previous stack was an NMI stack.  This covers
 	 * the case where we interrupt an outer NMI after it clears
-	 * "NMI executing" but before IRET.
+	 * "NMI executing" but before IRET.  We need to be careful, though:
+	 * there is one case in which RSP could point to the NMI stack
+	 * despite there being no NMI active: naughty userspace controls
+	 * RSP at the very beginning of the SYSCALL targets.  We can
+	 * pull a fast one on naughty userspace, though: we program
+	 * SYSCALL to mask DF, so userspace cannot cause DF to be set
+	 * if it controls the kernel's RSP.  We set DF before we clear
+	 * "NMI executing".
 	 */
 	lea	6*8(%rsp), %rdx
 	/* Compare the NMI stack (rdx) with the stack we came from (4*8(%rsp)) */
@@ -1591,10 +1598,16 @@ ENTRY(nmi)
 	cmpq	%rdx, 4*8(%rsp)
 	/* If it is below the NMI stack, it is a normal NMI */
 	jb	first_nmi
-	/* Ah, it is within the NMI stack, treat it as nested */
+
+	/* Ah, it is within the NMI stack. */
+
+	testb	$(X86_EFLAGS_DF >> 8), (3*8 + 1)(%rsp)
+	jz	first_nmi	/* RSP was user controlled. */
 
 	CFI_REMEMBER_STATE
 
+	/* This is a nested NMI. */
+
 nested_nmi:
 	/*
 	 * Modify the "iret" frame to point to repeat_nmi, forcing another
@@ -1709,8 +1722,16 @@ nmi_restore:
 	/* Point RSP at the "iret" frame. */
 	REMOVE_PT_GPREGS_FROM_STACK 6*8
 
-	/* Clear "NMI executing". */
-	movq $0, 5*8(%rsp)
+	/*
+	 * Clear "NMI executing".  Set DF first so that we can easily
+	 * distinguish the remaining code between here and IRET from
+	 * the SYSCALL entry and exit paths.  On a native kernel, we
+	 * could just inspect RIP, but, on paravirt kernels,
+	 * INTERRUPT_RETURN can translate into a jump into a
+	 * hypercall page.
+	 */
+	std
+	movq	$0, 5*8(%rsp)		/* clear "NMI executing" */
 
 	/*
 	 * INTERRUPT_RETURN reads the "iret" frame and exits the NMI
-- 
2.4.3