faddeb1
From: Andy Lutomirski <luto@amacapital.net>
faddeb1
Date: Wed, 8 Oct 2014 09:02:13 -0700
faddeb1
Subject: [PATCH] x86,kvm,vmx: Preserve CR4 across VM entry
faddeb1
faddeb1
CR4 isn't constant; at least the TSD and PCE bits can vary.
faddeb1
faddeb1
TBH, treating CR0 and CR3 as constant scares me a bit, too, but it looks
faddeb1
like it's correct.
faddeb1
faddeb1
This adds a branch and a read from cr4 to each vm entry.  Because it is
faddeb1
extremely likely that consecutive entries into the same vcpu will have
faddeb1
the same host cr4 value, this fixes up the vmcs instead of restoring cr4
faddeb1
after the fact.  A subsequent patch will add a kernel-wide cr4 shadow,
faddeb1
reducing the overhead in the common case to just two memory reads and a
faddeb1
branch.
faddeb1
faddeb1
Signed-off-by: Andy Lutomirski <luto@amacapital.net>
faddeb1
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
faddeb1
Cc: stable@vger.kernel.org
faddeb1
Cc: Petr Matousek <pmatouse@redhat.com>
faddeb1
Cc: Gleb Natapov <gleb@kernel.org>
faddeb1
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
faddeb1
---
faddeb1
 arch/x86/kvm/vmx.c | 16 ++++++++++++++--
faddeb1
 1 file changed, 14 insertions(+), 2 deletions(-)
faddeb1
faddeb1
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
faddeb1
index bfe11cf124a1..6a118fa378b5 100644
faddeb1
--- a/arch/x86/kvm/vmx.c
faddeb1
+++ b/arch/x86/kvm/vmx.c
faddeb1
@@ -453,6 +453,7 @@ struct vcpu_vmx {
faddeb1
 		int           gs_ldt_reload_needed;
faddeb1
 		int           fs_reload_needed;
faddeb1
 		u64           msr_host_bndcfgs;
faddeb1
+		unsigned long vmcs_host_cr4;	/* May not match real cr4 */
faddeb1
 	} host_state;
faddeb1
 	struct {
faddeb1
 		int vm86_active;
faddeb1
@@ -4235,11 +4236,16 @@ static void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
faddeb1
 	u32 low32, high32;
faddeb1
 	unsigned long tmpl;
faddeb1
 	struct desc_ptr dt;
faddeb1
+	unsigned long cr4;
faddeb1
 
faddeb1
 	vmcs_writel(HOST_CR0, read_cr0() & ~X86_CR0_TS);  /* 22.2.3 */
faddeb1
-	vmcs_writel(HOST_CR4, read_cr4());  /* 22.2.3, 22.2.5 */
faddeb1
 	vmcs_writel(HOST_CR3, read_cr3());  /* 22.2.3  FIXME: shadow tables */
faddeb1
 
faddeb1
+	/* Save the most likely value for this task's CR4 in the VMCS. */
faddeb1
+	cr4 = read_cr4();
faddeb1
+	vmcs_writel(HOST_CR4, cr4);			/* 22.2.3, 22.2.5 */
faddeb1
+	vmx->host_state.vmcs_host_cr4 = cr4;
faddeb1
+
faddeb1
 	vmcs_write16(HOST_CS_SELECTOR, __KERNEL_CS);  /* 22.2.4 */
faddeb1
 #ifdef CONFIG_X86_64
faddeb1
 	/*
faddeb1
@@ -7376,7 +7382,7 @@ static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx)
faddeb1
 static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
faddeb1
 {
faddeb1
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
faddeb1
-	unsigned long debugctlmsr;
faddeb1
+	unsigned long debugctlmsr, cr4;
faddeb1
 
faddeb1
 	/* Record the guest's net vcpu time for enforced NMI injections. */
faddeb1
 	if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked))
faddeb1
@@ -7397,6 +7403,12 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
faddeb1
 	if (test_bit(VCPU_REGS_RIP, (unsigned long *)&vcpu->arch.regs_dirty))
faddeb1
 		vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]);
faddeb1
 
faddeb1
+	cr4 = read_cr4();
faddeb1
+	if (unlikely(cr4 != vmx->host_state.vmcs_host_cr4)) {
faddeb1
+		vmcs_writel(HOST_CR4, cr4);
faddeb1
+		vmx->host_state.vmcs_host_cr4 = cr4;
faddeb1
+	}
faddeb1
+
faddeb1
 	/* When single-stepping over STI and MOV SS, we must clear the
faddeb1
 	 * corresponding interruptibility bits in the guest state. Otherwise
faddeb1
 	 * vmentry fails as it then expects bit 14 (BS) in pending debug
faddeb1
-- 
faddeb1
1.9.3
faddeb1