From 946dd2eefae2faeecbeb9662e66935c8070f64f5 Mon Sep 17 00:00:00 2001
From: Julien Grall <julien.grall@linaro.org>
Date: Tue, 16 Jan 2018 14:23:33 +0000
Subject: [PATCH 01/45] xen/arm: Introduce enable callback to enable a
 capability on each online CPU

Once Xen knows what features/workarounds are present on the platform, it
might be necessary to configure each online CPU.

Introduce a new callback "enable" that will be called on each online CPU to
configure the "capability".

The code is based on Linux v4.14 (where cpufeature.c comes from); the
explanation of why stop_machine_run is used is kept, as we may face a
similar problem in the future.

Lastly, introduce enable_errata_workarounds, which will be called once
all CPUs have booted and before the hardware domain is created.
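
As an illustration, a capability entry wired up with the new callback
could look like the sketch below (ARM64_WORKAROUND_FOO and
enable_foo_workaround are hypothetical names, not part of this series;
the fields match the arm_cpu_capabilities changes further down):

    static int enable_foo_workaround(void *data)
    {
        /* "data" is the matching arm_cpu_capabilities entry. This runs
         * once on each online CPU via stop_machine_run(). */
        return 0; /* a non-zero return trips the BUG_ON() in the caller */
    }

    {
        .desc = "hypothetical erratum foo",
        .capability = ARM64_WORKAROUND_FOO,
        .matches = is_affected_midr_range,
        .enable = enable_foo_workaround,
    },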

This is part of XSA-254.

Signed-off-by: Julien Grall <julien.grall@linaro.org>
Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
Signed-off-by: Stefano Stabellini <sstabellini@kernel.org>
(cherry picked from commit 7500495155aacce437878cb576f45224ae984f40)

Conflicts:
	xen/include/asm-arm/cpufeature.h

(cherry picked from commit 2ec7ccbffc6b788f65e55498e4347c1ee3a44b01)
---
 xen/arch/arm/cpuerrata.c         |  6 ++++++
 xen/arch/arm/cpufeature.c        | 29 +++++++++++++++++++++++++++++
 xen/arch/arm/setup.c             |  1 +
 xen/include/asm-arm/cpuerrata.h  |  1 +
 xen/include/asm-arm/cpufeature.h |  3 +++
 5 files changed, 40 insertions(+)

diff --git a/xen/arch/arm/cpuerrata.c b/xen/arch/arm/cpuerrata.c
index a3e8ddabb0..d422fd832b 100644
--- a/xen/arch/arm/cpuerrata.c
+++ b/xen/arch/arm/cpuerrata.c
@@ -65,6 +65,12 @@ void check_local_cpu_errata(void)
 {
     update_cpu_capabilities(arm_errata, "enabled workaround for");
 }
+
+void __init enable_errata_workarounds(void)
+{
+    enable_cpu_capabilities(arm_errata);
+}
+
 /*
  * Local variables:
  * mode: C
diff --git a/xen/arch/arm/cpufeature.c b/xen/arch/arm/cpufeature.c
index 088625bbac..c1cd425067 100644
--- a/xen/arch/arm/cpufeature.c
+++ b/xen/arch/arm/cpufeature.c
@@ -20,6 +20,7 @@
 #include <xen/types.h>
 #include <xen/init.h>
 #include <xen/smp.h>
+#include <xen/stop_machine.h>
 #include <asm/cpufeature.h>
 
 DECLARE_BITMAP(cpu_hwcaps, ARM_NCAPS);
@@ -40,6 +41,34 @@ void update_cpu_capabilities(const struct arm_cpu_capabilities *caps,
     }
 }
 
+/*
+ * Run through the enabled capabilities and call enable() on all
+ * active CPUs.
+ */
+void __init enable_cpu_capabilities(const struct arm_cpu_capabilities *caps)
+{
+    for ( ; caps->matches; caps++ )
+    {
+        if ( !cpus_have_cap(caps->capability) )
+            continue;
+
+        if ( caps->enable )
+        {
+            int ret;
+
+            /*
+             * Use stop_machine_run() as it schedules the work allowing
+             * us to modify PSTATE, instead of on_each_cpu() which uses
+             * an IPI, giving us a PSTATE that disappears when we
+             * return.
+             */
+            ret = stop_machine_run(caps->enable, (void *)caps, NR_CPUS);
+            /* stop_machine_run should never fail at this stage of the boot. */
+            BUG_ON(ret);
+        }
+    }
+}
+
 /*
  * Local variables:
  * mode: C
diff --git a/xen/arch/arm/setup.c b/xen/arch/arm/setup.c
index f4e9605bdc..6116caa7ec 100644
--- a/xen/arch/arm/setup.c
+++ b/xen/arch/arm/setup.c
@@ -850,6 +850,7 @@ void __init start_xen(unsigned long boot_phys_offset,
      * stop_machine (tasklets initialized via an initcall).
      */
     apply_alternatives_all();
+    enable_errata_workarounds();
 
     /* Create initial domain 0. */
     /* The vGIC for DOM0 is exactly emulating the hardware GIC */
diff --git a/xen/include/asm-arm/cpuerrata.h b/xen/include/asm-arm/cpuerrata.h
index 8c57c6a7cc..24bb958839 100644
--- a/xen/include/asm-arm/cpuerrata.h
+++ b/xen/include/asm-arm/cpuerrata.h
@@ -6,6 +6,7 @@
 #include <asm/alternative.h>
 
 void check_local_cpu_errata(void);
+void enable_errata_workarounds(void);
 
 #ifdef CONFIG_HAS_ALTERNATIVE
 
diff --git a/xen/include/asm-arm/cpufeature.h b/xen/include/asm-arm/cpufeature.h
index c0a25ae13e..45d57cbec9 100644
--- a/xen/include/asm-arm/cpufeature.h
+++ b/xen/include/asm-arm/cpufeature.h
@@ -72,6 +72,7 @@ struct arm_cpu_capabilities {
     const char *desc;
     u16 capability;
     bool_t (*matches)(const struct arm_cpu_capabilities *);
+    int (*enable)(void *); /* Called on every active CPUs */
     union {
         struct {    /* To be used for eratum handling only */
             u32 midr_model;
@@ -83,6 +84,8 @@ struct arm_cpu_capabilities {
 void update_cpu_capabilities(const struct arm_cpu_capabilities *caps,
                              const char *info);
 
+void enable_cpu_capabilities(const struct arm_cpu_capabilities *caps);
+
 #endif /* __ASSEMBLY__ */
 
 #endif
-- 
2.14.3


From 85990bf53addcdb0ce8e458a3d8fad199710ac59 Mon Sep 17 00:00:00 2001
From: Julien Grall <julien.grall@linaro.org>
Date: Tue, 16 Jan 2018 14:23:34 +0000
Subject: [PATCH 02/45] xen/arm64: Add missing MIDR values for Cortex-A72, A73
 and A75

Cortex-A72, A73 and A75 MIDR values will be used in a follow-up patch
for hardening the branch predictor.

This is part of XSA-254.

Signed-off-by: Julien Grall <julien.grall@linaro.org>
Acked-by: Stefano Stabellini <sstabellini@kernel.org>
(cherry picked from commit 7975bff524c4e2c30efbf144de753f151d974e53)
(cherry picked from commit 50450c1f33dc72f2138a671d738934f796be3318)
---
 xen/include/asm-arm/processor.h | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/xen/include/asm-arm/processor.h b/xen/include/asm-arm/processor.h
index 416388425d..2c9c193755 100644
--- a/xen/include/asm-arm/processor.h
+++ b/xen/include/asm-arm/processor.h
@@ -49,10 +49,16 @@
 #define ARM_CPU_PART_CORTEX_A15     0xC0F
 #define ARM_CPU_PART_CORTEX_A53     0xD03
 #define ARM_CPU_PART_CORTEX_A57     0xD07
+#define ARM_CPU_PART_CORTEX_A72     0xD08
+#define ARM_CPU_PART_CORTEX_A73     0xD09
+#define ARM_CPU_PART_CORTEX_A75     0xD0A
 
 #define MIDR_CORTEX_A15 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A15)
 #define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53)
 #define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57)
+#define MIDR_CORTEX_A72 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A72)
+#define MIDR_CORTEX_A73 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A73)
+#define MIDR_CORTEX_A75 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A75)
 
 /* MPIDR Multiprocessor Affinity Register */
 #define _MPIDR_UP           (30)
-- 
2.14.3


From cf0b584c8c5030588bc47a3614ad860af7482c53 Mon Sep 17 00:00:00 2001
From: Julien Grall <julien.grall@linaro.org>
Date: Tue, 16 Jan 2018 14:23:35 +0000
Subject: [PATCH 03/45] xen/arm: cpuerrata: Add MIDR_ALL_VERSIONS

Introduce a new macro MIDR_ALL_VERSIONS to match all variant/revision of a
given CPU model.
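
A later patch in this series uses it exactly like this (taken from the
branch predictor hardening patch below), matching any variant/revision
of the Cortex-A57:

    {
        .capability = ARM_HARDEN_BRANCH_PREDICTOR,
        MIDR_ALL_VERSIONS(MIDR_CORTEX_A57),
        .enable = enable_psci_bp_hardening,
    },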

This is part of XSA-254.

Signed-off-by: Julien Grall <julien.grall@linaro.org>
Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
Signed-off-by: Stefano Stabellini <sstabellini@kernel.org>
(cherry picked from commit ba73070af43a38d200413f446d6a718e108867b6)

Conflicts:
	xen/arch/arm/cpuerrata.c

(cherry picked from commit 3790833ef16b95653424ec9b145e460ec1a56d16)
---
 xen/arch/arm/cpuerrata.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/xen/arch/arm/cpuerrata.c b/xen/arch/arm/cpuerrata.c
index d422fd832b..5dad0fe264 100644
--- a/xen/arch/arm/cpuerrata.c
+++ b/xen/arch/arm/cpuerrata.c
@@ -8,6 +8,12 @@
     .midr_range_min = min,              \
     .midr_range_max = max
 
+#define MIDR_ALL_VERSIONS(model)        \
+    .matches = is_affected_midr_range,  \
+    .midr_model = model,                \
+    .midr_range_min = 0,                \
+    .midr_range_max = (MIDR_VARIANT_MASK | MIDR_REVISION_MASK)
+
 static bool_t __maybe_unused
 is_affected_midr_range(const struct arm_cpu_capabilities *entry)
 {
-- 
2.14.3


From 44139fed7c794eb4e47a9bb93061e325bd57fe8c Mon Sep 17 00:00:00 2001
From: Julien Grall <julien.grall@linaro.org>
Date: Tue, 16 Jan 2018 14:23:36 +0000
Subject: [PATCH 04/45] xen/arm64: Add skeleton to harden the branch predictor
 aliasing attacks

Aliasing attacks against CPU branch predictors can allow an attacker to
redirect speculative control flow on some CPUs and potentially divulge
information from one context to another.

This patch adds initial skeleton code behind a new Kconfig option to
enable implementation-specific mitigations against these attacks for
CPUs that are affected.

Most of the mitigations will have to be applied when entering the
hypervisor from the guest context. For safety, they are applied at every
exception entry, so there is potential for optimization when receiving
an exception at the same level.

Because the attack is against the branch predictor, it is not possible
to safely use a branch instruction before the mitigation is applied.
Therefore, this has to be done in the vector entry before jumping to the
helper handling a given exception.

On Arm64, each vector can hold 32 instructions. This leaves us 31
instructions for the mitigation; the last one is the branch instruction
to the helper.
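
The arithmetic, for the reader (using the constants from the diff
below): entries are spaced 0x80 bytes apart, i.e. 0x80 / 4 = 32 A64
instructions each, and a full table of 16 entries is 16 * 0x80 = 0x800
bytes, which is the VECTOR_TABLE_SIZE (SZ_2K) used in cpuerrata.c.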

Because a platform may have CPUs with different micro-architectures,
per-CPU vector tables need to be provided. Realistically, only a few
different mitigations will be necessary, so provide a small set of
vector tables. They will be re-used and patched with the mitigations
on demand.
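
In C terms, installing a CPU's slot boils down to the following sketch
(names taken from the cpuerrata.c hunk below):

    /* Each slot is one full 2KB vector table inside bpi.S's block of 4. */
    const char *vecs = __bp_harden_hyp_vecs_start + slot * VECTOR_TABLE_SIZE;

    /* Point this CPU's exception vectors at the hardened table. */
    WRITE_SYSREG((vaddr_t)vecs, VBAR_EL2);
    isb();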

This is based on the work done in Linux (see [1]).

This is part of XSA-254.

[1] git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux.git
branch kpti

Signed-off-by: Julien Grall <julien.grall@linaro.org>
Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
Signed-off-by: Stefano Stabellini <sstabellini@kernel.org>
(cherry picked from commit 4c4fddc166cf528aca49540bcc9ee4f196b01dac)
(cherry picked from commit fba48eff18c02d716c95b92df804a755620be82e)

Conflicts:
	xen/arch/arm/cpuerrata.c
	xen/include/asm-arm/cpuerrata.h
	xen/include/asm-arm/cpufeature.h
---
 xen/arch/arm/Kconfig             |  20 ++++++
 xen/arch/arm/arm64/Makefile      |   1 +
 xen/arch/arm/arm64/bpi.S         |  64 ++++++++++++++++++
 xen/arch/arm/cpuerrata.c         | 139 +++++++++++++++++++++++++++++++++++++++
 xen/arch/arm/traps.c             |   5 +-
 xen/include/asm-arm/cpufeature.h |   3 +-
 xen/include/asm-arm/processor.h  |   5 +-
 7 files changed, 233 insertions(+), 4 deletions(-)
 create mode 100644 xen/arch/arm/arm64/bpi.S

diff --git a/xen/arch/arm/Kconfig b/xen/arch/arm/Kconfig
index 2e023d11f4..a71f9c0c73 100644
--- a/xen/arch/arm/Kconfig
+++ b/xen/arch/arm/Kconfig
@@ -159,6 +159,26 @@ config ARM64_ERRATUM_834220
 
 endmenu
 
+config HARDEN_BRANCH_PREDICTOR
+	bool "Harden the branch predictor against aliasing attacks" if EXPERT
+	default y
+	help
+	  Speculation attacks against some high-performance processors rely on
+	  being able to manipulate the branch predictor for a victim context by
+	  executing aliasing branches in the attacker context.  Such attacks
+	  can be partially mitigated against by clearing internal branch
+	  predictor state and limiting the prediction logic in some situations.
+
+	  This config option will take CPU-specific actions to harden the
+	  branch predictor against aliasing attacks and may rely on specific
+	  instruction sequences or control bits being set by the system
+	  firmware.
+
+	  If unsure, say Y.
+
+config ARM64_HARDEN_BRANCH_PREDICTOR
+    def_bool y if ARM_64 && HARDEN_BRANCH_PREDICTOR
+
 source "common/Kconfig"
 
 source "drivers/Kconfig"
diff --git a/xen/arch/arm/arm64/Makefile b/xen/arch/arm/arm64/Makefile
index 149b6b3901..aebe5c2a42 100644
--- a/xen/arch/arm/arm64/Makefile
+++ b/xen/arch/arm/arm64/Makefile
@@ -1,6 +1,7 @@
 subdir-y += lib
 
 obj-y += cache.o
+obj-$(CONFIG_HARDEN_BRANCH_PREDICTOR) += bpi.o
 obj-$(EARLY_PRINTK) += debug.o
 obj-y += domctl.o
 obj-y += domain.o
diff --git a/xen/arch/arm/arm64/bpi.S b/xen/arch/arm/arm64/bpi.S
new file mode 100644
index 0000000000..6cc2f17529
--- /dev/null
+++ b/xen/arch/arm/arm64/bpi.S
@@ -0,0 +1,64 @@
+/*
+ * Contains CPU specific branch predictor invalidation sequences
+ *
+ * Copyright (C) 2018 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+.macro ventry target
+    .rept 31
+    nop
+    .endr
+    b	\target
+.endm
+
+.macro vectors target
+    ventry \target + 0x000
+    ventry \target + 0x080
+    ventry \target + 0x100
+    ventry \target + 0x180
+
+    ventry \target + 0x200
+    ventry \target + 0x280
+    ventry \target + 0x300
+    ventry \target + 0x380
+
+    ventry \target + 0x400
+    ventry \target + 0x480
+    ventry \target + 0x500
+    ventry \target + 0x580
+
+    ventry \target + 0x600
+    ventry \target + 0x680
+    ventry \target + 0x700
+    ventry \target + 0x780
+.endm
+
+/*
+ * Populate 4 vector tables. This will cover up to 4 different
+ * micro-architectures in a system.
+ */
+    .align	11
+ENTRY(__bp_harden_hyp_vecs_start)
+    .rept 4
+    vectors hyp_traps_vector
+    .endr
+ENTRY(__bp_harden_hyp_vecs_end)
+
+/*
+ * Local variables:
+ * mode: ASM
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/arch/arm/cpuerrata.c b/xen/arch/arm/cpuerrata.c
index 5dad0fe264..d0693a2ec6 100644
--- a/xen/arch/arm/cpuerrata.c
+++ b/xen/arch/arm/cpuerrata.c
@@ -1,7 +1,146 @@
 #include <xen/config.h>
+#include <xen/cpumask.h>
+#include <xen/mm.h>
+#include <xen/sizes.h>
+#include <xen/smp.h>
+#include <xen/spinlock.h>
+#include <xen/vmap.h>
+#include <xen/warning.h>
 #include <asm/cpufeature.h>
 #include <asm/cpuerrata.h>
 
+/* Hardening Branch predictor code for Arm64 */
+#ifdef CONFIG_ARM64_HARDEN_BRANCH_PREDICTOR
+
+#define VECTOR_TABLE_SIZE SZ_2K
+
+/*
+ * Number of available vector tables (this should be kept in sync with
+ * arch/arm64/bpi.S).
+ */
+#define NR_BPI_HYP_VECS 4
+
+extern char __bp_harden_hyp_vecs_start[], __bp_harden_hyp_vecs_end[];
+
+/*
+ * Key for each slot. This is used to find whether a specific workaround
+ * had a slot assigned.
+ *
+ * The key is virtual address of the vector workaround
+ */
+static uintptr_t bp_harden_slot_key[NR_BPI_HYP_VECS];
+
+/*
+ * [hyp_vec_start, hyp_vec_end[ corresponds to the first 31 instructions
+ * of each vector. The last (i.e. the 32nd) instruction is used to
+ * branch to the original entry.
+ *
+ * Those instructions will be copied on each vector to harden them.
+ */
+static bool copy_hyp_vect_bpi(unsigned int slot, const char *hyp_vec_start,
+                              const char *hyp_vec_end)
+{
+    void *dst_remapped;
+    const void *dst = __bp_harden_hyp_vecs_start + slot * VECTOR_TABLE_SIZE;
+    unsigned int i;
+    mfn_t dst_mfn = _mfn(virt_to_mfn(dst));
+
+    BUG_ON(((hyp_vec_end - hyp_vec_start) / 4) > 31);
+
+    /*
+     * Vectors are part of the text that are mapped read-only. So re-map
+     * the vector table to be able to update vectors.
+     */
+    dst_remapped = __vmap(&dst_mfn,
+                          1UL << get_order_from_bytes(VECTOR_TABLE_SIZE),
+                          1, 1, PAGE_HYPERVISOR, VMAP_DEFAULT);
+    if ( !dst_remapped )
+        return false;
+
+    dst_remapped += (vaddr_t)dst & ~PAGE_MASK;
+
+    for ( i = 0; i < VECTOR_TABLE_SIZE; i += 0x80 )
+    {
+        memcpy(dst_remapped + i, hyp_vec_start, hyp_vec_end - hyp_vec_start);
+    }
+
+    clean_dcache_va_range(dst_remapped, VECTOR_TABLE_SIZE);
+    invalidate_icache();
+
+    vunmap(dst_remapped);
+
+    return true;
+}
+
+static bool __maybe_unused
+install_bp_hardening_vec(const struct arm_cpu_capabilities *entry,
+                         const char *hyp_vec_start,
+                         const char *hyp_vec_end)
+{
+    static int last_slot = -1;
+    static DEFINE_SPINLOCK(bp_lock);
+    unsigned int i, slot = -1;
+    bool ret = true;
+
+    /*
+     * Enable callbacks are called on every CPU based on the
+     * capabilities. So double-check whether the CPU matches the
+     * entry.
+     */
+    if ( !entry->matches(entry) )
+        return true;
+
+    /*
+     * No need to install the hardened vector when the processor has
+     * ID_AA64PFR0_EL1.CSV2 set.
+     */
+    if ( cpu_data[smp_processor_id()].pfr64.csv2 )
+        return true;
+
+    spin_lock(&bp_lock);
+
+    /*
+     * Look up whether the hardening vector had a slot already
+     * assigned.
+     */
+    for ( i = 0; i < 4; i++ )
+    {
+        if ( bp_harden_slot_key[i] == (uintptr_t)hyp_vec_start )
+        {
+            slot = i;
+            break;
+        }
+    }
+
+    if ( slot == -1 )
+    {
+        last_slot++;
+        /* Check we don't overrun the number of slots available. */
+        BUG_ON(NR_BPI_HYP_VECS <= last_slot);
+
+        slot = last_slot;
+        ret = copy_hyp_vect_bpi(slot, hyp_vec_start, hyp_vec_end);
+
+        /* Only update the slot if the copy succeeded. */
+        if ( ret )
+            bp_harden_slot_key[slot] = (uintptr_t)hyp_vec_start;
+    }
+
+    if ( ret )
+    {
+        /* Install the new vector table. */
+        WRITE_SYSREG((vaddr_t)(__bp_harden_hyp_vecs_start + slot * VECTOR_TABLE_SIZE),
+                     VBAR_EL2);
+        isb();
+    }
+
+    spin_unlock(&bp_lock);
+
+    return ret;
+}
+
+#endif /* CONFIG_ARM64_HARDEN_BRANCH_PREDICTOR */
+
 #define MIDR_RANGE(model, min, max)     \
     .matches = is_affected_midr_range,  \
     .midr_model = model,                \
diff --git a/xen/arch/arm/traps.c b/xen/arch/arm/traps.c
index bc4cbbb35f..d578a5c598 100644
--- a/xen/arch/arm/traps.c
+++ b/xen/arch/arm/traps.c
@@ -136,7 +136,10 @@ presmp_initcall(vwfi_init);
 
 void init_traps(void)
 {
-    /* Setup Hyp vector base */
+    /*
+     * Setup Hyp vector base. Note they might get updated with the
+     * branch predictor hardening.
+     */
     WRITE_SYSREG((vaddr_t)hyp_traps_vector, VBAR_EL2);
 
     /* Trap Debug and Performance Monitor accesses */
diff --git a/xen/include/asm-arm/cpufeature.h b/xen/include/asm-arm/cpufeature.h
index 45d57cbec9..e26584fb2d 100644
--- a/xen/include/asm-arm/cpufeature.h
+++ b/xen/include/asm-arm/cpufeature.h
@@ -40,8 +40,9 @@
 #define ARM32_WORKAROUND_766422 2
 #define ARM64_WORKAROUND_834220 3
 #define LIVEPATCH_FEATURE   4
+#define ARM_HARDEN_BRANCH_PREDICTOR 5
 
-#define ARM_NCAPS           5
+#define ARM_NCAPS           6
 
 #ifndef __ASSEMBLY__
 
diff --git a/xen/include/asm-arm/processor.h b/xen/include/asm-arm/processor.h
index 2c9c193755..51ecbd203a 100644
--- a/xen/include/asm-arm/processor.h
+++ b/xen/include/asm-arm/processor.h
@@ -318,8 +318,9 @@ struct cpuinfo_arm {
             unsigned long fp:4;   /* Floating Point */
             unsigned long simd:4; /* Advanced SIMD */
             unsigned long gic:4;  /* GIC support */
-            unsigned long __res0:4;
-            unsigned long __res1;
+            unsigned long __res0:28;
+            unsigned long csv2:4;
+            unsigned long __res1:4;
         };
     } pfr64;
 
-- 
2.14.3


From 6f6786ef0d7f7025860d360f6b1267193ffd1b27 Mon Sep 17 00:00:00 2001
From: Julien Grall <julien.grall@linaro.org>
Date: Tue, 16 Jan 2018 14:23:37 +0000
Subject: [PATCH 05/45] xen/arm64: Implement branch predictor hardening for
 affected Cortex-A CPUs

Cortex-A57, A72, A73 and A75 are susceptible to branch predictor
aliasing and can theoretically be attacked by malicious code.

This patch implements a PSCI-based mitigation for these CPUs when
available. The call into firmware will invalidate the branch predictor
state, preventing any malicious entries from affecting other victim
contexts.
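
A note on the firmware call (the mov x0, #0x84000000 / smc #0 sequence
in bpi.S below): 0x84000000 is the SMCCC function ID of PSCI_VERSION,
which is why the enable callback only installs the vectors when the
firmware implements PSCI 0.2 or later. Sketched in C (mirroring the
cpuerrata.c hunk below):

    if ( psci_ver >= PSCI_VERSION(0, 2) )
        ret = install_bp_hardening_vec(data, __psci_hyp_bp_inval_start,
                                       __psci_hyp_bp_inval_end);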

Ported from Linux git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux.git
branch kpti.

 Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
 Signed-off-by: Will Deacon <will.deacon@arm.com>

This is part of XSA-254.

Signed-off-by: Julien Grall <julien.grall@linaro.org>
Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
(cherry picked from commit e730f8e41e8537f1db9770b9464f9523c28857b9)
(cherry picked from commit 9f79e8d846e8413c828f5fc7cc6ac733728dff00)

Conflicts:
	xen/arch/arm/cpuerrata.c
---
 xen/arch/arm/arm64/bpi.S | 25 +++++++++++++++++++++++++
 xen/arch/arm/cpuerrata.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 73 insertions(+)

diff --git a/xen/arch/arm/arm64/bpi.S b/xen/arch/arm/arm64/bpi.S
index 6cc2f17529..4b7f1dc21f 100644
--- a/xen/arch/arm/arm64/bpi.S
+++ b/xen/arch/arm/arm64/bpi.S
@@ -56,6 +56,31 @@ ENTRY(__bp_harden_hyp_vecs_start)
     .endr
 ENTRY(__bp_harden_hyp_vecs_end)
 
+ENTRY(__psci_hyp_bp_inval_start)
+    sub     sp, sp, #(8 * 18)
+    stp     x16, x17, [sp, #(16 * 0)]
+    stp     x14, x15, [sp, #(16 * 1)]
+    stp     x12, x13, [sp, #(16 * 2)]
+    stp     x10, x11, [sp, #(16 * 3)]
+    stp     x8, x9, [sp, #(16 * 4)]
+    stp     x6, x7, [sp, #(16 * 5)]
+    stp     x4, x5, [sp, #(16 * 6)]
+    stp     x2, x3, [sp, #(16 * 7)]
+    stp     x0, x1, [sp, #(16 * 8)]
+    mov     x0, #0x84000000
+    smc     #0
+    ldp     x16, x17, [sp, #(16 * 0)]
+    ldp     x14, x15, [sp, #(16 * 1)]
+    ldp     x12, x13, [sp, #(16 * 2)]
+    ldp     x10, x11, [sp, #(16 * 3)]
+    ldp     x8, x9, [sp, #(16 * 4)]
+    ldp     x6, x7, [sp, #(16 * 5)]
+    ldp     x4, x5, [sp, #(16 * 6)]
+    ldp     x2, x3, [sp, #(16 * 7)]
+    ldp     x0, x1, [sp, #(16 * 8)]
+    add     sp, sp, #(8 * 18)
+ENTRY(__psci_hyp_bp_inval_end)
+
 /*
  * Local variables:
  * mode: ASM
diff --git a/xen/arch/arm/cpuerrata.c b/xen/arch/arm/cpuerrata.c
index d0693a2ec6..1b49f84f60 100644
--- a/xen/arch/arm/cpuerrata.c
+++ b/xen/arch/arm/cpuerrata.c
@@ -8,6 +8,7 @@
 #include <xen/warning.h>
 #include <asm/cpufeature.h>
 #include <asm/cpuerrata.h>
+#include <asm/psci.h>
 
 /* Hardening Branch predictor code for Arm64 */
 #ifdef CONFIG_ARM64_HARDEN_BRANCH_PREDICTOR
@@ -139,6 +140,31 @@ install_bp_hardening_vec(const struct arm_cpu_capabilities *entry,
     return ret;
 }
 
+extern char __psci_hyp_bp_inval_start[], __psci_hyp_bp_inval_end[];
+
+static int enable_psci_bp_hardening(void *data)
+{
+    bool ret = true;
+    static bool warned = false;
+
+    /*
+     * The mitigation uses the PSCI version function to invalidate the
+     * branch predictor. This function is only available with PSCI 0.2
+     * and later.
+     */
+    if ( psci_ver >= PSCI_VERSION(0, 2) )
+        ret = install_bp_hardening_vec(data, __psci_hyp_bp_inval_start,
+                                       __psci_hyp_bp_inval_end);
+    else if ( !warned )
+    {
+        ASSERT(system_state < SYS_STATE_active);
+        warning_add("PSCI 0.2 or later is required for the branch predictor hardening.\n");
+        warned = true;
+    }
+
+    return !ret;
+}
+
 #endif /* CONFIG_ARM64_HARDEN_BRANCH_PREDICTOR */
 
 #define MIDR_RANGE(model, min, max)     \
@@ -202,6 +228,28 @@ static const struct arm_cpu_capabilities arm_errata[] = {
         MIDR_RANGE(MIDR_CORTEX_A57, 0x00,
                    (1 << MIDR_VARIANT_SHIFT) | 2),
     },
+#endif
+#ifdef CONFIG_ARM64_HARDEN_BRANCH_PREDICTOR
+    {
+        .capability = ARM_HARDEN_BRANCH_PREDICTOR,
+        MIDR_ALL_VERSIONS(MIDR_CORTEX_A57),
+        .enable = enable_psci_bp_hardening,
+    },
+    {
+        .capability = ARM_HARDEN_BRANCH_PREDICTOR,
+        MIDR_ALL_VERSIONS(MIDR_CORTEX_A72),
+        .enable = enable_psci_bp_hardening,
+    },
+    {
+        .capability = ARM_HARDEN_BRANCH_PREDICTOR,
+        MIDR_ALL_VERSIONS(MIDR_CORTEX_A73),
+        .enable = enable_psci_bp_hardening,
+    },
+    {
+        .capability = ARM_HARDEN_BRANCH_PREDICTOR,
+        MIDR_ALL_VERSIONS(MIDR_CORTEX_A75),
+        .enable = enable_psci_bp_hardening,
+    },
 #endif
     {},
 };
-- 
2.14.3


From 6082e3ba8941b3d10c3cb73f445759c19e89afc9 Mon Sep 17 00:00:00 2001
From: Julien Grall <julien.grall@linaro.org>
Date: Wed, 7 Feb 2018 08:52:44 -0800
Subject: [PATCH 06/45] xen/arm32: entry: Consolidate DEFINE_TRAP_ENTRY_*
 macros

The only difference between all the DEFINE_TRAP_ENTRY_* macros is the
set of interrupts (Asynchronous Abort, IRQ, FIQ) unmasked.

Rather than duplicating the code, introduce a __DEFINE_TRAP_ENTRY macro
that takes the list of interrupts to unmask.

This is part of XSA-254.

Signed-off-by: Julien Grall <julien.grall@linaro.org>
Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
(cherry picked from commit 3bd8fd751e50dd981b7055fb33cdc8aa29537673)
---
 xen/arch/arm/arm32/entry.S | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/xen/arch/arm/arm32/entry.S b/xen/arch/arm/arm32/entry.S
index 090f049470..e1e60c278c 100644
--- a/xen/arch/arm/arm32/entry.S
+++ b/xen/arch/arm/arm32/entry.S
@@ -102,26 +102,26 @@ abort_guest_exit_end:
 
         mov pc, lr
 
-#define DEFINE_TRAP_ENTRY(trap)                                         \
+/*
+ * Macro to define a trap entry. The iflags argument corresponds to the
+ * list of interrupts (Asynchronous Abort, IRQ, FIQ) to unmask.
+ */
+#define __DEFINE_TRAP_ENTRY(trap, iflags)                               \
         ALIGN;                                                          \
 trap_##trap:                                                            \
         SAVE_ALL;                                                       \
-        cpsie i;        /* local_irq_enable */                          \
+        cpsie iflags;                                                   \
         adr lr, return_from_trap;                                       \
         mov r0, sp;                                                     \
         mov r11, sp;                                                    \
         bic sp, #7; /* Align the stack pointer (noop on guest trap) */  \
         b do_trap_##trap
 
-#define DEFINE_TRAP_ENTRY_NOIRQ(trap)                                   \
-        ALIGN;                                                          \
-trap_##trap:                                                            \
-        SAVE_ALL;                                                       \
-        adr lr, return_from_trap;                                       \
-        mov r0, sp;                                                     \
-        mov r11, sp;                                                    \
-        bic sp, #7; /* Align the stack pointer (noop on guest trap) */  \
-        b do_trap_##trap
+/* Trap handler which unmasks IRQ/Abort, keeps FIQ masked */
+#define DEFINE_TRAP_ENTRY(trap) __DEFINE_TRAP_ENTRY(trap, ai)
+
+/* Trap handler which unmasks Abort, keeps IRQ/FIQ masked */
+#define DEFINE_TRAP_ENTRY_NOIRQ(trap) __DEFINE_TRAP_ENTRY(trap, a)
 
         .align 5
 GLOBAL(hyp_traps_vector)
-- 
2.14.3


From a586cbd9f0cbb3835de1f8ab4d9a105e08b2ac5a Mon Sep 17 00:00:00 2001
From: Julien Grall <julien.grall@linaro.org>
Date: Fri, 2 Feb 2018 14:19:20 +0000
Subject: [PATCH 07/45] xen/arm32: Add missing MIDR values for Cortex-A17 and
 A12

Cortex-A17 and A12 MIDR will be used in a follow-up patch for hardening
the branch predictor.

This is part of XSA-254.

Signed-off-by: Julien Grall <julien.grall@linaro.org>
Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
(cherry picked from commit 340367bca5360f3e3d263341b58234d0efe5ced2)
---
 xen/include/asm-arm/processor.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/xen/include/asm-arm/processor.h b/xen/include/asm-arm/processor.h
index 51ecbd203a..d3882e0509 100644
--- a/xen/include/asm-arm/processor.h
+++ b/xen/include/asm-arm/processor.h
@@ -46,6 +46,8 @@
 
 #define ARM_CPU_IMP_ARM             0x41
 
+#define ARM_CPU_PART_CORTEX_A12     0xC0D
+#define ARM_CPU_PART_CORTEX_A17     0xC0E
 #define ARM_CPU_PART_CORTEX_A15     0xC0F
 #define ARM_CPU_PART_CORTEX_A53     0xD03
 #define ARM_CPU_PART_CORTEX_A57     0xD07
@@ -53,6 +55,8 @@
 #define ARM_CPU_PART_CORTEX_A73     0xD09
 #define ARM_CPU_PART_CORTEX_A75     0xD0A
 
+#define MIDR_CORTEX_A12 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A12)
+#define MIDR_CORTEX_A17 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A17)
 #define MIDR_CORTEX_A15 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A15)
 #define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53)
 #define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57)
-- 
2.14.3


From cf95bba7b7406ef1929ea4c6c36388ed43b4f9bb Mon Sep 17 00:00:00 2001
From: Julien Grall <julien.grall@linaro.org>
Date: Fri, 2 Feb 2018 14:19:21 +0000
Subject: [PATCH 08/45] xen/arm32: entry: Add missing trap_reset entry

At the moment, the reset vector is defined as .word 0 (i.e. andeq r0, r0,
r0).

This is rather unintuitive and will result in executing the undefined
instruction trap. Instead, introduce a trap helper for reset which will
generate an error message in the unlikely case that reset is called.

This is part of XSA-254.

Signed-off-by: Julien Grall <julien.grall@linaro.org>
Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
(cherry picked from commit 00268cc91270c7b0aa3a1906bf7e7702db9c61c1)

Conflicts:
	xen/arch/arm/arm32/traps.c
---
 xen/arch/arm/arm32/entry.S | 3 ++-
 xen/arch/arm/arm32/traps.c | 5 +++++
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/xen/arch/arm/arm32/entry.S b/xen/arch/arm/arm32/entry.S
index e1e60c278c..c5fec60e44 100644
--- a/xen/arch/arm/arm32/entry.S
+++ b/xen/arch/arm/arm32/entry.S
@@ -125,7 +125,7 @@ trap_##trap:                                                            \
 
         .align 5
 GLOBAL(hyp_traps_vector)
-        .word 0                         /* 0x00 - Reset */
+        b trap_reset                    /* 0x00 - Reset */
         b trap_undefined_instruction    /* 0x04 - Undefined Instruction */
         b trap_supervisor_call          /* 0x08 - Supervisor Call */
         b trap_prefetch_abort           /* 0x0c - Prefetch Abort */
@@ -134,6 +134,7 @@ GLOBAL(hyp_traps_vector)
         b trap_irq                      /* 0x18 - IRQ */
         b trap_fiq                      /* 0x1c - FIQ */
 
+DEFINE_TRAP_ENTRY(reset)
 DEFINE_TRAP_ENTRY(undefined_instruction)
 DEFINE_TRAP_ENTRY(supervisor_call)
 DEFINE_TRAP_ENTRY(prefetch_abort)
diff --git a/xen/arch/arm/arm32/traps.c b/xen/arch/arm/arm32/traps.c
index 7bfc5ffe9d..cd38dfaaf3 100644
--- a/xen/arch/arm/arm32/traps.c
+++ b/xen/arch/arm/arm32/traps.c
@@ -24,6 +24,11 @@
 
 #include <asm/processor.h>
 
+asmlinkage void do_trap_reset(struct cpu_user_regs *regs)
+{
+    do_unexpected_trap("Reset", regs);
+}
+
 asmlinkage void do_trap_undefined_instruction(struct cpu_user_regs *regs)
 {
     uint32_t pc = regs->pc;
-- 
2.14.3


From 7336d0d2a719d6135b8d02801401e449b0dbbfb6 Mon Sep 17 00:00:00 2001
From: Julien Grall <julien.grall@linaro.org>
Date: Fri, 2 Feb 2018 14:19:22 +0000
Subject: [PATCH 09/45] xen/arm32: Add skeleton to harden branch predictor
 aliasing attacks

Aliasing attacks against CPU branch predictors can allow an attacker to
redirect speculative control flow on some CPUs and potentially divulge
information from one context to another.

This patch adds initial skeleton code behind a new Kconfig option
to enable implementation-specific mitigations against these attacks
for CPUs that are affected.

Most of the mitigations will have to be applied when entering the
hypervisor from the guest context.

Because the attack is against the branch predictor, it is not possible
to safely use a branch instruction before the mitigation is applied.
Therefore, this has to be done in the vector entry before jumping to the
helper handling a given exception.

However, on arm32, each vector contains a single instruction. This means
that the hardened vector tables may rely on register state that does not
hold when running in the hypervisor (e.g. SP being 8-byte aligned).
Therefore, hypervisor code running with the guest vector tables should be
minimized and always have IRQs and SErrors masked to reduce the risk of
using them.

This patch provides an infrastructure to switch vector tables before
entering the guest and when leaving it.
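
Concretely (a sketch using the names added in this patch): an affected
CPU opts in by publishing its hardened table in the new per-CPU
variable, and the return-to-guest path in entry.S loads that pointer
into HVBAR:

    /* In the enable callback of an affected CPU (the actual hardened
     * vector tables are added by the follow-up patches): */
    this_cpu(bp_harden_vecs) = hyp_traps_vector_bp_inv;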

Note that alternatives could have been used, but older Xen (4.8 or
earlier) doesn't support them. So avoid using alternatives to ease
backporting.

This is part of XSA-254.

Signed-off-by: Julien Grall <julien.grall@linaro.org>
Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
(cherry picked from commit 9bd4463b5c7cc026a07b9bbd41a6a7122a95647e)
---
 xen/arch/arm/Kconfig       |  3 +++
 xen/arch/arm/arm32/entry.S | 41 ++++++++++++++++++++++++++++++++++++++++-
 xen/arch/arm/cpuerrata.c   | 30 ++++++++++++++++++++++++++++++
 3 files changed, 73 insertions(+), 1 deletion(-)

diff --git a/xen/arch/arm/Kconfig b/xen/arch/arm/Kconfig
index a71f9c0c73..686c34452d 100644
--- a/xen/arch/arm/Kconfig
+++ b/xen/arch/arm/Kconfig
@@ -179,6 +179,9 @@ config HARDEN_BRANCH_PREDICTOR
 config ARM64_HARDEN_BRANCH_PREDICTOR
     def_bool y if ARM_64 && HARDEN_BRANCH_PREDICTOR
 
+config ARM32_HARDEN_BRANCH_PREDICTOR
+    def_bool y if ARM_32 && HARDEN_BRANCH_PREDICTOR
+
 source "common/Kconfig"
 
 source "drivers/Kconfig"
diff --git a/xen/arch/arm/arm32/entry.S b/xen/arch/arm/arm32/entry.S
index c5fec60e44..97cfa4cd27 100644
--- a/xen/arch/arm/arm32/entry.S
+++ b/xen/arch/arm/arm32/entry.S
@@ -34,6 +34,20 @@
         blne save_guest_regs
 
 save_guest_regs:
+#ifdef CONFIG_ARM32_HARDEN_BRANCH_PREDICTOR
+        /*
+         * Restore vectors table to the default as it may have been
+         * changed when returning to the guest (see
+         * return_to_hypervisor). We need to do that early (e.g before
+         * any interrupts are unmasked) because hardened vectors requires
+         * SP to be 8 bytes aligned. This does not hold when running in
+         * the hypervisor.
+         */
+        ldr r1, =hyp_traps_vector
+        mcr p15, 4, r1, c12, c0, 0
+        isb
+#endif
+
         ldr r11, =0xffffffff  /* Clobber SP which is only valid for hypervisor frames. */
         str r11, [sp, #UREGS_sp]
         SAVE_ONE_BANKED(SP_usr)
@@ -167,12 +181,37 @@ return_to_guest:
         RESTORE_ONE_BANKED(R11_fiq); RESTORE_ONE_BANKED(R12_fiq);
         /* Fall thru */
 return_to_hypervisor:
-        cpsid i
+        cpsid ai
         ldr lr, [sp, #UREGS_lr]
         ldr r11, [sp, #UREGS_pc]
         msr ELR_hyp, r11
         ldr r11, [sp, #UREGS_cpsr]
         msr SPSR_hyp, r11
+#ifdef CONFIG_ARM32_HARDEN_BRANCH_PREDICTOR
+        /*
+         * Hardening the branch predictor may require a different
+         * vector table to be set up before returning to the guest.
+         * Those vectors may rely on register state that does not hold
+         * when running in the hypervisor (e.g. SP being 8-byte
+         * aligned), so set up HVBAR very late.
+         *
+         * Default vectors table will be restored on exit (see
+         * save_guest_regs).
+         */
+        mov r9, #0                      /* vector tables = NULL */
+        /*
+         * Load the vector table pointer from the per-cpu bp_harden_vecs
+         * only when returning to the guest.
+         */
+        and r11, #PSR_MODE_MASK
+        cmp r11, #PSR_MODE_HYP
+        ldrne r11, =per_cpu__bp_harden_vecs
+        mrcne p15, 4, r10, c13, c0, 2   /* r10 = per-cpu offset (HTPIDR) */
+        addne r11, r11, r10             /* r11 = offset of the vector tables */
+        ldrne r9, [r11]                 /* r9  = vector tables */
+        cmp r9, #0                      /* Only update HVBAR when the vector */
+        mcrne p15, 4, r9, c12, c0, 0    /* tables is not NULL. */
+#endif
         pop {r0-r12}
         add sp, #(UREGS_SP_usr - UREGS_sp); /* SP, LR, SPSR, PC */
         clrex
diff --git a/xen/arch/arm/cpuerrata.c b/xen/arch/arm/cpuerrata.c
index 1b49f84f60..1e3713524e 100644
--- a/xen/arch/arm/cpuerrata.c
+++ b/xen/arch/arm/cpuerrata.c
@@ -167,6 +167,36 @@ static int enable_psci_bp_hardening(void *data)
 
 #endif /* CONFIG_ARM64_HARDEN_BRANCH_PREDICTOR */
 
+/* Hardening Branch predictor code for Arm32 */
+#ifdef CONFIG_ARM32_HARDEN_BRANCH_PREDICTOR
+
+/*
+ * Per-CPU vector tables to use when returning to the guest. They will
+ * only be used on platforms requiring branch predictor hardening.
+ */
+DEFINE_PER_CPU_READ_MOSTLY(const char *, bp_harden_vecs);
+
+extern char hyp_traps_vector_bp_inv[];
+
+static void __maybe_unused
+install_bp_hardening_vecs(const struct arm_cpu_capabilities *entry,
+                          const char *hyp_vecs, const char *desc)
+{
+    /*
+     * Enable callbacks are called on every CPU based on the
+     * capabilities. So double-check whether the CPU matches the
+     * entry.
+     */
+    if ( !entry->matches(entry) )
+        return;
+
+    printk(XENLOG_INFO "CPU%u will %s on guest exit\n",
+           smp_processor_id(), desc);
+    this_cpu(bp_harden_vecs) = hyp_vecs;
+}
+
+#endif
+
 #define MIDR_RANGE(model, min, max)     \
     .matches = is_affected_midr_range,  \
     .midr_model = model,                \
-- 
2.14.3


From 754345c01933f1eed3d1601fa8fdbf62f52c9d80 Mon Sep 17 00:00:00 2001
From: Julien Grall <julien.grall@linaro.org>
Date: Fri, 2 Feb 2018 14:19:23 +0000
Subject: [PATCH 10/45] xen/arm32: Invalidate BTB on guest exit for Cortex A17
 and 12

In order to avoid aliasing attacks against the branch predictor, let's
invalidate the BTB on guest exit. This is made complicated by the fact
that we cannot take a branch before invalidating the BTB.
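
The SP trick used below works like this (a C-flavoured sketch of the
vect_br macro, not literal code): the entry for vector i falls through
7 - i of the "add sp, sp, #1" instructions, so once BPIALL has run,
sp = original_sp + (7 - i), with original_sp known to be 8-byte
aligned. Each vect_br then effectively does:

    sp ^= val;           /* if the low bits were "val"... */
    if ( !(sp & 7) )     /* ...they are now clear... */
        goto handler;    /* ...and sp is back to its original value */
    sp ^= val;           /* otherwise undo and try the next vect_br */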

This is based on the fourth version posted by Marc Zyngier on Linux-arm
mailing list (see [1]).

This is part of XSA-254.

[1] https://www.spinics.net/lists/arm-kernel/msg632062.html

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Julien Grall <julien.grall@linaro.org>
Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
(cherry picked from commit 05e0690d03dc6177e614e060ae78001d4f2abde2)

Renamed trap_hypervisor_call to trap_supervisor_call
---
 xen/arch/arm/arm32/entry.S | 38 ++++++++++++++++++++++++++++++++++++++
 xen/arch/arm/cpuerrata.c   | 19 +++++++++++++++++++
 2 files changed, 57 insertions(+)

diff --git a/xen/arch/arm/arm32/entry.S b/xen/arch/arm/arm32/entry.S
index 97cfa4cd27..2ffa42248a 100644
--- a/xen/arch/arm/arm32/entry.S
+++ b/xen/arch/arm/arm32/entry.S
@@ -148,6 +148,44 @@ GLOBAL(hyp_traps_vector)
         b trap_irq                      /* 0x18 - IRQ */
         b trap_fiq                      /* 0x1c - FIQ */
 
+#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR
+
+        .align 5
+GLOBAL(hyp_traps_vector_bp_inv)
+        /*
+         * We encode the exception entry in the bottom 3 bits of
+         * SP, and we have to guarantee that it is 8-byte aligned.
+         */
+        add sp, sp, #1                  /* Reset            7 */
+        add sp, sp, #1                  /* Undef            6 */
+        add sp, sp, #1                  /* Hypervisor Call  5 */
+        add sp, sp, #1                  /* Prefetch abort   4 */
+        add sp, sp, #1                  /* Data abort       3 */
+        add sp, sp, #1                  /* Hypervisor       2 */
+        add sp, sp, #1                  /* IRQ              1 */
+        nop                             /* FIQ              0 */
+
+        mcr	p15, 0, r0, c7, c5, 6	    /* BPIALL */
+        isb
+
+.macro vect_br val, targ
+        eor     sp, sp, #\val
+        tst     sp, #7
+        eorne   sp, sp, #\val
+        beq     \targ
+.endm
+
+        vect_br 0, trap_fiq
+        vect_br 1, trap_irq
+        vect_br 2, trap_guest_sync
+        vect_br 3, trap_data_abort
+        vect_br 4, trap_prefetch_abort
+        vect_br 5, trap_supervisor_call
+        vect_br 6, trap_undefined_instruction
+        vect_br 7, trap_reset
+
+#endif /* CONFIG_HARDEN_BRANCH_PREDICTOR */
+
 DEFINE_TRAP_ENTRY(reset)
 DEFINE_TRAP_ENTRY(undefined_instruction)
 DEFINE_TRAP_ENTRY(supervisor_call)
diff --git a/xen/arch/arm/cpuerrata.c b/xen/arch/arm/cpuerrata.c
index 1e3713524e..84fe698d0a 100644
--- a/xen/arch/arm/cpuerrata.c
+++ b/xen/arch/arm/cpuerrata.c
@@ -195,6 +195,13 @@ install_bp_hardening_vecs(const struct arm_cpu_capabilities *entry,
     this_cpu(bp_harden_vecs) = hyp_vecs;
 }
 
+static int enable_bp_inv_hardening(void *data)
+{
+    install_bp_hardening_vecs(data, hyp_traps_vector_bp_inv,
+                              "execute BPIALL");
+    return 0;
+}
+
 #endif
 
 #define MIDR_RANGE(model, min, max)     \
@@ -280,6 +287,18 @@ static const struct arm_cpu_capabilities arm_errata[] = {
         MIDR_ALL_VERSIONS(MIDR_CORTEX_A75),
         .enable = enable_psci_bp_hardening,
     },
+#endif
+#ifdef CONFIG_ARM32_HARDEN_BRANCH_PREDICTOR
+    {
+        .capability = ARM_HARDEN_BRANCH_PREDICTOR,
+        MIDR_ALL_VERSIONS(MIDR_CORTEX_A12),
+        .enable = enable_bp_inv_hardening,
+    },
+    {
+        .capability = ARM_HARDEN_BRANCH_PREDICTOR,
+        MIDR_ALL_VERSIONS(MIDR_CORTEX_A17),
+        .enable = enable_bp_inv_hardening,
+    },
 #endif
     {},
 };
-- 
2.14.3


From 1105f3a92df83f3bfcda78d66c4d28458123e1bb Mon Sep 17 00:00:00 2001
From: Julien Grall <julien.grall@linaro.org>
Date: Fri, 2 Feb 2018 14:19:24 +0000
Subject: [PATCH 11/45] xen/arm32: Invalidate icache on guest exit for
 Cortex-A15

In order to avoid aliasing attacks against the branch predictor on
Cortex-A15, let's invalidate the BTB on guest exit, which can only be
done by invalidating the icache (with ACTLR[0] being set).

We use the same hack as for A12/A17 to perform the vector decoding.

This is based on a Linux patch from the kpti branch in [1].

[1] https://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux.git

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Julien Grall <julien.grall@linaro.org>
Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
(cherry picked from commit 665c4b6aa79eb21b1aada9f7f98fb5cb7f03743a)
---
 xen/arch/arm/arm32/entry.S | 21 +++++++++++++++++++++
 xen/arch/arm/cpuerrata.c   | 13 +++++++++++++
 2 files changed, 34 insertions(+)

diff --git a/xen/arch/arm/arm32/entry.S b/xen/arch/arm/arm32/entry.S
index 2ffa42248a..3e48ba18d8 100644
--- a/xen/arch/arm/arm32/entry.S
+++ b/xen/arch/arm/arm32/entry.S
@@ -150,6 +150,26 @@ GLOBAL(hyp_traps_vector)
 
 #ifdef CONFIG_HARDEN_BRANCH_PREDICTOR
 
+        .align 5
+GLOBAL(hyp_traps_vector_ic_inv)
+        /*
+         * We encode the exception entry in the bottom 3 bits of
+         * SP, and we have to guarantee that it is 8-byte aligned.
+         */
+        add sp, sp, #1                  /* Reset            7 */
+        add sp, sp, #1                  /* Undef            6 */
+        add sp, sp, #1                  /* Hypervisor call  5 */
+        add sp, sp, #1                  /* Prefetch abort   4 */
+        add sp, sp, #1                  /* Data abort       3 */
+        add sp, sp, #1                  /* Hypervisor       2 */
+        add sp, sp, #1                  /* IRQ              1 */
+        nop                             /* FIQ              0 */
+
+        mcr p15, 0, r0, c7, c5, 0       /* ICIALLU */
+        isb
+
+        b decode_vectors
+
         .align 5
 GLOBAL(hyp_traps_vector_bp_inv)
         /*
@@ -168,6 +188,7 @@ GLOBAL(hyp_traps_vector_bp_inv)
         mcr	p15, 0, r0, c7, c5, 6	    /* BPIALL */
         isb
 
+decode_vectors:
 .macro vect_br val, targ
         eor     sp, sp, #\val
         tst     sp, #7
diff --git a/xen/arch/arm/cpuerrata.c b/xen/arch/arm/cpuerrata.c
index 84fe698d0a..f29c5828ac 100644
--- a/xen/arch/arm/cpuerrata.c
+++ b/xen/arch/arm/cpuerrata.c
@@ -177,6 +177,7 @@ static int enable_psci_bp_hardening(void *data)
 DEFINE_PER_CPU_READ_MOSTLY(const char *, bp_harden_vecs);
 
 extern char hyp_traps_vector_bp_inv[];
+extern char hyp_traps_vector_ic_inv[];
 
 static void __maybe_unused
 install_bp_hardening_vecs(const struct arm_cpu_capabilities *entry,
@@ -202,6 +203,13 @@ static int enable_bp_inv_hardening(void *data)
     return 0;
 }
 
+static int enable_ic_inv_hardening(void *data)
+{
+    install_bp_hardening_vecs(data, hyp_traps_vector_ic_inv,
+                              "execute ICIALLU");
+    return 0;
+}
+
 #endif
 
 #define MIDR_RANGE(model, min, max)     \
@@ -299,6 +307,11 @@ static const struct arm_cpu_capabilities arm_errata[] = {
         MIDR_ALL_VERSIONS(MIDR_CORTEX_A17),
         .enable = enable_bp_inv_hardening,
     },
+    {
+        .capability = ARM_HARDEN_BRANCH_PREDICTOR,
+        MIDR_ALL_VERSIONS(MIDR_CORTEX_A15),
+        .enable = enable_ic_inv_hardening,
+    },
 #endif
     {},
 };
-- 
2.14.3


From 11875b7d5706f8aef86d306a43d7fe3b7011aaa2 Mon Sep 17 00:00:00 2001
From: Julien Grall <julien.grall@linaro.org>
Date: Fri, 2 Feb 2018 14:19:25 +0000
Subject: [PATCH 12/45] xen/arm32: entry: Document the purpose of r11 in the
 traps handler

It took me a bit of time to understand why __DEFINE_TRAP_ENTRY is
storing the original stack pointer in r11. It works in tandem with
return_from_trap, where sp will be restored from r11.

This is fine because per the AAPCS r11 must be preserved by the
subroutine. So in return_from_trap, r11 will still contain the original
stack pointer.

Add some documentation in the code to point the two sides to each other.

Signed-off-by: Julien Grall <julien.grall@linaro.org>
Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
(cherry picked from commit dd855aa430f2da9b677c145f0c625a82aaa97110)
---
 xen/arch/arm/arm32/entry.S | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/xen/arch/arm/arm32/entry.S b/xen/arch/arm/arm32/entry.S
index 3e48ba18d8..b66705ff50 100644
--- a/xen/arch/arm/arm32/entry.S
+++ b/xen/arch/arm/arm32/entry.S
@@ -127,6 +127,10 @@ trap_##trap:                                                            \
         cpsie iflags;                                                   \
         adr lr, return_from_trap;                                       \
         mov r0, sp;                                                     \
+        /*                                                              \
+         * Save the stack pointer in r11. It will be restored after the \
+         * trap has been handled (see return_from_trap).                \
+         */                                                             \
         mov r11, sp;                                                    \
         bic sp, #7; /* Align the stack pointer (noop on guest trap) */  \
         b do_trap_##trap
@@ -217,6 +221,10 @@ DEFINE_TRAP_ENTRY_NOIRQ(irq)
 DEFINE_TRAP_ENTRY_NOIRQ(fiq)
 
 return_from_trap:
+        /*
+         * Restore the stack pointer from r11. It was saved on exception
+         * entry (see __DEFINE_TRAP_ENTRY).
+         */
         mov sp, r11
 ENTRY(return_to_new_vcpu32)
         ldr r11, [sp, #UREGS_cpsr]
-- 
2.14.3


From 2aff8d5e7318882909564f8b1cf091621066c41b Mon Sep 17 00:00:00 2001
From: Andrew Cooper <andrew.cooper3@citrix.com>
Date: Thu, 8 Feb 2018 12:38:24 +0100
Subject: [PATCH 13/45] x86: Avoid corruption on migrate for vcpus using CPUID
 Faulting

Xen 4.8 and later virtualises CPUID Faulting support for guests.  However, the
value of MSR_MISC_FEATURES_ENABLES is omitted from the vcpu state, meaning
that the current cpuid faulting setting is lost on migrate/suspend/resume.
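
For reference (values per the Intel SDM, not introduced by this diff):
MSR_INTEL_MISC_FEATURES_ENABLES is MSR 0x140 and CPUID faulting is its
bit 0, so the record saved/loaded below is simply:

    msr.index = MSR_INTEL_MISC_FEATURES_ENABLES;   /* MSR 0x140 */
    msr.value = MSR_MISC_FEATURES_CPUID_FAULTING;  /* bit 0 set */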

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
master commit: b90f86be161c74df8cb69c98d9f22885d9d87114
master date: 2017-12-01 18:09:48 +0000
---
 xen/arch/x86/domctl.c      | 18 ++++++++++++++++++
 xen/arch/x86/hvm/vmx/vmx.c | 13 ++++++++++++-
 2 files changed, 30 insertions(+), 1 deletion(-)

diff --git a/xen/arch/x86/domctl.c b/xen/arch/x86/domctl.c
index d32e70e626..8bb4a529a2 100644
--- a/xen/arch/x86/domctl.c
+++ b/xen/arch/x86/domctl.c
@@ -1292,6 +1292,19 @@ long arch_do_domctl(
                     }
                 }
 
+                if ( v->arch.cpuid_faulting )
+                {
+                    if ( i < vmsrs->msr_count && !ret )
+                    {
+                        msr.index = MSR_INTEL_MISC_FEATURES_ENABLES;
+                        msr.reserved = 0;
+                        msr.value = MSR_MISC_FEATURES_CPUID_FAULTING;
+                        if ( copy_to_guest_offset(vmsrs->msrs, i, &msr, 1) )
+                            ret = -EFAULT;
+                    }
+                    ++i;
+                }
+
                 vcpu_unpause(v);
 
                 if ( i > vmsrs->msr_count && !ret )
@@ -1319,6 +1332,11 @@ long arch_do_domctl(
 
                 switch ( msr.index )
                 {
+                case MSR_INTEL_MISC_FEATURES_ENABLES:
+                    v->arch.cpuid_faulting = !!(msr.value &
+                                                MSR_MISC_FEATURES_CPUID_FAULTING);
+                    continue;
+
                 case MSR_AMD64_DR0_ADDRESS_MASK:
                     if ( !boot_cpu_has(X86_FEATURE_DBEXT) ||
                          (msr.value >> 32) )
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index fa528fcf84..e282fc865f 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -787,7 +787,8 @@ static int vmx_load_vmcs_ctxt(struct vcpu *v, struct hvm_hw_cpu *ctxt)
 
 static unsigned int __init vmx_init_msr(void)
 {
-    return (cpu_has_mpx && cpu_has_vmx_mpx) +
+    return 1 /* MISC_FEATURES_ENABLES */ +
+           (cpu_has_mpx && cpu_has_vmx_mpx) +
            (cpu_has_xsaves && cpu_has_vmx_xsaves);
 }
 
@@ -795,6 +796,12 @@ static void vmx_save_msr(struct vcpu *v, struct hvm_msr *ctxt)
 {
     vmx_vmcs_enter(v);
 
+    if ( v->arch.cpuid_faulting )
+    {
+        ctxt->msr[ctxt->count].index = MSR_INTEL_MISC_FEATURES_ENABLES;
+        ctxt->msr[ctxt->count++].val = MSR_MISC_FEATURES_CPUID_FAULTING;
+    }
+
     if ( cpu_has_mpx && cpu_has_vmx_mpx )
     {
         __vmread(GUEST_BNDCFGS, &ctxt->msr[ctxt->count].val);
@@ -823,6 +830,10 @@ static int vmx_load_msr(struct vcpu *v, struct hvm_msr *ctxt)
     {
         switch ( ctxt->msr[i].index )
         {
+        case MSR_INTEL_MISC_FEATURES_ENABLES:
+            v->arch.cpuid_faulting = !!(ctxt->msr[i].val &
+                                        MSR_MISC_FEATURES_CPUID_FAULTING);
+            break;
         case MSR_IA32_BNDCFGS:
             if ( cpu_has_mpx && cpu_has_vmx_mpx &&
                  is_canonical_address(ctxt->msr[i].val) &&
-- 
2.14.3


From 3efcd7fb40a900bc7d4f9063f2d43ee27b0a5270 Mon Sep 17 00:00:00 2001
From: Andrew Cooper <andrew.cooper3@citrix.com>
Date: Thu, 8 Feb 2018 12:39:08 +0100
Subject: [PATCH 14/45] x86/alt: Break out alternative-asm into a separate
 header file

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: Wei Liu <wei.liu2@citrix.com>
Acked-by: Jan Beulich <jbeulich@suse.com>
master commit: 9d7b4351d3bb5c744db311cffa57ba3ebb583327
master date: 2018-01-05 19:57:07 +0000
---
 xen/include/asm-x86/alternative-asm.h | 31 +++++++++++++++++++++++++++++++
 xen/include/asm-x86/alternative.h     | 13 +++----------
 2 files changed, 34 insertions(+), 10 deletions(-)
 create mode 100644 xen/include/asm-x86/alternative-asm.h

diff --git a/xen/include/asm-x86/alternative-asm.h b/xen/include/asm-x86/alternative-asm.h
new file mode 100644
index 0000000000..bf0332ef28
--- /dev/null
+++ b/xen/include/asm-x86/alternative-asm.h
@@ -0,0 +1,31 @@
+#ifndef _ASM_X86_ALTERNATIVE_ASM_H_
+#define _ASM_X86_ALTERNATIVE_ASM_H_
+
+#ifdef __ASSEMBLY__
+
+/*
+ * Issue one struct alt_instr descriptor entry (need to put it into
+ * the section .altinstructions, see below). This entry contains
+ * enough information for the alternatives patching code to patch an
+ * instruction. See apply_alternatives().
+ */
+.macro altinstruction_entry orig alt feature orig_len alt_len
+    .long \orig - .
+    .long \alt - .
+    .word \feature
+    .byte \orig_len
+    .byte \alt_len
+.endm
+
+#endif /* __ASSEMBLY__ */
+#endif /* _ASM_X86_ALTERNATIVE_ASM_H_ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/include/asm-x86/alternative.h b/xen/include/asm-x86/alternative.h
index db4f08e0e7..ba537d6b7e 100644
--- a/xen/include/asm-x86/alternative.h
+++ b/xen/include/asm-x86/alternative.h
@@ -1,17 +1,10 @@
 #ifndef __X86_ALTERNATIVE_H__
 #define __X86_ALTERNATIVE_H__
 
+#include <asm/alternative-asm.h>
 #include <asm/nops.h>
 
-#ifdef __ASSEMBLY__
-.macro altinstruction_entry orig alt feature orig_len alt_len
-        .long \orig - .
-        .long \alt - .
-        .word \feature
-        .byte \orig_len
-        .byte \alt_len
-.endm
-#else
+#ifndef __ASSEMBLY__
 #include <xen/stringify.h>
 #include <xen/types.h>
 
@@ -145,6 +138,6 @@ extern void alternative_instructions(void);
 /* Use this macro(s) if you need more than one output parameter. */
 #define ASM_OUTPUT2(a...) a
 
-#endif  /*  __ASSEMBLY__  */
+#endif /*  !__ASSEMBLY__  */
 
 #endif /* __X86_ALTERNATIVE_H__ */
-- 
2.14.3


From 87cb0e2090fce317c4e6775f343d5caba66f61f1 Mon Sep 17 00:00:00 2001
From: Andrew Cooper <andrew.cooper3@citrix.com>
Date: Thu, 8 Feb 2018 12:41:28 +0100
Subject: [PATCH 16/45] x86/alt: Introduce ALTERNATIVE{,_2} macros

To help create alternative frames in assembly.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
master commit: 4711428f5e2a9bfff9f8d75b6a696072118c19a4
master date: 2018-01-05 19:57:07 +0000
---
 xen/include/asm-x86/alternative-asm.h | 46 +++++++++++++++++++++++++++++++++++
 1 file changed, 46 insertions(+)

diff --git a/xen/include/asm-x86/alternative-asm.h b/xen/include/asm-x86/alternative-asm.h
index bf0332ef28..6640e85581 100644
--- a/xen/include/asm-x86/alternative-asm.h
+++ b/xen/include/asm-x86/alternative-asm.h
@@ -17,6 +17,52 @@
     .byte \alt_len
 .endm
 
+.macro ALTERNATIVE oldinstr, newinstr, feature
+.Lold_start_\@:
+    \oldinstr
+.Lold_end_\@:
+
+    .pushsection .altinstructions, "a", @progbits
+    altinstruction_entry .Lold_start_\@, .Lnew_start_\@, \feature, \
+        (.Lold_end_\@ - .Lold_start_\@), (.Lnew_end_\@ - .Lnew_start_\@)
+
+    .section .discard, "a", @progbits
+    /* Assembler-time check that \newinstr isn't longer than \oldinstr. */
+    .byte 0xff + (.Lnew_end_\@ - .Lnew_start_\@) - (.Lold_end_\@ - .Lold_start_\@)
+
+    .section .altinstr_replacement, "ax", @progbits
+.Lnew_start_\@:
+    \newinstr
+.Lnew_end_\@:
+    .popsection
+.endm
+
+.macro ALTERNATIVE_2 oldinstr, newinstr1, feature1, newinstr2, feature2
+.Lold_start_\@:
+    \oldinstr
+.Lold_end_\@:
+
+    .pushsection .altinstructions, "a", @progbits
+    altinstruction_entry .Lold_start_\@, .Lnew1_start_\@, \feature1, \
+        (.Lold_end_\@ - .Lold_start_\@), (.Lnew1_end_\@ - .Lnew1_start_\@)
+    altinstruction_entry .Lold_start_\@, .Lnew2_start_\@, \feature2, \
+        (.Lold_end_\@ - .Lold_start_\@), (.Lnew2_end_\@ - .Lnew2_start_\@)
+
+    .section .discard, "a", @progbits
+    /* Assembler-time check that \newinstr{1,2} aren't longer than \oldinstr. */
+    .byte 0xff + (.Lnew1_end_\@ - .Lnew1_start_\@) - (.Lold_end_\@ - .Lold_start_\@)
+    .byte 0xff + (.Lnew2_end_\@ - .Lnew2_start_\@) - (.Lold_end_\@ - .Lold_start_\@)
+
+    .section .altinstr_replacement, "ax", @progbits
+.Lnew1_start_\@:
+    \newinstr1
+.Lnew1_end_\@:
+.Lnew2_start_\@:
+    \newinstr2
+.Lnew2_end_\@:
+    .popsection
+.endm
+
 #endif /* __ASSEMBLY__ */
 #endif /* _ASM_X86_ALTERNATIVE_ASM_H_ */
 
-- 
2.14.3


From 499391b50b85d31fa3dd4c427a816e10facb1fe4 Mon Sep 17 00:00:00 2001
From: Andrew Cooper <andrew.cooper3@citrix.com>
Date: Thu, 8 Feb 2018 12:42:02 +0100
Subject: [PATCH 17/45] x86/hvm: Rename update_guest_vendor() callback to
 cpuid_policy_changed()

It will shortly be used for more than just changing the vendor.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: Wei Liu <wei.liu2@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
master commit: 3bea00966eb6680410c89df764d075a8fbacc3cc
master date: 2018-01-05 19:57:07 +0000
---
 xen/arch/x86/domctl.c         | 19 +++++++++++--------
 xen/arch/x86/hvm/hvm.c        |  2 +-
 xen/arch/x86/hvm/svm/svm.c    |  4 ++--
 xen/arch/x86/hvm/vmx/vmx.c    |  5 ++---
 xen/include/asm-x86/hvm/hvm.h |  6 +++---
 5 files changed, 19 insertions(+), 17 deletions(-)

diff --git a/xen/arch/x86/domctl.c b/xen/arch/x86/domctl.c
index 8bb4a529a2..ad5536d7a7 100644
--- a/xen/arch/x86/domctl.c
+++ b/xen/arch/x86/domctl.c
@@ -51,6 +51,8 @@ static int gdbsx_guest_mem_io(domid_t domid, struct xen_domctl_gdbsx_memio *iop)
 static void update_domain_cpuid_info(struct domain *d,
                                      const xen_domctl_cpuid_t *ctl)
 {
+    bool call_policy_changed = false; /* Avoid for_each_vcpu() unnecessarily */
+
     switch ( ctl->input[0] )
     {
     case 0: {
@@ -69,14 +71,7 @@ static void update_domain_cpuid_info(struct domain *d,
         int old_vendor = d->arch.x86_vendor;
 
         d->arch.x86_vendor = get_cpu_vendor(vendor_id.str, gcv_guest);
-
-        if ( is_hvm_domain(d) && (d->arch.x86_vendor != old_vendor) )
-        {
-            struct vcpu *v;
-
-            for_each_vcpu( d, v )
-                hvm_update_guest_vendor(v);
-        }
+        call_policy_changed = (d->arch.x86_vendor != old_vendor);
 
         break;
     }
@@ -235,6 +230,14 @@ static void update_domain_cpuid_info(struct domain *d,
         }
         break;
     }
+
+    if ( is_hvm_domain(d) && call_policy_changed )
+    {
+        struct vcpu *v;
+
+        for_each_vcpu( d, v )
+            hvm_cpuid_policy_changed(v);
+    }
 }
 
 void arch_get_domain_info(const struct domain *d,
diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index d210265669..9b228d88e4 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -1621,7 +1621,7 @@ int hvm_vcpu_initialise(struct vcpu *v)
         hvm_set_guest_tsc(v, 0);
     }
 
-    hvm_update_guest_vendor(v);
+    hvm_cpuid_policy_changed(v);
 
     return 0;
 
diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c
index 60c804e44f..1b84aa6f0a 100644
--- a/xen/arch/x86/hvm/svm/svm.c
+++ b/xen/arch/x86/hvm/svm/svm.c
@@ -607,7 +607,7 @@ static void svm_update_guest_efer(struct vcpu *v)
     vmcb_set_efer(vmcb, new_efer);
 }
 
-static void svm_update_guest_vendor(struct vcpu *v)
+static void svm_cpuid_policy_changed(struct vcpu *v)
 {
     struct arch_svm_struct *arch_svm = &v->arch.hvm_svm;
     struct vmcb_struct *vmcb = arch_svm->vmcb;
@@ -2252,7 +2252,7 @@ static struct hvm_function_table __initdata svm_function_table = {
     .get_shadow_gs_base   = svm_get_shadow_gs_base,
     .update_guest_cr      = svm_update_guest_cr,
     .update_guest_efer    = svm_update_guest_efer,
-    .update_guest_vendor  = svm_update_guest_vendor,
+    .cpuid_policy_changed = svm_cpuid_policy_changed,
     .set_guest_pat        = svm_set_guest_pat,
     .get_guest_pat        = svm_get_guest_pat,
     .set_tsc_offset       = svm_set_tsc_offset,
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index e282fc865f..a7053527f1 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -72,7 +72,6 @@ static void vmx_free_vlapic_mapping(struct domain *d);
 static void vmx_install_vlapic_mapping(struct vcpu *v);
 static void vmx_update_guest_cr(struct vcpu *v, unsigned int cr);
 static void vmx_update_guest_efer(struct vcpu *v);
-static void vmx_update_guest_vendor(struct vcpu *v);
 static void vmx_cpuid_intercept(
     unsigned int *eax, unsigned int *ebx,
     unsigned int *ecx, unsigned int *edx);
@@ -544,7 +543,7 @@ void vmx_update_exception_bitmap(struct vcpu *v)
         __vmwrite(EXCEPTION_BITMAP, bitmap);
 }
 
-static void vmx_update_guest_vendor(struct vcpu *v)
+static void vmx_cpuid_policy_changed(struct vcpu *v)
 {
     if ( opt_hvm_fep ||
          (v->domain->arch.x86_vendor != boot_cpu_data.x86_vendor) )
@@ -2179,7 +2178,7 @@ static struct hvm_function_table __initdata vmx_function_table = {
     .update_host_cr3      = vmx_update_host_cr3,
     .update_guest_cr      = vmx_update_guest_cr,
     .update_guest_efer    = vmx_update_guest_efer,
-    .update_guest_vendor  = vmx_update_guest_vendor,
+    .cpuid_policy_changed = vmx_cpuid_policy_changed,
     .set_guest_pat        = vmx_set_guest_pat,
     .get_guest_pat        = vmx_get_guest_pat,
     .set_tsc_offset       = vmx_set_tsc_offset,
diff --git a/xen/include/asm-x86/hvm/hvm.h b/xen/include/asm-x86/hvm/hvm.h
index 7e7462e79d..9380d6ce58 100644
--- a/xen/include/asm-x86/hvm/hvm.h
+++ b/xen/include/asm-x86/hvm/hvm.h
@@ -142,7 +142,7 @@ struct hvm_function_table {
     void (*update_guest_cr)(struct vcpu *v, unsigned int cr);
     void (*update_guest_efer)(struct vcpu *v);
 
-    void (*update_guest_vendor)(struct vcpu *v);
+    void (*cpuid_policy_changed)(struct vcpu *v);
 
     int  (*get_guest_pat)(struct vcpu *v, u64 *);
     int  (*set_guest_pat)(struct vcpu *v, u64);
@@ -348,9 +348,9 @@ static inline void hvm_update_guest_efer(struct vcpu *v)
     hvm_funcs.update_guest_efer(v);
 }
 
-static inline void hvm_update_guest_vendor(struct vcpu *v)
+static inline void hvm_cpuid_policy_changed(struct vcpu *v)
 {
-    hvm_funcs.update_guest_vendor(v);
+    hvm_funcs.cpuid_policy_changed(v);
 }
 
 /*
-- 
2.14.3


From 17bfbc8289c487bcb5f446f79de54869f12786cb Mon Sep 17 00:00:00 2001
From: Andrew Cooper <andrew.cooper3@citrix.com>
Date: Thu, 8 Feb 2018 12:42:48 +0100
Subject: [PATCH 18/45] x86: Introduce a common cpuid_policy_updated()

No practical change at the moment, but future changes will need to react
irrespective of guest type.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: Wei Liu <wei.liu2@citrix.com>
Acked-by: Jan Beulich <jbeulich@suse.com>
master commit: b357546b43ab87dfb10d740ae637a685134d5e32
master date: 2018-01-05 19:57:07 +0000
---
 xen/arch/x86/domain.c        | 12 ++++++++++++
 xen/arch/x86/domctl.c        |  4 ++--
 xen/arch/x86/hvm/hvm.c       |  2 --
 xen/include/asm-x86/domain.h |  2 ++
 4 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index 8817263ac5..7e74832af5 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -485,6 +485,8 @@ int vcpu_initialise(struct vcpu *v)
         if ( is_pv_domain(d) )
             xfree(v->arch.pv_vcpu.trap_ctxt);
     }
+    else if ( !is_idle_domain(v->domain) )
+        cpuid_policy_updated(v);
 
     return rc;
 }
@@ -2659,6 +2661,16 @@ int domain_relinquish_resources(struct domain *d)
     return 0;
 }
 
+/*
+ * Called during vcpu construction, and each time the toolstack changes the
+ * CPUID configuration for the domain.
+ */
+void cpuid_policy_updated(struct vcpu *v)
+{
+    if ( is_hvm_vcpu(v) )
+        hvm_cpuid_policy_changed(v);
+}
+
 void arch_dump_domain_info(struct domain *d)
 {
     paging_dump_domain_info(d);
diff --git a/xen/arch/x86/domctl.c b/xen/arch/x86/domctl.c
index ad5536d7a7..84e585613a 100644
--- a/xen/arch/x86/domctl.c
+++ b/xen/arch/x86/domctl.c
@@ -231,12 +231,12 @@ static void update_domain_cpuid_info(struct domain *d,
         break;
     }
 
-    if ( is_hvm_domain(d) && call_policy_changed )
+    if ( call_policy_changed )
     {
         struct vcpu *v;
 
         for_each_vcpu( d, v )
-            hvm_cpuid_policy_changed(v);
+            cpuid_policy_updated(v);
     }
 }
 
diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index 9b228d88e4..a5ad7ca1c3 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -1621,8 +1621,6 @@ int hvm_vcpu_initialise(struct vcpu *v)
         hvm_set_guest_tsc(v, 0);
     }
 
-    hvm_cpuid_policy_changed(v);
-
     return 0;
 
  fail6:
diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h
index 82dca93ca1..33f1c88af9 100644
--- a/xen/include/asm-x86/domain.h
+++ b/xen/include/asm-x86/domain.h
@@ -79,6 +79,8 @@ void toggle_guest_mode(struct vcpu *);
 /* x86/64: toggle guest page tables between kernel and user modes. */
 void toggle_guest_pt(struct vcpu *);
 
+void cpuid_policy_updated(struct vcpu *v);
+
 /*
  * Initialise a hypercall-transfer page. The given pointer must be mapped
  * in Xen virtual address space (accesses are not validated or checked).
-- 
2.14.3


From ef2464c56e8dab194cd956498c3d5215f1b6b97b Mon Sep 17 00:00:00 2001
From: Andrew Cooper <andrew.cooper3@citrix.com>
Date: Thu, 8 Feb 2018 12:43:22 +0100
Subject: [PATCH 19/45] x86/entry: Rearrange RESTORE_ALL to restore registers
 in stack order

Results in a more predictable (i.e. linear) memory access pattern.

No functional change.

This is part of XSA-254.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Wei Liu <wei.liu2@citrix.com>
master commit: f85d105e27735f0e20aa30d77f03774f3ed55ae5
master date: 2018-01-05 19:57:08 +0000
---
 xen/include/asm-x86/asm_defns.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/xen/include/asm-x86/asm_defns.h b/xen/include/asm-x86/asm_defns.h
index 1c8d66c435..7d740e7156 100644
--- a/xen/include/asm-x86/asm_defns.h
+++ b/xen/include/asm-x86/asm_defns.h
@@ -310,7 +310,6 @@ static always_inline void stac(void)
  * @compat: R8-R15 don't need reloading
  */
 .macro RESTORE_ALL adj=0 compat=0
-        LOAD_C_CLOBBERED \compat
 .if !\compat
         movq  UREGS_r15(%rsp),%r15
         movq  UREGS_r14(%rsp),%r14
@@ -319,6 +318,7 @@ static always_inline void stac(void)
 .endif
         LOAD_ONE_REG(bp, \compat)
         LOAD_ONE_REG(bx, \compat)
+        LOAD_C_CLOBBERED \compat
         subq  $-(UREGS_error_code-UREGS_r15+\adj), %rsp
 .endm
 
-- 
2.14.3


From eadcd8318c46f53ed8ee6516ca876271f75930fa Mon Sep 17 00:00:00 2001
From: Andrew Cooper <andrew.cooper3@citrix.com>
Date: Thu, 8 Feb 2018 12:43:52 +0100
Subject: [PATCH 20/45] x86/hvm: Use SAVE_ALL to construct the cpu_user_regs
 frame after VMExit

No practical change.

One side effect in debug builds is that %rbp is inverted in the manner
expected by the stack unwinder to indicate an interrupt frame.

This is part of XSA-254.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Wei Liu <wei.liu2@citrix.com>
master commit: 13682ca8c94bd5612a44f7f1edc1fd8ff675dacb
master date: 2018-01-05 19:57:08 +0000
---
 xen/arch/x86/hvm/svm/entry.S | 22 ++++------------------
 xen/arch/x86/hvm/vmx/entry.S | 17 ++---------------
 2 files changed, 6 insertions(+), 33 deletions(-)

diff --git a/xen/arch/x86/hvm/svm/entry.S b/xen/arch/x86/hvm/svm/entry.S
index 759c184ef7..64f0dcd909 100644
--- a/xen/arch/x86/hvm/svm/entry.S
+++ b/xen/arch/x86/hvm/svm/entry.S
@@ -97,24 +97,10 @@ UNLIKELY_END(svm_trace)
 
         VMRUN
 
-        GET_CURRENT(ax)
-        push %rdi
-        push %rsi
-        push %rdx
-        push %rcx
-        mov  VCPU_svm_vmcb(%rax),%rcx
-        push %rax
-        push %r8
-        push %r9
-        push %r10
-        push %r11
-        push %rbx
-        mov  %rax,%rbx
-        push %rbp
-        push %r12
-        push %r13
-        push %r14
-        push %r15
+        SAVE_ALL
+
+        GET_CURRENT(bx)
+        mov  VCPU_svm_vmcb(%rbx),%rcx
 
         movb $0,VCPU_svm_vmcb_in_sync(%rbx)
         mov  VMCB_rax(%rcx),%rax
diff --git a/xen/arch/x86/hvm/vmx/entry.S b/xen/arch/x86/hvm/vmx/entry.S
index 3d98ee2d87..cc6188e7db 100644
--- a/xen/arch/x86/hvm/vmx/entry.S
+++ b/xen/arch/x86/hvm/vmx/entry.S
@@ -29,23 +29,10 @@
 #define VMLAUNCH     .byte 0x0f,0x01,0xc2
 
 ENTRY(vmx_asm_vmexit_handler)
-        push %rdi
-        push %rsi
-        push %rdx
-        push %rcx
-        push %rax
+        SAVE_ALL
+
         mov  %cr2,%rax
-        push %r8
-        push %r9
-        push %r10
-        push %r11
-        push %rbx
         GET_CURRENT(bx)
-        push %rbp
-        push %r12
-        push %r13
-        push %r14
-        push %r15
 
         movb $1,VCPU_vmx_launched(%rbx)
         mov  %rax,VCPU_hvm_guest_cr2(%rbx)
-- 
2.14.3
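
The %rbp inversion referred to above is performed by the CONFIG_FRAME_POINTER
branch of SAVE_ALL (visible as context in the next patch); annotated for
clarity:

    leaq  UREGS_rbp(%rsp), %rbp  /* point %rbp at the saved-%rbp slot */
    notq  %rbp                   /* invert it, marking this as an
                                  * interrupt frame for the unwinder */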


From 987b08d56cd8d439bdf435099218b96de901199d Mon Sep 17 00:00:00 2001
From: Andrew Cooper <andrew.cooper3@citrix.com>
Date: Thu, 8 Feb 2018 12:44:21 +0100
Subject: [PATCH 21/45] x86/entry: Erase guest GPR state on entry to Xen

This reduces the number of code gadgets which can be attacked with arbitrary
guest-controlled GPR values.

This is part of XSA-254.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Wei Liu <wei.liu2@citrix.com>
master commit: 03bd8c3a70d101fc2f8f36f1e171b7594462a4cd
master date: 2018-01-05 19:57:08 +0000
---
 xen/include/asm-x86/asm_defns.h | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/xen/include/asm-x86/asm_defns.h b/xen/include/asm-x86/asm_defns.h
index 7d740e7156..8eb5c0c6de 100644
--- a/xen/include/asm-x86/asm_defns.h
+++ b/xen/include/asm-x86/asm_defns.h
@@ -247,22 +247,34 @@ static always_inline void stac(void)
         addq  $-(UREGS_error_code-UREGS_r15), %rsp
         cld
         movq  %rdi,UREGS_rdi(%rsp)
+        xor   %edi, %edi
         movq  %rsi,UREGS_rsi(%rsp)
+        xor   %esi, %esi
         movq  %rdx,UREGS_rdx(%rsp)
+        xor   %edx, %edx
         movq  %rcx,UREGS_rcx(%rsp)
+        xor   %ecx, %ecx
         movq  %rax,UREGS_rax(%rsp)
+        xor   %eax, %eax
 .if !\compat
         movq  %r8,UREGS_r8(%rsp)
         movq  %r9,UREGS_r9(%rsp)
         movq  %r10,UREGS_r10(%rsp)
         movq  %r11,UREGS_r11(%rsp)
 .endif
+        xor   %r8, %r8
+        xor   %r9, %r9
+        xor   %r10, %r10
+        xor   %r11, %r11
         movq  %rbx,UREGS_rbx(%rsp)
+        xor   %ebx, %ebx
         movq  %rbp,UREGS_rbp(%rsp)
 #ifdef CONFIG_FRAME_POINTER
 /* Indicate special exception stack frame by inverting the frame pointer. */
         leaq  UREGS_rbp(%rsp), %rbp
         notq  %rbp
+#else
+        xor   %ebp, %ebp
 #endif
 .if !\compat
         movq  %r12,UREGS_r12(%rsp)
@@ -270,6 +282,10 @@ static always_inline void stac(void)
         movq  %r14,UREGS_r14(%rsp)
         movq  %r15,UREGS_r15(%rsp)
 .endif
+        xor   %r12, %r12
+        xor   %r13, %r13
+        xor   %r14, %r14
+        xor   %r15, %r15
 .endm
 
 #define LOAD_ONE_REG(reg, compat) \
-- 
2.14.3
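
A note on the xor forms chosen above (annotation only): on x86-64, writes to
a 32-bit register zero-extend into the full 64-bit register, so the shorter
32-bit encodings suffice for the legacy registers, while %r8-%r15 need a REX
prefix in any case:

    xor  %eax, %eax    /* clears all of %rax in a 2-byte encoding */
    xor  %r8, %r8      /* a REX prefix is needed to name %r8 at all */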


From 5732a8ef2885633cdffc56fe9d8df40f76bfb2c2 Mon Sep 17 00:00:00 2001
From: Andrew Cooper <andrew.cooper3@citrix.com>
Date: Thu, 8 Feb 2018 12:45:02 +0100
Subject: [PATCH 22/45] common/wait: Clarifications to wait infrastructure

This logic is not as clear as it could be.  Add some comments to help.

Rearrange the asm block in __prepare_to_wait() to separate the GPR
saving/restoring from the internal logic.

While tweaking, add an unreachable() following the jmp in
check_wakeup_from_wait().

No functional change.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
master commit: 2d1c82261d966735e82e5971eddb63ba3c565a37
master date: 2018-01-05 19:57:08 +0000
---
 xen/common/wait.c | 31 ++++++++++++++++++++++++-------
 1 file changed, 24 insertions(+), 7 deletions(-)

diff --git a/xen/common/wait.c b/xen/common/wait.c
index 398f653174..877ef19638 100644
--- a/xen/common/wait.c
+++ b/xen/common/wait.c
@@ -139,14 +139,26 @@ static void __prepare_to_wait(struct waitqueue_vcpu *wqv)
         domain_crash_synchronous();
     }
 
+    /* Hand-rolled setjmp(). */
     asm volatile (
-        "push %%rax; push %%rbx; push %%rdx; "
-        "push %%rbp; push %%r8; push %%r9; push %%r10; push %%r11; "
-        "push %%r12; push %%r13; push %%r14; push %%r15; call 1f; "
-        "1: addq $2f-1b,(%%rsp); sub %%esp,%%ecx; cmp %3,%%ecx; ja 3f; "
-        "mov %%rsp,%%rsi; 2: rep movsb; mov %%rsp,%%rsi; 3: pop %%rax; "
-        "pop %%r15; pop %%r14; pop %%r13; pop %%r12; "
-        "pop %%r11; pop %%r10; pop %%r9; pop %%r8; "
+        "push %%rax; push %%rbx; push %%rdx; push %%rbp;"
+        "push %%r8;  push %%r9;  push %%r10; push %%r11;"
+        "push %%r12; push %%r13; push %%r14; push %%r15;"
+
+        "call 1f;"
+        "1: addq $2f-1b,(%%rsp);"
+        "sub %%esp,%%ecx;"
+        "cmp %3,%%ecx;"
+        "ja 3f;"
+        "mov %%rsp,%%rsi;"
+
+        /* check_wakeup_from_wait() longjmp()'s to this point. */
+        "2: rep movsb;"
+        "mov %%rsp,%%rsi;"
+        "3: pop %%rax;"
+
+        "pop %%r15; pop %%r14; pop %%r13; pop %%r12;"
+        "pop %%r11; pop %%r10; pop %%r9;  pop %%r8;"
         "pop %%rbp; pop %%rdx; pop %%rbx; pop %%rax"
         : "=&S" (wqv->esp), "=&c" (dummy), "=&D" (dummy)
         : "i" (PAGE_SIZE), "0" (0), "1" (cpu_info), "2" (wqv->stack)
@@ -190,11 +202,16 @@ void check_wakeup_from_wait(void)
         wait(); /* takes us back into the scheduler */
     }
 
+    /*
+     * Hand-rolled longjmp().  Returns to the pointer on the top of
+     * wqv->stack, and lands on a `rep movs` instruction.
+     */
     asm volatile (
         "mov %1,%%"__OP"sp; jmp *(%0)"
         : : "S" (wqv->stack), "D" (wqv->esp),
         "c" ((char *)get_cpu_info() - (char *)wqv->esp)
         : "memory" );
+    unreachable();
 }
 
 #else /* !CONFIG_X86 */
-- 
2.14.3
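
For readers unfamiliar with the idiom, a hand-rolled setjmp() reduces to
capturing a resume address plus the stack pointer.  A simplified,
hypothetical sketch (not the actual wqv->esp/wqv->stack layout used above):

    /* Sketch: capture a resume point into a two-slot buffer at (%rdi). */
    lea  1f(%rip), %rax
    mov  %rax, 0(%rdi)      /* slot 0: address to land on at longjmp */
    mov  %rsp, 8(%rdi)      /* slot 1: stack pointer to restore */
1:                          /* execution resumes here after a longjmp */

The real __prepare_to_wait() additionally copies the live stack contents into
wqv->stack, and the longjmp deliberately lands on the `rep movsb` so those
contents are copied back into place before execution continues.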


From 1838e21521497cdfa6d3b1dfac0374bcce717eba Mon Sep 17 00:00:00 2001
From: Andrew Cooper <andrew.cooper3@citrix.com>
Date: Thu, 8 Feb 2018 12:45:39 +0100
Subject: [PATCH 23/45] x86: Support compiling with indirect branch thunks

Use -mindirect-branch=thunk-extern/-mindirect-branch-register when available.
To begin with, use the retpoline thunk.  Later work will add alternative
thunks which can be selected at boot time.

This is part of XSA-254.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Acked-by: Jan Beulich <jbeulich@suse.com>
master commit: 3659f0f4bcc6ca08103d1a7ae4e97535ecc978be
master date: 2018-01-16 17:45:50 +0000
---
 xen/arch/x86/Makefile         |  1 +
 xen/arch/x86/Rules.mk         |  7 +++++++
 xen/arch/x86/indirect-thunk.S | 38 ++++++++++++++++++++++++++++++++++++++
 xen/arch/x86/xen.lds.S        |  1 +
 4 files changed, 47 insertions(+)
 create mode 100644 xen/arch/x86/indirect-thunk.S

diff --git a/xen/arch/x86/Makefile b/xen/arch/x86/Makefile
index 931917d1d3..a9c8fd7df9 100644
--- a/xen/arch/x86/Makefile
+++ b/xen/arch/x86/Makefile
@@ -34,6 +34,7 @@ obj-y += i8259.o
 obj-y += io_apic.o
 obj-$(CONFIG_LIVEPATCH) += alternative.o livepatch.o
 obj-y += msi.o
+obj-$(CONFIG_INDIRECT_THUNK) += indirect-thunk.o
 obj-y += ioport_emulate.o
 obj-y += irq.o
 obj-$(CONFIG_KEXEC) += machine_kexec.o
diff --git a/xen/arch/x86/Rules.mk b/xen/arch/x86/Rules.mk
index 42be4bc100..ad56e56ac6 100644
--- a/xen/arch/x86/Rules.mk
+++ b/xen/arch/x86/Rules.mk
@@ -25,3 +25,10 @@ CFLAGS += -fno-asynchronous-unwind-tables
 ifneq ($(call cc-option,$(CC),-fvisibility=hidden,n),n)
 CFLAGS += -DGCC_HAS_VISIBILITY_ATTRIBUTE
 endif
+
+# Compile with thunk-extern, indirect-branch-register if available.
+ifneq ($(call cc-option,$(CC),-mindirect-branch-register,n),n)
+CFLAGS += -mindirect-branch=thunk-extern -mindirect-branch-register
+CFLAGS += -DCONFIG_INDIRECT_THUNK
+export CONFIG_INDIRECT_THUNK=y
+endif
diff --git a/xen/arch/x86/indirect-thunk.S b/xen/arch/x86/indirect-thunk.S
new file mode 100644
index 0000000000..3eaf505d0e
--- /dev/null
+++ b/xen/arch/x86/indirect-thunk.S
@@ -0,0 +1,38 @@
+/*
+ * Implement __x86_indirect_thunk_* symbols for use with compatible compilers
+ * and the -mindirect-branch=thunk-extern -mindirect-branch-register options.
+ *
+ * Copyright (c) 2017-2018 Citrix Systems Ltd.
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2.  See the file COPYING for more details.
+ */
+        .file __FILE__
+
+#include <asm/asm_defns.h>
+
+.macro IND_THUNK_RETPOLINE reg:req
+        call 2f
+1:
+        lfence
+        jmp 1b
+2:
+        mov %\reg, (%rsp)
+        ret
+.endm
+
+/*
+ * Build the __x86_indirect_thunk_* symbols.  Currently implement the
+ * retpoline thunk only.
+ */
+.macro GEN_INDIRECT_THUNK reg:req
+        .section .text.__x86_indirect_thunk_\reg, "ax", @progbits
+
+ENTRY(__x86_indirect_thunk_\reg)
+        IND_THUNK_RETPOLINE \reg
+.endm
+
+/* Instantiate GEN_INDIRECT_THUNK for each register except %rsp. */
+.irp reg, ax, cx, dx, bx, bp, si, di, 8, 9, 10, 11, 12, 13, 14, 15
+        GEN_INDIRECT_THUNK reg=r\reg
+.endr
diff --git a/xen/arch/x86/xen.lds.S b/xen/arch/x86/xen.lds.S
index 11549964c0..681c795c6d 100644
--- a/xen/arch/x86/xen.lds.S
+++ b/xen/arch/x86/xen.lds.S
@@ -60,6 +60,7 @@ SECTIONS
   .text : {
         _stext = .;            /* Text and read-only data */
        *(.text)
+       *(.text.__x86_indirect_thunk_*)
        *(.text.cold)
        *(.text.unlikely)
        *(.fixup)
-- 
2.14.3
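
To unpack IND_THUNK_RETPOLINE: the call pushes a return address, which is
then overwritten with the real branch target, so the ret transfers there
architecturally while any speculation steered by the return-stack predictor
is captured in the lfence loop.  The same sequence, annotated (no behavioural
difference intended):

    call 2f              /* push the address of 1: and skip the trap */
1:  lfence               /* speculation trap: a mispredicted return */
    jmp 1b               /* from the ret below spins harmlessly here */
2:  mov %\reg, (%rsp)    /* replace the pushed return address with the
                          * real indirect-branch target */
    ret                  /* "return" to the target held in \reg */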


From 60c50f2b0bf5d3f894ca428cf4b4374fbea2d082 Mon Sep 17 00:00:00 2001
From: Andrew Cooper <andrew.cooper3@citrix.com>
Date: Thu, 8 Feb 2018 12:46:40 +0100
Subject: [PATCH 24/45] x86: Support indirect thunks from assembly code

Introduce INDIRECT_CALL and INDIRECT_JMP which either degrade to a normal
indirect branch, or dispatch to the __x86_indirect_thunk_* symbols.

Update all the manual indirect branches to use the new thunks.  The
indirect branches in the early boot and kexec path are left intact as we can't
use the compiled-in thunks at those points.

This is part of XSA-254.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
master commit: 7c508612f7a5096b4819d4ef2ce566e01bd66c0c
master date: 2018-01-16 17:45:50 +0000
---
 xen/Rules.mk                             |  4 ++--
 xen/arch/x86/Rules.mk                    |  6 +++++
 xen/arch/x86/boot/trampoline.S           | 24 +++++++++++++++++--
 xen/arch/x86/traps.c                     | 41 ++++++++++++++++++++++----------
 xen/arch/x86/x86_64/entry.S              |  6 +++--
 xen/arch/x86/x86_emulate/x86_emulate.c   | 16 ++++++-------
 xen/common/wait.c                        |  8 ++++---
 xen/include/asm-x86/asm_defns.h          |  8 +++++++
 xen/include/asm-x86/indirect_thunk_asm.h | 41 ++++++++++++++++++++++++++++++++
 xen/include/xen/config.h                 |  2 +-
 10 files changed, 126 insertions(+), 30 deletions(-)
 create mode 100644 xen/include/asm-x86/indirect_thunk_asm.h

diff --git a/xen/Rules.mk b/xen/Rules.mk
index 08cc776ebc..2653f745a8 100644
--- a/xen/Rules.mk
+++ b/xen/Rules.mk
@@ -72,8 +72,8 @@ endif
 
 AFLAGS-y                += -D__ASSEMBLY__
 
-# Clang's built-in assembler can't handle .code16/.code32/.code64 yet
-AFLAGS-$(clang)         += -no-integrated-as
+# Clang's built-in assembler can't handle embedded .include's
+CFLAGS-$(clang)         += -no-integrated-as
 
 ALL_OBJS := $(ALL_OBJS-y)
 
diff --git a/xen/arch/x86/Rules.mk b/xen/arch/x86/Rules.mk
index ad56e56ac6..2784e19f27 100644
--- a/xen/arch/x86/Rules.mk
+++ b/xen/arch/x86/Rules.mk
@@ -32,3 +32,9 @@ CFLAGS += -mindirect-branch=thunk-extern -mindirect-branch-register
 CFLAGS += -DCONFIG_INDIRECT_THUNK
 export CONFIG_INDIRECT_THUNK=y
 endif
+
+# Set up the assembler include path properly for older GCC toolchains.  Clang
+# objects to the argument being passed, however.
+ifneq ($(clang),y)
+CFLAGS += -Wa,-I$(BASEDIR)/include
+endif
diff --git a/xen/arch/x86/boot/trampoline.S b/xen/arch/x86/boot/trampoline.S
index b0136148bb..8a17e128d7 100644
--- a/xen/arch/x86/boot/trampoline.S
+++ b/xen/arch/x86/boot/trampoline.S
@@ -128,8 +128,28 @@ trampoline_protmode_entry:
         .code64
 start64:
         /* Jump to high mappings. */
-        movabs  $__high_start,%rax
-        jmpq    *%rax
+        movabs  $__high_start, %rdi
+
+#ifdef CONFIG_INDIRECT_THUNK
+        /*
+         * If booting virtualised, or hot-onlining a CPU, sibling threads can
+         * attempt Branch Target Injection against this jmp.
+         *
+         * We've got no usable stack so can't use a RETPOLINE thunk, and are
+         * further than disp32 from the high mappings so couldn't use
+         * JUMP_THUNK even if it was a non-RETPOLINE thunk.  Furthermore, an
+         * LFENCE isn't necessarily safe to use at this point.
+         *
+         * As this isn't a hotpath, use a fully serialising event to reduce
+         * the speculation window as much as possible.  %ebx needs preserving
+         * for __high_start.
+         */
+        mov     %ebx, %esi
+        cpuid
+        mov     %esi, %ebx
+#endif
+
+        jmpq    *%rdi
 
         .code32
 trampoline_boot_cpu_entry:
diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c
index f4bf8b58fa..5d6ccde9f9 100644
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -2789,6 +2789,8 @@ int pv_emul_cpuid(unsigned int *eax, unsigned int *ebx, unsigned int *ecx,
     return X86EMUL_OKAY;
 }
 
+void __x86_indirect_thunk_rcx(void);
+
 /* Instruction fetch with error handling. */
 #define insn_fetch(type, base, eip, limit)                                  \
 ({  unsigned long _rc, _ptr = (base) + (eip);                               \
@@ -2827,6 +2829,8 @@ static int emulate_privileged_op(struct cpu_user_regs *regs)
     unsigned long code_base, code_limit;
     char *io_emul_stub = NULL;
     void (*io_emul)(struct cpu_user_regs *);
+    struct stubs *this_stubs = &this_cpu(stubs);
+    unsigned long stub_va = this_stubs->addr + STUB_BUF_SIZE / 2;
     uint64_t val;
 
     if ( !read_descriptor(regs->cs, v, &code_base, &code_limit, &ar, 1) )
@@ -3010,31 +3014,44 @@ static int emulate_privileged_op(struct cpu_user_regs *regs)
      * context. This is needed for some systems which (ab)use IN/OUT
      * to communicate with BIOS code in system-management mode.
      */
-    io_emul_stub = map_domain_page(_mfn(this_cpu(stubs.mfn))) +
-                   (this_cpu(stubs.addr) & ~PAGE_MASK) +
-                   STUB_BUF_SIZE / 2;
+    io_emul_stub = map_domain_page(_mfn(this_stubs->mfn)) +
+                   (stub_va & ~PAGE_MASK);
     /* movq $host_to_guest_gpr_switch,%rcx */
     io_emul_stub[0] = 0x48;
     io_emul_stub[1] = 0xb9;
     *(void **)&io_emul_stub[2] = (void *)host_to_guest_gpr_switch;
+
+#ifdef CONFIG_INDIRECT_THUNK
+    /* callq __x86_indirect_thunk_rcx */
+    io_emul_stub[10] = 0xe8;
+    *(int32_t *)&io_emul_stub[11] =
+        (long)__x86_indirect_thunk_rcx - (stub_va + 11 + 4);
+#else
     /* callq *%rcx */
     io_emul_stub[10] = 0xff;
     io_emul_stub[11] = 0xd1;
+    /* TODO: untangle ideal_nops from init/livepatch Kconfig options. */
+    memcpy(&io_emul_stub[12], "\x0f\x1f\x00", 3); /* P6_NOP3 */
+#endif
+
     /* data16 or nop */
-    io_emul_stub[12] = (op_bytes != 2) ? 0x90 : 0x66;
+    io_emul_stub[15] = (op_bytes != 2) ? 0x90 : 0x66;
     /* <io-access opcode> */
-    io_emul_stub[13] = opcode;
+    io_emul_stub[16] = opcode;
     /* imm8 or nop */
-    io_emul_stub[14] = 0x90;
+    io_emul_stub[17] = 0x90;
     /* ret (jumps to guest_to_host_gpr_switch) */
-    io_emul_stub[15] = 0xc3;
-    BUILD_BUG_ON(STUB_BUF_SIZE / 2 < 16);
+    io_emul_stub[18] = 0xc3;
+    BUILD_BUG_ON(STUB_BUF_SIZE / 2 < 19);
 
     /* Handy function-typed pointer to the stub. */
-    io_emul = (void *)(this_cpu(stubs.addr) + STUB_BUF_SIZE / 2);
+    io_emul = (void *)stub_va;
 
     if ( ioemul_handle_quirk )
-        ioemul_handle_quirk(opcode, &io_emul_stub[12], regs);
+    {
+        BUILD_BUG_ON(STUB_BUF_SIZE / 2 < 15 + 10);
+        ioemul_handle_quirk(opcode, &io_emul_stub[15], regs);
+    }
 
     /* I/O Port and Interrupt Flag instructions. */
     switch ( opcode )
@@ -3043,7 +3060,7 @@ static int emulate_privileged_op(struct cpu_user_regs *regs)
         op_bytes = 1;
     case 0xe5: /* IN imm8,%eax */
         port = insn_fetch(u8, code_base, eip, code_limit);
-        io_emul_stub[14] = port; /* imm8 */
+        io_emul_stub[17] = port; /* imm8 */
     exec_in:
         if ( !guest_io_okay(port, op_bytes, v, regs) )
             goto fail;
@@ -3072,7 +3089,7 @@ static int emulate_privileged_op(struct cpu_user_regs *regs)
         op_bytes = 1;
     case 0xe7: /* OUT %eax,imm8 */
         port = insn_fetch(u8, code_base, eip, code_limit);
-        io_emul_stub[14] = port; /* imm8 */
+        io_emul_stub[17] = port; /* imm8 */
     exec_out:
         if ( !guest_io_okay(port, op_bytes, v, regs) )
             goto fail;
diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S
index 505604fd6e..20536d44aa 100644
--- a/xen/arch/x86/x86_64/entry.S
+++ b/xen/arch/x86/x86_64/entry.S
@@ -578,7 +578,8 @@ handle_exception_saved:
         movzbl UREGS_entry_vector(%rsp),%eax
         leaq  exception_table(%rip),%rdx
         PERFC_INCR(exceptions, %rax, %rbx)
-        callq *(%rdx,%rax,8)
+        mov   (%rdx, %rax, 8), %rdx
+        INDIRECT_CALL %rdx
         mov   %r15, STACK_CPUINFO_FIELD(xen_cr3)(%r14)
         testb $3,UREGS_cs(%rsp)
         jz    restore_all_xen
@@ -750,7 +751,8 @@ handle_ist_exception:
 1:      movq  %rsp,%rdi
         movzbl UREGS_entry_vector(%rsp),%eax
         leaq  exception_table(%rip),%rdx
-        callq *(%rdx,%rax,8)
+        mov   (%rdx, %rax, 8), %rdx
+        INDIRECT_CALL %rdx
         mov   %r15, STACK_CPUINFO_FIELD(xen_cr3)(%r14)
         cmpb  $TRAP_nmi,UREGS_entry_vector(%rsp)
         jne   ret_from_intr
diff --git a/xen/arch/x86/x86_emulate/x86_emulate.c b/xen/arch/x86/x86_emulate/x86_emulate.c
index 9851416713..bfabec06ba 100644
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -880,8 +880,8 @@ do {                                                                    \
     struct fpu_insn_ctxt fic_ = { .insn_bytes = nr_ };                  \
     memcpy(get_stub(stub), ((uint8_t[]){ bytes, 0xc3 }), nr_ + 1);      \
     get_fpu(X86EMUL_FPU_fpu, &fic_);                                    \
-    asm volatile ( "call *%[stub]" : "+m" (fic_) :                      \
-                   [stub] "rm" (stub.func) );                           \
+    asm volatile ( "INDIRECT_CALL %[stub]" : "+m" (fic_) :              \
+                   [stub] "r" (stub.func) );                            \
     put_fpu(&fic_);                                                     \
     put_stub(stub);                                                     \
 } while (0)
@@ -894,11 +894,11 @@ do {                                                                    \
     memcpy(get_stub(stub), ((uint8_t[]){ bytes, 0xc3 }), nr_ + 1);      \
     get_fpu(X86EMUL_FPU_fpu, &fic_);                                    \
     asm volatile ( _PRE_EFLAGS("[eflags]", "[mask]", "[tmp]")           \
-                   "call *%[func];"                                     \
+                   "INDIRECT_CALL %[func];"                             \
                    _POST_EFLAGS("[eflags]", "[mask]", "[tmp]")          \
                    : [eflags] "+g" (_regs.eflags),                      \
                      [tmp] "=&r" (tmp_), "+m" (fic_)                    \
-                   : [func] "rm" (stub.func),                           \
+                   : [func] "r" (stub.func),                            \
                      [mask] "i" (EFLG_ZF|EFLG_PF|EFLG_CF) );            \
     put_fpu(&fic_);                                                     \
     put_stub(stub);                                                     \
@@ -4766,8 +4766,8 @@ x86_emulate(
         if ( !rc )
         {
            copy_REX_VEX(buf, rex_prefix, vex);
-           asm volatile ( "call *%0" : : "r" (stub.func), "a" (mmvalp)
-                                     : "memory" );
+           asm volatile ( "INDIRECT_CALL %0" : : "r" (stub.func), "a" (mmvalp)
+                                             : "memory" );
         }
         put_fpu(&fic);
         put_stub(stub);
@@ -5058,8 +5058,8 @@ x86_emulate(
         if ( !rc )
         {
            copy_REX_VEX(buf, rex_prefix, vex);
-           asm volatile ( "call *%0" : : "r" (stub.func), "a" (ea.reg)
-                                     : "memory" );
+           asm volatile ( "INDIRECT_CALL %0" : : "r" (stub.func), "a" (ea.reg)
+                                             : "memory" );
         }
         put_fpu(&fic);
         put_stub(stub);
diff --git a/xen/common/wait.c b/xen/common/wait.c
index 877ef19638..6bb65c60eb 100644
--- a/xen/common/wait.c
+++ b/xen/common/wait.c
@@ -204,12 +204,14 @@ void check_wakeup_from_wait(void)
 
     /*
      * Hand-rolled longjmp().  Returns to the pointer on the top of
-     * wqv->stack, and lands on a `rep movs` instruction.
+     * wqv->stack, and lands on a `rep movs` instruction.  All other GPRs are
+     * restored from the stack, so are available for use here.
      */
     asm volatile (
-        "mov %1,%%"__OP"sp; jmp *(%0)"
+        "mov %1,%%"__OP"sp; INDIRECT_JMP %[ip]"
         : : "S" (wqv->stack), "D" (wqv->esp),
-        "c" ((char *)get_cpu_info() - (char *)wqv->esp)
+          "c" ((char *)get_cpu_info() - (char *)wqv->esp),
+          [ip] "r" (*(unsigned long *)wqv->stack)
         : "memory" );
     unreachable();
 }
diff --git a/xen/include/asm-x86/asm_defns.h b/xen/include/asm-x86/asm_defns.h
index 8eb5c0c6de..358deffb5b 100644
--- a/xen/include/asm-x86/asm_defns.h
+++ b/xen/include/asm-x86/asm_defns.h
@@ -13,6 +13,14 @@
 #include <asm/cpufeature.h>
 #include <asm/alternative.h>
 
+#ifdef __ASSEMBLY__
+# include <asm/indirect_thunk_asm.h>
+#else
+asm ( "\t.equ CONFIG_INDIRECT_THUNK, "
+      __stringify(IS_ENABLED(CONFIG_INDIRECT_THUNK)) );
+asm ( "\t.include \"asm/indirect_thunk_asm.h\"" );
+#endif
+
 #ifndef __ASSEMBLY__
 void ret_from_intr(void);
 #endif
diff --git a/xen/include/asm-x86/indirect_thunk_asm.h b/xen/include/asm-x86/indirect_thunk_asm.h
new file mode 100644
index 0000000000..96bcc25497
--- /dev/null
+++ b/xen/include/asm-x86/indirect_thunk_asm.h
@@ -0,0 +1,41 @@
+/*
+ * Warning!  This file is included at an assembler level for .c files, causing
+ * usual #ifdef'ary to turn into comments.
+ */
+
+.macro INDIRECT_BRANCH insn:req arg:req
+/*
+ * Create an indirect branch.  insn is one of call/jmp, arg is a single
+ * register.
+ *
+ * With no compiler support, this degrades into a plain indirect call/jmp.
+ * With compiler support, dispatch to the correct __x86_indirect_thunk_*
+ */
+    .if CONFIG_INDIRECT_THUNK == 1
+
+        $done = 0
+        .irp reg, ax, cx, dx, bx, bp, si, di, 8, 9, 10, 11, 12, 13, 14, 15
+        .ifeqs "\arg", "%r\reg"
+            \insn __x86_indirect_thunk_r\reg
+            $done = 1
+           .exitm
+        .endif
+        .endr
+
+        .if $done != 1
+            .error "Bad register arg \arg"
+        .endif
+
+    .else
+        \insn *\arg
+    .endif
+.endm
+
+/* Convenience wrappers. */
+.macro INDIRECT_CALL arg:req
+    INDIRECT_BRANCH call \arg
+.endm
+
+.macro INDIRECT_JMP arg:req
+    INDIRECT_BRANCH jmp \arg
+.endm
diff --git a/xen/include/xen/config.h b/xen/include/xen/config.h
index 473c5e8c3e..9f39687fab 100644
--- a/xen/include/xen/config.h
+++ b/xen/include/xen/config.h
@@ -7,7 +7,7 @@
 #ifndef __XEN_CONFIG_H__
 #define __XEN_CONFIG_H__
 
-#include <generated/autoconf.h>
+#include <xen/kconfig.h>
 
 #ifndef __ASSEMBLY__
 #include <xen/compiler.h>
-- 
2.14.3
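
A call-site conversion therefore follows the two-step pattern visible in the
entry.S hunks above, since INDIRECT_BRANCH only accepts a plain register
argument:

    mov   (%rdx, %rax, 8), %rdx   /* load the function pointer first */
    INDIRECT_CALL %rdx            /* becomes `call __x86_indirect_thunk_rdx`,
                                   * or degrades to `call *%rdx` without
                                   * compiler support */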


From 2eefd926bbc8217cf511bc096c897ae4c56dd0c2 Mon Sep 17 00:00:00 2001
From: Andrew Cooper <andrew.cooper3@citrix.com>
Date: Thu, 8 Feb 2018 12:47:45 +0100
Subject: [PATCH 25/45] x86/boot: Report details of speculative mitigations

Nothing very interesting at the moment, but the logic will grow as new
mitigations are added.

This is part of XSA-254.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Acked-by: Jan Beulich <jbeulich@suse.com>
master commit: 31d6c53adf6417bf449ca50e8416e41b64d46803
master date: 2018-01-16 17:45:50 +0000
---
 xen/arch/x86/Makefile           |  1 +
 xen/arch/x86/setup.c            |  3 ++
 xen/arch/x86/spec_ctrl.c        | 75 +++++++++++++++++++++++++++++++++++++++++
 xen/include/asm-x86/spec_ctrl.h | 35 +++++++++++++++++++
 4 files changed, 114 insertions(+)
 create mode 100644 xen/arch/x86/spec_ctrl.c
 create mode 100644 xen/include/asm-x86/spec_ctrl.h

diff --git a/xen/arch/x86/Makefile b/xen/arch/x86/Makefile
index a9c8fd7df9..77105c86ab 100644
--- a/xen/arch/x86/Makefile
+++ b/xen/arch/x86/Makefile
@@ -56,6 +56,7 @@ obj-y += setup.o
 obj-y += shutdown.o
 obj-y += smp.o
 obj-y += smpboot.o
+obj-y += spec_ctrl.o
 obj-y += srat.o
 obj-y += string.o
 obj-y += sysctl.o
diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c
index 468e51efef..930230d4b9 100644
--- a/xen/arch/x86/setup.c
+++ b/xen/arch/x86/setup.c
@@ -52,6 +52,7 @@
 #include <asm/alternative.h>
 #include <asm/mc146818rtc.h>
 #include <asm/cpuid.h>
+#include <asm/spec_ctrl.h>
 
 /* opt_nosmp: If true, secondary processors are ignored. */
 static bool_t __initdata opt_nosmp;
@@ -1479,6 +1480,8 @@ void __init noreturn __start_xen(unsigned long mbi_p)
     if ( cpu_has_fsgsbase )
         set_in_cr4(X86_CR4_FSGSBASE);
 
+    init_speculation_mitigations();
+
     init_idle_domain();
 
     this_cpu(stubs.addr) = alloc_stub_page(smp_processor_id(),
diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c
new file mode 100644
index 0000000000..256701a43c
--- /dev/null
+++ b/xen/arch/x86/spec_ctrl.c
@@ -0,0 +1,75 @@
+/******************************************************************************
+ * arch/x86/spec_ctrl.c
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Copyright (c) 2017-2018 Citrix Systems Ltd.
+ */
+#include <xen/init.h>
+#include <xen/lib.h>
+
+#include <asm/processor.h>
+#include <asm/spec_ctrl.h>
+
+enum ind_thunk {
+    THUNK_DEFAULT, /* Decide which thunk to use at boot time. */
+    THUNK_NONE,    /* Missing compiler support for thunks. */
+
+    THUNK_RETPOLINE,
+};
+
+static void __init print_details(enum ind_thunk thunk)
+{
+    printk(XENLOG_DEBUG "Speculative mitigation facilities:\n");
+
+    /* Compiled-in support which pertains to BTI mitigations. */
+    if ( IS_ENABLED(CONFIG_INDIRECT_THUNK) )
+        printk(XENLOG_DEBUG "  Compiled-in support: INDIRECT_THUNK\n");
+
+    printk(XENLOG_INFO
+           "BTI mitigations: Thunk %s\n",
+           thunk == THUNK_NONE      ? "N/A" :
+           thunk == THUNK_RETPOLINE ? "RETPOLINE" : "?");
+}
+
+void __init init_speculation_mitigations(void)
+{
+    enum ind_thunk thunk = THUNK_DEFAULT;
+
+    /*
+     * Supplementary minor adjustments.  Without compiler support, there are
+     * no thunks.
+     */
+    if ( !IS_ENABLED(CONFIG_INDIRECT_THUNK) )
+        thunk = THUNK_NONE;
+
+    /*
+     * If there are still no thunk preferences, the compiled default is
+     * actually retpoline, and it is better than nothing.
+     */
+    if ( thunk == THUNK_DEFAULT )
+        thunk = THUNK_RETPOLINE;
+
+    print_details(thunk);
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/include/asm-x86/spec_ctrl.h b/xen/include/asm-x86/spec_ctrl.h
new file mode 100644
index 0000000000..e088a551da
--- /dev/null
+++ b/xen/include/asm-x86/spec_ctrl.h
@@ -0,0 +1,35 @@
+/******************************************************************************
+ * include/asm-x86/spec_ctrl.h
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Copyright (c) 2017-2018 Citrix Systems Ltd.
+ */
+
+#ifndef __X86_SPEC_CTRL_H__
+#define __X86_SPEC_CTRL_H__
+
+void init_speculation_mitigations(void);
+
+#endif /* !__X86_SPEC_CTRL_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
-- 
2.14.3
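
Reconstructing from the format strings above, a thunk-capable build with no
relevant hardware features would log something along these lines at boot
(illustrative only):

    (XEN) Speculative mitigation facilities:
    (XEN)   Compiled-in support: INDIRECT_THUNK
    (XEN) BTI mitigations: Thunk RETPOLINE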


From 479b879a7dd0bbf02920d2f6053d9bee271797ce Mon Sep 17 00:00:00 2001
From: Andrew Cooper <andrew.cooper3@citrix.com>
Date: Thu, 8 Feb 2018 12:48:30 +0100
Subject: [PATCH 26/45] x86/amd: Try to set lfence as being Dispatch
 Serialising

This property is required for AMD's recommended mitigation for Branch
Target Injection, but Xen needs to cope with being unable to detect or modify
the MSR.

This is part of XSA-254.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
master commit: fe3ee5530a8d0d0b6a478167125d00c40f294a86
master date: 2018-01-16 17:45:50 +0000
---
 xen/arch/x86/cpu/amd.c           | 35 ++++++++++++++++++++++++++++++++++-
 xen/include/asm-x86/cpufeature.h |  2 ++
 xen/include/asm-x86/msr-index.h  |  1 +
 3 files changed, 37 insertions(+), 1 deletion(-)

diff --git a/xen/arch/x86/cpu/amd.c b/xen/arch/x86/cpu/amd.c
index 4ff0b54629..5c1bb1352c 100644
--- a/xen/arch/x86/cpu/amd.c
+++ b/xen/arch/x86/cpu/amd.c
@@ -559,8 +559,41 @@ static void init_amd(struct cpuinfo_x86 *c)
 			wrmsr_amd_safe(0xc001100d, l, h & ~1);
 	}
 
+	/*
+	 * Attempt to set lfence to be Dispatch Serialising.  This MSR almost
+	 * certainly isn't virtualised (and Xen at least will leak the real
+	 * value in but silently discard writes), as well as being per-core
+	 * rather than per-thread, so do a full safe read/write/readback cycle
+	 * in the worst case.
+	 */
+	if (c->x86 == 0x0f || c->x86 == 0x11)
+		/* Always dispatch serialising on this hardware. */
+		__set_bit(X86_FEATURE_LFENCE_DISPATCH, c->x86_capability);
+	else /* Implicitly "== 0x10 || >= 0x12" by being 64bit. */ {
+		if (rdmsr_safe(MSR_AMD64_DE_CFG, value))
+			/* Unable to read.  Assume the safer default. */
+			__clear_bit(X86_FEATURE_LFENCE_DISPATCH,
+				    c->x86_capability);
+		else if (value & AMD64_DE_CFG_LFENCE_SERIALISE)
+			/* Already dispatch serialising. */
+			__set_bit(X86_FEATURE_LFENCE_DISPATCH,
+				  c->x86_capability);
+		else if (wrmsr_safe(MSR_AMD64_DE_CFG,
+				    value | AMD64_DE_CFG_LFENCE_SERIALISE) ||
+			 rdmsr_safe(MSR_AMD64_DE_CFG, value) ||
+			 !(value & AMD64_DE_CFG_LFENCE_SERIALISE))
+			/* Attempt to set failed.  Assume the safer default. */
+			__clear_bit(X86_FEATURE_LFENCE_DISPATCH,
+				    c->x86_capability);
+		else
+			/* Successfully enabled! */
+			__set_bit(X86_FEATURE_LFENCE_DISPATCH,
+				  c->x86_capability);
+	}
+
 	/* MFENCE stops RDTSC speculation */
-	__set_bit(X86_FEATURE_MFENCE_RDTSC, c->x86_capability);
+	if (!cpu_has_lfence_dispatch)
+		__set_bit(X86_FEATURE_MFENCE_RDTSC, c->x86_capability);
 
 	switch(c->x86)
 	{
diff --git a/xen/include/asm-x86/cpufeature.h b/xen/include/asm-x86/cpufeature.h
index ec380acd2e..5288bbfdd2 100644
--- a/xen/include/asm-x86/cpufeature.h
+++ b/xen/include/asm-x86/cpufeature.h
@@ -21,6 +21,7 @@ XEN_CPUFEATURE(XEN_SMEP,        (FSCAPINTS+0)*32+ 10) /* SMEP gets used by Xen i
 XEN_CPUFEATURE(XEN_SMAP,        (FSCAPINTS+0)*32+ 11) /* SMAP gets used by Xen itself */
 XEN_CPUFEATURE(MSR_PLATFORM_INFO, (FSCAPINTS+0)*32+12) /* PLATFORM_INFO MSR present */
 XEN_CPUFEATURE(MSR_MISC_FEATURES, (FSCAPINTS+0)*32+13) /* MISC_FEATURES_ENABLES MSR present */
+XEN_CPUFEATURE(LFENCE_DISPATCH, (FSCAPINTS+0)*32+14) /* lfence set as Dispatch Serialising */
 
 #define NCAPINTS (FSCAPINTS + 1) /* N 32-bit words worth of info */
 
@@ -96,6 +97,7 @@ XEN_CPUFEATURE(MSR_MISC_FEATURES, (FSCAPINTS+0)*32+13) /* MISC_FEATURES_ENABLES
 #define cpu_has_eist		boot_cpu_has(X86_FEATURE_EIST)
 #define cpu_has_hypervisor	boot_cpu_has(X86_FEATURE_HYPERVISOR)
 #define cpu_has_cmp_legacy	boot_cpu_has(X86_FEATURE_CMP_LEGACY)
+#define cpu_has_lfence_dispatch boot_cpu_has(X86_FEATURE_LFENCE_DISPATCH)
 
 enum _cache_type {
     CACHE_TYPE_NULL = 0,
diff --git a/xen/include/asm-x86/msr-index.h b/xen/include/asm-x86/msr-index.h
index 4f3f8d51e6..c45d0cd652 100644
--- a/xen/include/asm-x86/msr-index.h
+++ b/xen/include/asm-x86/msr-index.h
@@ -204,6 +204,7 @@
 #define MSR_AMD64_IC_CFG		0xc0011021
 #define MSR_AMD64_DC_CFG		0xc0011022
 #define MSR_AMD64_DE_CFG		0xc0011029
+#define AMD64_DE_CFG_LFENCE_SERIALISE	(_AC(1, ULL) << 1)
 
 #define MSR_AMD64_DR0_ADDRESS_MASK	0xc0011027
 #define MSR_AMD64_DR1_ADDRESS_MASK	0xc0011019
-- 
2.14.3
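
Stripped of the fault-safe wrappers (rdmsr_safe()/wrmsr_safe() are used above
precisely so that a #GP fault from a missing or read-only MSR is caught
rather than fatal), the underlying MSR cycle amounts to this sketch:

    mov  $0xc0011029, %ecx   /* MSR_AMD64_DE_CFG */
    rdmsr                    /* current value in %edx:%eax */
    or   $2, %eax            /* AMD64_DE_CFG_LFENCE_SERIALISE is bit 1 */
    wrmsr
    rdmsr                    /* read back: a hypervisor may silently
                              * discard the write, hence the final check */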


From ca9583d9e705aaa74da121e920ebf77d9f7995b2 Mon Sep 17 00:00:00 2001
From: Andrew Cooper <andrew.cooper3@citrix.com>
Date: Thu, 8 Feb 2018 12:49:07 +0100
Subject: [PATCH 27/45] x86: Introduce alternative indirect thunks

Depending on hardware and microcode availability, we will want to replace
IND_THUNK_RETPOLINE with other implementations.

For AMD hardware, choose IND_THUNK_LFENCE in preference to retpoline if lfence
is known to be (or was successfully made) dispatch serialising.

This is part of XSA-254.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
master commit: 858cba0d4c6b6b45180afcb41561fd6585ad51a3
master date: 2018-01-16 17:45:50 +0000
---
 docs/misc/xen-command-line.markdown | 16 ++++++++
 xen/arch/x86/indirect-thunk.S       | 17 +++++++--
 xen/arch/x86/spec_ctrl.c            | 75 +++++++++++++++++++++++++++++++++++--
 xen/include/asm-x86/cpufeature.h    |  2 +
 4 files changed, 104 insertions(+), 6 deletions(-)

diff --git a/docs/misc/xen-command-line.markdown b/docs/misc/xen-command-line.markdown
index 768d4f5f55..5ecb5eb1f8 100644
--- a/docs/misc/xen-command-line.markdown
+++ b/docs/misc/xen-command-line.markdown
@@ -252,6 +252,22 @@ and not running softirqs. Reduce this if softirqs are not being run frequently
 enough. Setting this to a high value may cause boot failure, particularly if
 the NMI watchdog is also enabled.
 
+### bti (x86)
+> `= List of [ thunk=retpoline|lfence|jmp ]`
+
+Branch Target Injection controls.  By default, Xen will pick the most
+appropriate BTI mitigations based on compiled-in support, loaded microcode,
+and hardware details.
+
+**WARNING: Any use of this option may interfere with heuristics.  Use with
+extreme care.**
+
+If Xen was compiled with INDIRECT_THUNK support, `thunk=` can be used to
+select which of the thunks gets patched into the `__x86_indirect_thunk_%reg`
+locations.  The default thunk is `retpoline` (generally preferred for Intel
+hardware), with the alternatives being `jmp` (a `jmp *%reg` gadget, minimal
+overhead), and `lfence` (an `lfence; jmp *%reg` gadget, preferred for AMD).
+
 ### xenheap\_megabytes (arm32)
 > `= <size>`
 
diff --git a/xen/arch/x86/indirect-thunk.S b/xen/arch/x86/indirect-thunk.S
index 3eaf505d0e..7d34707218 100644
--- a/xen/arch/x86/indirect-thunk.S
+++ b/xen/arch/x86/indirect-thunk.S
@@ -21,15 +21,26 @@
         ret
 .endm
 
+.macro IND_THUNK_LFENCE reg:req
+        lfence
+        jmp *%\reg
+.endm
+
+.macro IND_THUNK_JMP reg:req
+        jmp *%\reg
+.endm
+
 /*
- * Build the __x86_indirect_thunk_* symbols.  Currently implement the
- * retpoline thunk only.
+ * Build the __x86_indirect_thunk_* symbols.  Execution lands on an
+ * alternative patch point which implements one of the above THUNK_*'s
  */
 .macro GEN_INDIRECT_THUNK reg:req
         .section .text.__x86_indirect_thunk_\reg, "ax", @progbits
 
 ENTRY(__x86_indirect_thunk_\reg)
-        IND_THUNK_RETPOLINE \reg
+        ALTERNATIVE_2 __stringify(IND_THUNK_RETPOLINE \reg),              \
+        __stringify(IND_THUNK_LFENCE \reg), X86_FEATURE_IND_THUNK_LFENCE, \
+        __stringify(IND_THUNK_JMP \reg),    X86_FEATURE_IND_THUNK_JMP
 .endm
 
 /* Instantiate GEN_INDIRECT_THUNK for each register except %rsp. */
diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c
index 256701a43c..d601c028d8 100644
--- a/xen/arch/x86/spec_ctrl.c
+++ b/xen/arch/x86/spec_ctrl.c
@@ -16,18 +16,54 @@
  *
  * Copyright (c) 2017-2018 Citrix Systems Ltd.
  */
+#include <xen/errno.h>
 #include <xen/init.h>
 #include <xen/lib.h>
 
 #include <asm/processor.h>
 #include <asm/spec_ctrl.h>
 
-enum ind_thunk {
+static enum ind_thunk {
     THUNK_DEFAULT, /* Decide which thunk to use at boot time. */
     THUNK_NONE,    /* Missing compiler support for thunks. */
 
     THUNK_RETPOLINE,
-};
+    THUNK_LFENCE,
+    THUNK_JMP,
+} opt_thunk __initdata = THUNK_DEFAULT;
+
+static int __init parse_bti(const char *s)
+{
+    const char *ss;
+    int rc = 0;
+
+    do {
+        ss = strchr(s, ',');
+        if ( !ss )
+            ss = strchr(s, '\0');
+
+        if ( !strncmp(s, "thunk=", 6) )
+        {
+            s += 6;
+
+            if ( !strncmp(s, "retpoline", ss - s) )
+                opt_thunk = THUNK_RETPOLINE;
+            else if ( !strncmp(s, "lfence", ss - s) )
+                opt_thunk = THUNK_LFENCE;
+            else if ( !strncmp(s, "jmp", ss - s) )
+                opt_thunk = THUNK_JMP;
+            else
+                rc = -EINVAL;
+        }
+        else
+            rc = -EINVAL;
+
+        s = ss + 1;
+    } while ( *ss );
+
+    return rc;
+}
+custom_param("bti", parse_bti);
 
 static void __init print_details(enum ind_thunk thunk)
 {
@@ -40,13 +76,40 @@ static void __init print_details(enum ind_thunk thunk)
     printk(XENLOG_INFO
            "BTI mitigations: Thunk %s\n",
            thunk == THUNK_NONE      ? "N/A" :
-           thunk == THUNK_RETPOLINE ? "RETPOLINE" : "?");
+           thunk == THUNK_RETPOLINE ? "RETPOLINE" :
+           thunk == THUNK_LFENCE    ? "LFENCE" :
+           thunk == THUNK_JMP       ? "JMP" : "?");
 }
 
 void __init init_speculation_mitigations(void)
 {
     enum ind_thunk thunk = THUNK_DEFAULT;
 
+    /*
+     * Has the user specified any custom BTI mitigations?  If so, follow their
+     * instructions exactly and disable all heuristics.
+     */
+    if ( opt_thunk != THUNK_DEFAULT )
+    {
+        thunk = opt_thunk;
+    }
+    else
+    {
+        /*
+         * Evaluate the safest Branch Target Injection mitigations to use.
+         * First, begin with compiler-aided mitigations.
+         */
+        if ( IS_ENABLED(CONFIG_INDIRECT_THUNK) )
+        {
+            /*
+             * AMD's recommended mitigation is to set lfence as being dispatch
+             * serialising, and to use IND_THUNK_LFENCE.
+             */
+            if ( cpu_has_lfence_dispatch )
+                thunk = THUNK_LFENCE;
+        }
+    }
+
     /*
      * Supplementary minor adjustments.  Without compiler support, there are
      * no thunks.
@@ -61,6 +124,12 @@ void __init init_speculation_mitigations(void)
     if ( thunk == THUNK_DEFAULT )
         thunk = THUNK_RETPOLINE;
 
+    /* Apply the chosen settings. */
+    if ( thunk == THUNK_LFENCE )
+        setup_force_cpu_cap(X86_FEATURE_IND_THUNK_LFENCE);
+    else if ( thunk == THUNK_JMP )
+        setup_force_cpu_cap(X86_FEATURE_IND_THUNK_JMP);
+
     print_details(thunk);
 }
 
diff --git a/xen/include/asm-x86/cpufeature.h b/xen/include/asm-x86/cpufeature.h
index 5288bbfdd2..4dbcf863e0 100644
--- a/xen/include/asm-x86/cpufeature.h
+++ b/xen/include/asm-x86/cpufeature.h
@@ -22,6 +22,8 @@ XEN_CPUFEATURE(XEN_SMAP,        (FSCAPINTS+0)*32+ 11) /* SMAP gets used by Xen i
 XEN_CPUFEATURE(MSR_PLATFORM_INFO, (FSCAPINTS+0)*32+12) /* PLATFORM_INFO MSR present */
 XEN_CPUFEATURE(MSR_MISC_FEATURES, (FSCAPINTS+0)*32+13) /* MISC_FEATURES_ENABLES MSR present */
 XEN_CPUFEATURE(LFENCE_DISPATCH, (FSCAPINTS+0)*32+14) /* lfence set as Dispatch Serialising */
+XEN_CPUFEATURE(IND_THUNK_LFENCE,(FSCAPINTS+0)*32+15) /* Use IND_THUNK_LFENCE */
+XEN_CPUFEATURE(IND_THUNK_JMP,   (FSCAPINTS+0)*32+16) /* Use IND_THUNK_JMP */
 
 #define NCAPINTS (FSCAPINTS + 1) /* N 32-bit words worth of info */
 
-- 
2.14.3
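
In practice the heuristics above mean that an AMD host where DE_CFG could be
read or adjusted ends up on the lfence thunk automatically, while everything
else defaults to retpoline.  An administrator can override this from the Xen
command line, e.g. `bti=thunk=retpoline`, at which point the heuristics are
bypassed entirely, as the documentation hunk warns.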


From da49e518d79ca6c405a244889cab57ac8ed097cb Mon Sep 17 00:00:00 2001
From: Andrew Cooper <andrew.cooper3@citrix.com>
Date: Thu, 8 Feb 2018 12:49:41 +0100
Subject: [PATCH 28/45] x86/feature: Definitions for Indirect Branch Controls

Contemporary processors are gaining Indirect Branch Controls via microcode
updates.  Intel are introducing one bit to indicate IBRS and IBPB support, and
a second bit for STIBP.  AMD are introducing IBPB only, so enumerate it with a
separate bit.

Furthermore, depending on compiler and microcode availability, we may want to
run Xen with IBRS set, or clear.

To use these facilities, we synthesise separate IBRS and IBPB bits for
internal use.  A lot of infrastructure is required before these features are
safe to offer to guests.

This is part of XSA-254.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Wei Liu <wei.liu2@citrix.com>
master commit: 0d703a701cc4bc47773986b2796eebd28b1439b5
master date: 2018-01-16 17:45:50 +0000
---
 tools/libxc/xc_cpuid_x86.c                  |  8 ++++++--
 tools/libxl/libxl_cpuid.c                   |  3 +++
 tools/misc/xen-cpuid.c                      | 16 +++++++++++++++-
 xen/arch/x86/cpu/common.c                   |  2 +-
 xen/arch/x86/spec_ctrl.c                    | 17 +++++++++++++++++
 xen/include/asm-x86/cpufeature.h            |  3 +++
 xen/include/asm-x86/msr-index.h             |  8 ++++++++
 xen/include/public/arch-x86/cpufeatureset.h |  5 +++++
 xen/tools/gen-cpuid.py                      |  5 +++++
 9 files changed, 63 insertions(+), 4 deletions(-)

diff --git a/tools/libxc/xc_cpuid_x86.c b/tools/libxc/xc_cpuid_x86.c
index 2ad9aebfce..2779925314 100644
--- a/tools/libxc/xc_cpuid_x86.c
+++ b/tools/libxc/xc_cpuid_x86.c
@@ -547,13 +547,15 @@ static void xc_cpuid_hvm_policy(xc_interface *xch,
         {
             regs[1] = info->featureset[featureword_of(X86_FEATURE_FSGSBASE)];
             regs[2] = info->featureset[featureword_of(X86_FEATURE_PREFETCHWT1)];
+            regs[3] = info->featureset[featureword_of(X86_FEATURE_IBRSB)];
         }
         else
         {
             regs[1] = 0;
             regs[2] = 0;
+            regs[3] = 0;
         }
-        regs[0] = regs[3] = 0;
+        regs[0] = 0;
         break;
 
     case 0x0000000d:
@@ -638,13 +640,15 @@ static void xc_cpuid_pv_policy(xc_interface *xch,
         {
             regs[1] = info->featureset[featureword_of(X86_FEATURE_FSGSBASE)];
             regs[2] = info->featureset[featureword_of(X86_FEATURE_PREFETCHWT1)];
+            regs[3] = info->featureset[featureword_of(X86_FEATURE_IBRSB)];
         }
         else
         {
             regs[1] = 0;
             regs[2] = 0;
+            regs[3] = 0;
         }
-        regs[0] = regs[3] = 0;
+        regs[0] = 0;
         break;
 
     case 0x0000000d:
diff --git a/tools/libxl/libxl_cpuid.c b/tools/libxl/libxl_cpuid.c
index 24591e2461..8292654c86 100644
--- a/tools/libxl/libxl_cpuid.c
+++ b/tools/libxl/libxl_cpuid.c
@@ -158,6 +158,8 @@ int libxl_cpuid_parse_config(libxl_cpuid_policy_list *cpuid, const char* str)
         {"de",           0x00000001, NA, CPUID_REG_EDX,  2,  1},
         {"vme",          0x00000001, NA, CPUID_REG_EDX,  1,  1},
         {"fpu",          0x00000001, NA, CPUID_REG_EDX,  0,  1},
+        {"ibrsb",        0x00000007,  0, CPUID_REG_EDX, 26,  1},
+        {"stibp",        0x00000007,  0, CPUID_REG_EDX, 27,  1},
         {"topoext",      0x80000001, NA, CPUID_REG_ECX, 22,  1},
         {"tbm",          0x80000001, NA, CPUID_REG_ECX, 21,  1},
         {"nodeid",       0x80000001, NA, CPUID_REG_ECX, 19,  1},
@@ -187,6 +189,7 @@ int libxl_cpuid_parse_config(libxl_cpuid_policy_list *cpuid, const char* str)
         {"nx",           0x80000001, NA, CPUID_REG_EDX, 20,  1},
         {"syscall",      0x80000001, NA, CPUID_REG_EDX, 11,  1},
         {"procpkg",      0x00000004,  0, CPUID_REG_EAX, 26,  6},
+        {"ibpb",         0x80000008, NA, CPUID_REG_EBX, 12,  1},
         {"apicidsize",   0x80000008, NA, CPUID_REG_ECX, 12,  4},
         {"nc",           0x80000008, NA, CPUID_REG_ECX,  0,  8},
         {"svm_npt",      0x8000000a, NA, CPUID_REG_EDX,  0,  1},
diff --git a/tools/misc/xen-cpuid.c b/tools/misc/xen-cpuid.c
index 44991f6329..0981366e06 100644
--- a/tools/misc/xen-cpuid.c
+++ b/tools/misc/xen-cpuid.c
@@ -140,7 +140,20 @@ static const char *str_e8b[32] =
 {
     [ 0] = "clzero",
 
-    [1 ... 31] = "REZ",
+    [1 ... 11] = "REZ",
+
+    [12] = "ibpb",
+
+    [13 ... 31] = "REZ",
+};
+
+static const char *str_7d0[32] =
+{
+    [0 ... 25] = "REZ",
+
+    [26] = "ibrsb",         [27] = "stibp",
+
+    [28 ... 31] = "REZ",
 };
 
 static struct {
@@ -158,6 +171,7 @@ static struct {
     { "0x00000007:0.ecx", "7c0", str_7c0 },
     { "0x80000007.edx",   "e7d", str_e7d },
     { "0x80000008.ebx",   "e8b", str_e8b },
+    { "0x00000007:0.edx", "7d0", str_7d0 },
 };
 
 #define COL_ALIGN "18"
diff --git a/xen/arch/x86/cpu/common.c b/xen/arch/x86/cpu/common.c
index 3e48882385..948b0a91ea 100644
--- a/xen/arch/x86/cpu/common.c
+++ b/xen/arch/x86/cpu/common.c
@@ -350,7 +350,7 @@ static void generic_identify(struct cpuinfo_x86 *c)
 		cpuid_count(0x00000007, 0, &tmp,
 			    &c->x86_capability[cpufeat_word(X86_FEATURE_FSGSBASE)],
 			    &c->x86_capability[cpufeat_word(X86_FEATURE_PKU)],
-			    &tmp);
+			    &c->x86_capability[cpufeat_word(X86_FEATURE_IBRSB)]);
 }
 
 /*
diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c
index d601c028d8..89e7287e43 100644
--- a/xen/arch/x86/spec_ctrl.c
+++ b/xen/arch/x86/spec_ctrl.c
@@ -67,8 +67,25 @@ custom_param("bti", parse_bti);
 
 static void __init print_details(enum ind_thunk thunk)
 {
+    unsigned int _7d0 = 0, e8b = 0, tmp;
+
+    /* Collect diagnostics about available mitigations. */
+    if ( boot_cpu_data.cpuid_level >= 7 )
+        cpuid_count(7, 0, &tmp, &tmp, &tmp, &_7d0);
+    if ( boot_cpu_data.extended_cpuid_level >= 0x80000008 )
+        cpuid(0x80000008, &tmp, &e8b, &tmp, &tmp);
+
     printk(XENLOG_DEBUG "Speculative mitigation facilities:\n");
 
+    /* Hardware features which pertain to speculative mitigations. */
+    if ( (_7d0 & (cpufeat_mask(X86_FEATURE_IBRSB) |
+                  cpufeat_mask(X86_FEATURE_STIBP))) ||
+         (e8b & cpufeat_mask(X86_FEATURE_IBPB)) )
+        printk(XENLOG_DEBUG "  Hardware features:%s%s%s\n",
+               (_7d0 & cpufeat_mask(X86_FEATURE_IBRSB)) ? " IBRS/IBPB" : "",
+               (_7d0 & cpufeat_mask(X86_FEATURE_STIBP)) ? " STIBP"     : "",
+               (e8b  & cpufeat_mask(X86_FEATURE_IBPB))  ? " IBPB"      : "");
+
     /* Compiled-in support which pertains to BTI mitigations. */
     if ( IS_ENABLED(CONFIG_INDIRECT_THUNK) )
         printk(XENLOG_DEBUG "  Compiled-in support: INDIRECT_THUNK\n");
diff --git a/xen/include/asm-x86/cpufeature.h b/xen/include/asm-x86/cpufeature.h
index 4dbcf863e0..dc3a3357ef 100644
--- a/xen/include/asm-x86/cpufeature.h
+++ b/xen/include/asm-x86/cpufeature.h
@@ -24,6 +24,9 @@ XEN_CPUFEATURE(MSR_MISC_FEATURES, (FSCAPINTS+0)*32+13) /* MISC_FEATURES_ENABLES
 XEN_CPUFEATURE(LFENCE_DISPATCH, (FSCAPINTS+0)*32+14) /* lfence set as Dispatch Serialising */
 XEN_CPUFEATURE(IND_THUNK_LFENCE,(FSCAPINTS+0)*32+15) /* Use IND_THUNK_LFENCE */
 XEN_CPUFEATURE(IND_THUNK_JMP,   (FSCAPINTS+0)*32+16) /* Use IND_THUNK_JMP */
+XEN_CPUFEATURE(XEN_IBPB,        (FSCAPINTS+0)*32+17) /* IBRSB || IBPB */
+XEN_CPUFEATURE(XEN_IBRS_SET,    (FSCAPINTS+0)*32+18) /* IBRSB && IBRS set in Xen */
+XEN_CPUFEATURE(XEN_IBRS_CLEAR,  (FSCAPINTS+0)*32+19) /* IBRSB && IBRS clear in Xen */
 
 #define NCAPINTS (FSCAPINTS + 1) /* N 32-bit words worth of info */
 
diff --git a/xen/include/asm-x86/msr-index.h b/xen/include/asm-x86/msr-index.h
index c45d0cd652..fc5bf799fe 100644
--- a/xen/include/asm-x86/msr-index.h
+++ b/xen/include/asm-x86/msr-index.h
@@ -31,6 +31,14 @@
 #define EFER_LMSLE		(1<<_EFER_LMSLE)
 #define EFER_FFXSE		(1<<_EFER_FFXSE)
 
+/* Speculation Controls. */
+#define MSR_SPEC_CTRL			0x00000048
+#define SPEC_CTRL_IBRS			(_AC(1, ULL) << 0)
+#define SPEC_CTRL_STIBP			(_AC(1, ULL) << 1)
+
+#define MSR_PRED_CMD			0x00000049
+#define PRED_CMD_IBPB			(_AC(1, ULL) << 0)
+
 /* Intel MSRs. Some also available on other CPUs */
 #define MSR_IA32_PERFCTR0		0x000000c1
 #define MSR_IA32_A_PERFCTR0		0x000004c1
diff --git a/xen/include/public/arch-x86/cpufeatureset.h b/xen/include/public/arch-x86/cpufeatureset.h
index 9320c9e97c..a6040b2cc8 100644
--- a/xen/include/public/arch-x86/cpufeatureset.h
+++ b/xen/include/public/arch-x86/cpufeatureset.h
@@ -233,6 +233,11 @@ XEN_CPUFEATURE(EFRO,          7*32+10) /*   APERF/MPERF Read Only interface */
 
 /* AMD-defined CPU features, CPUID level 0x80000008.ebx, word 8 */
 XEN_CPUFEATURE(CLZERO,        8*32+ 0) /*A  CLZERO instruction */
+XEN_CPUFEATURE(IBPB,          8*32+12) /*   IBPB support only (no IBRS, used by AMD) */
+
+/* Intel-defined CPU features, CPUID level 0x00000007:0.edx, word 9 */
+XEN_CPUFEATURE(IBRSB,         9*32+26) /*   IBRS and IBPB support (used by Intel) */
+XEN_CPUFEATURE(STIBP,         9*32+27) /*   STIBP */
 
 #endif /* XEN_CPUFEATURE */
 
diff --git a/xen/tools/gen-cpuid.py b/xen/tools/gen-cpuid.py
index 005cad9b43..9b2cb6f7a6 100755
--- a/xen/tools/gen-cpuid.py
+++ b/xen/tools/gen-cpuid.py
@@ -254,6 +254,11 @@ def crunch_numbers(state):
         # are built on top of AVX512F
         AVX512F: [AVX512DQ, AVX512IFMA, AVX512PF, AVX512ER, AVX512CD,
                   AVX512BW, AVX512VL, AVX512VBMI],
+
+        # Single Thread Indirect Branch Predictors (STIBP) is a new bit in
+        # MSR_SPEC_CTRL, which is itself enumerated by the Indirect Branch
+        # Restricted Speculation/Indirect Branch Prediction Barrier bit.
+        IBRSB: [STIBP],
     }
 
     deep_features = tuple(sorted(deps.keys()))
-- 
2.14.3

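For reference, the new feature bits above are addressed with the usual
word/mask arithmetic.  A standalone sketch, with featureword_of() and
cpufeat_mask() redefined locally (mirroring Xen's helpers) so that it
compiles on its own:

    #include <stdint.h>
    #include <stdio.h>

    #define X86_FEATURE_IBRSB (9 * 32 + 26) /* from cpufeatureset.h above */
    #define X86_FEATURE_STIBP (9 * 32 + 27)

    #define featureword_of(f) ((f) >> 5)          /* which 32-bit word */
    #define cpufeat_mask(f)   (1u << ((f) & 31))  /* bit within that word */

    int main(void)
    {
        /* IBRSB: word 9, mask 0x04000000; STIBP: word 9, mask 0x08000000. */
        printf("IBRSB: word %u, mask 0x%08x\n",
               featureword_of(X86_FEATURE_IBRSB),
               cpufeat_mask(X86_FEATURE_IBRSB));
        printf("STIBP: word %u, mask 0x%08x\n",
               featureword_of(X86_FEATURE_STIBP),
               cpufeat_mask(X86_FEATURE_STIBP));
        return 0;
    }
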

From 532ccf4fd55cfd916f56279a71852585d726ab23 Mon Sep 17 00:00:00 2001
From: Andrew Cooper <andrew.cooper3@citrix.com>
Date: Thu, 8 Feb 2018 12:50:16 +0100
Subject: [PATCH 29/45] x86/cmdline: Introduce a command line option to disable
 IBRS/IBPB, STIBP and IBPB

Instead of gaining yet another top-level boolean, introduce a more generic
cpuid= option.  Also introduce a helper function to parse a generic boolean
value.

This is part of XSA-254.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>

xen/cmdline: Fix parse_boolean() for unadorned values

A command line such as "cpuid=no-ibrsb,no-stibp" tickles a bug in
parse_boolean(), because the first token is terminated by the separating
comma rather than by the NUL which the check expected.

Instead, check for slen == nlen which accounts for the boundary (if any)
passed via the 'e' parameter.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
master commit: 7850b1c00749df834ea2ad0c1f5d9364c4838795
master date: 2018-01-16 17:45:50 +0000
master commit: ac37ec1ddef234eeba6f438c29ff687c64962ebd
master date: 2018-01-31 10:47:12 +0000
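To illustrate the boundary fix, here is a standalone sketch keeping only
the length-based matching (the "=..." handling is elided for brevity),
exercising the comma-separated case from the message above:

    #include <assert.h>
    #include <stdio.h>
    #include <string.h>

    static int parse_boolean(const char *name, const char *s, const char *e)
    {
        size_t slen, nlen;
        int val = !!strncmp(s, "no-", 3);

        if ( !val )
            s += 3;

        slen = e ? (size_t)(e - s) : strlen(s);
        nlen = strlen(name);

        /* Does s start with name? */
        if ( slen < nlen || strncmp(s, name, nlen) )
            return -1;

        /* Exact, unadorned name?  Result depends on the "no-" prefix. */
        if ( slen == nlen )
            return val;

        return -1; /* "=..." handling elided */
    }

    int main(void)
    {
        const char *cmd = "no-ibrsb,no-stibp";
        const char *comma = strchr(cmd, ',');

        /* The first token ends at the comma, not at a NUL. */
        assert(parse_boolean("ibrsb", cmd, comma) == 0);
        assert(parse_boolean("stibp", comma + 1, NULL) == 0);
        printf("both tokens parse as 'disabled'\n");
        return 0;
    }
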
---
 docs/misc/xen-command-line.markdown | 12 ++++++++++++
 xen/arch/x86/cpuid.c                | 35 +++++++++++++++++++++++++++++++++++
 xen/common/kernel.c                 | 36 ++++++++++++++++++++++++++++++++++++
 xen/include/xen/lib.h               |  7 +++++++
 4 files changed, 90 insertions(+)

diff --git a/docs/misc/xen-command-line.markdown b/docs/misc/xen-command-line.markdown
index 5ecb5eb1f8..709e4de66a 100644
--- a/docs/misc/xen-command-line.markdown
+++ b/docs/misc/xen-command-line.markdown
@@ -434,6 +434,18 @@ choice of `dom0-kernel` is deprecated and not supported by all Dom0 kernels.
   respectively.
 * `verbose` option can be included as a string or also as `verbose=<integer>`
 
+### cpuid (x86)
+> `= List of comma separated booleans`
+
+This option allows for fine tuning of the facilities Xen will use, after
+accounting for hardware capabilities as enumerated via CPUID.
+
+Currently accepted:
+
+The Speculation Control hardware features `ibrsb`, `stibp`, `ibpb` are used by
+default if available.  They can be ignored, e.g. `no-ibrsb`, at which point Xen
+won't use them itself, and won't offer them to guests.
+
 ### cpuid\_mask\_cpu (AMD only)
 > `= fam_0f_rev_c | fam_0f_rev_d | fam_0f_rev_e | fam_0f_rev_f | fam_0f_rev_g | fam_10_rev_b | fam_10_rev_c | fam_11_rev_b`
 
diff --git a/xen/arch/x86/cpuid.c b/xen/arch/x86/cpuid.c
index 63b2db99b8..7a7c8de489 100644
--- a/xen/arch/x86/cpuid.c
+++ b/xen/arch/x86/cpuid.c
@@ -17,6 +17,41 @@ uint32_t __read_mostly raw_featureset[FSCAPINTS];
 uint32_t __read_mostly pv_featureset[FSCAPINTS];
 uint32_t __read_mostly hvm_featureset[FSCAPINTS];
 
+static int __init parse_xen_cpuid(const char *s)
+{
+    const char *ss;
+    int val, rc = 0;
+
+    do {
+        ss = strchr(s, ',');
+        if ( !ss )
+            ss = strchr(s, '\0');
+
+        if ( (val = parse_boolean("ibpb", s, ss)) >= 0 )
+        {
+            if ( !val )
+                setup_clear_cpu_cap(X86_FEATURE_IBPB);
+        }
+        else if ( (val = parse_boolean("ibrsb", s, ss)) >= 0 )
+        {
+            if ( !val )
+                setup_clear_cpu_cap(X86_FEATURE_IBRSB);
+        }
+        else if ( (val = parse_boolean("stibp", s, ss)) >= 0 )
+        {
+            if ( !val )
+                setup_clear_cpu_cap(X86_FEATURE_STIBP);
+        }
+        else
+            rc = -EINVAL;
+
+        s = ss + 1;
+    } while ( *ss );
+
+    return rc;
+}
+custom_param("cpuid", parse_xen_cpuid);
+
 static void __init sanitise_featureset(uint32_t *fs)
 {
     /* for_each_set_bit() uses unsigned longs.  Extend with zeroes. */
diff --git a/xen/common/kernel.c b/xen/common/kernel.c
index d0edb13d4b..ed49c62a46 100644
--- a/xen/common/kernel.c
+++ b/xen/common/kernel.c
@@ -166,6 +166,42 @@ int __init parse_bool(const char *s)
     return -1;
 }
 
+int parse_boolean(const char *name, const char *s, const char *e)
+{
+    size_t slen, nlen;
+    int val = !!strncmp(s, "no-", 3);
+
+    if ( !val )
+        s += 3;
+
+    slen = e ? ({ ASSERT(e >= s); e - s; }) : strlen(s);
+    nlen = strlen(name);
+
+    /* Does s now start with name? */
+    if ( slen < nlen || strncmp(s, name, nlen) )
+        return -1;
+
+    /* Exact, unadorned name?  Result depends on the 'no-' prefix. */
+    if ( slen == nlen )
+        return val;
+
+    /* =$SOMETHING?  Defer to the regular boolean parsing. */
+    if ( s[nlen] == '=' )
+    {
+        char buf[8];
+
+        s += nlen + 1;
+        if ( e <= s || e - s >= ARRAY_SIZE(buf) )
+            return -1;
+        memcpy(buf, s, e - s);
+        buf[e - s] = 0;
+        return parse_bool(buf);
+    }
+
+    /* Unrecognised.  Give up. */
+    return -1;
+}
+
 unsigned int tainted;
 
 /**
diff --git a/xen/include/xen/lib.h b/xen/include/xen/lib.h
index d1171b774b..b9d1c87ffd 100644
--- a/xen/include/xen/lib.h
+++ b/xen/include/xen/lib.h
@@ -68,6 +68,13 @@ struct domain;
 void cmdline_parse(const char *cmdline);
 int parse_bool(const char *s);
 
+/**
+ * Given a specific name, parses a string of the form:
+ *   [no-]$NAME[=...]
+ * returning 0 or 1 for a recognised boolean, or -1 for an error.
+ */
+int parse_boolean(const char *name, const char *s, const char *e);
+
 /*#define DEBUG_TRACE_DUMP*/
 #ifdef DEBUG_TRACE_DUMP
 extern void debugtrace_dump(void);
-- 
2.14.3


From 2cd189eb55af8b04185b473ac2885f76b3d87efe Mon Sep 17 00:00:00 2001
From: Wei Liu <wei.liu2@citrix.com>
Date: Thu, 8 Feb 2018 12:51:18 +0100
Subject: [PATCH 31/45] x86: fix GET_STACK_END

AIUI the purpose of having the .if directive is to make GET_STACK_END
work with any general purpose register. The code as-is would produce
the wrong result for r8, whose register number is exactly 8 and
therefore fails the strict `> 8` test. Fix it.

Signed-off-by: Wei Liu <wei.liu2@citrix.com>
Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
master commit: 8155476765a5bdecea1534b46562cf28e0113a9a
master date: 2018-01-25 11:34:17 +0000
---
 xen/include/asm-x86/asm_defns.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/xen/include/asm-x86/asm_defns.h b/xen/include/asm-x86/asm_defns.h
index 358deffb5b..edd7f05acc 100644
--- a/xen/include/asm-x86/asm_defns.h
+++ b/xen/include/asm-x86/asm_defns.h
@@ -120,7 +120,7 @@ void ret_from_intr(void);
 
 #define STACK_CPUINFO_FIELD(field) (1 - CPUINFO_sizeof + CPUINFO_##field)
 #define GET_STACK_END(reg)                        \
-        .if .Lr##reg > 8;                         \
+        .if .Lr##reg >= 8;                        \
         movq $STACK_SIZE-1, %r##reg;              \
         .else;                                    \
         movl $STACK_SIZE-1, %e##reg;              \
-- 
2.14.3


From c3d195cd91385531ed12af2576bfedcab3118211 Mon Sep 17 00:00:00 2001
From: Andrew Cooper <andrew.cooper3@citrix.com>
Date: Thu, 8 Feb 2018 12:51:55 +0100
Subject: [PATCH 32/45] x86/cpuid: Handling of IBRS/IBPB, STIBP and IBRS for
 guests

Intel specifies IBRS/IBPB (combined, in a single bit) and STIBP as a separate
bit.  AMD specifies IBPB alone in a 3rd bit.

AMD's IBPB is a subset of Intel's combined IBRS/IBPB.  For performance
reasons, administrators might wish to express "IBPB only" even on Intel
hardware, so we allow the AMD bit to be used for this purpose.

The behaviour of STIBP is more complicated.

It is our current understanding that STIBP will be advertised on HT-capable
hardware irrespective of whether HT is enabled, but not advertised on
HT-incapable hardware.  However, for ease of virtualisation, STIBP's
functionality is ignored rather than reserved by microcode/hardware on
HT-incapable hardware.

For guest safety, we treat STIBP as special, always override the toolstack
choice, and always advertise STIBP if IBRS is available.  This removes the
corner case where STIBP is not advertised, but the guest is running on
HT-capable hardware where it does matter.

Finally, as a bugfix, update the libxc CPUID logic to understand the e8b
feature leaf, which has the side effect of also offering CLZERO to guests on
applicable hardware.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Acked-by: Wei Liu <wei.liu2@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
master commit: d297b56682e730d598e2529cc6998151d3b6f6f8
master date: 2018-01-26 14:10:21 +0000
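The override can be summarised by the following standalone sketch (a
hypothetical helper for illustration; the real logic lives in
calculate_{pv,hvm}_featureset() and update_domain_cpuid_info() below,
and the masks are written as raw bit tests):

    #include <stdint.h>

    #define F_IBRSB (1u << 26)  /* CPUID.0x7:0.edx */
    #define F_STIBP (1u << 27)

    static uint32_t adjust_leaf7_edx(uint32_t edx)
    {
        if ( edx & F_IBRSB )
            edx |= F_STIBP;     /* always advertise STIBP alongside IBRS */
        else
            edx &= ~F_STIBP;    /* without IBRS, hide STIBP too */
        return edx;
    }
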
---
 tools/libxc/xc_cpuid_x86.c                  |  4 +++-
 xen/arch/x86/cpuid.c                        | 20 ++++++++++++++++++++
 xen/arch/x86/domctl.c                       | 22 ++++++++++++++++++----
 xen/include/public/arch-x86/cpufeatureset.h |  2 +-
 4 files changed, 42 insertions(+), 6 deletions(-)

diff --git a/tools/libxc/xc_cpuid_x86.c b/tools/libxc/xc_cpuid_x86.c
index 2779925314..978451aa55 100644
--- a/tools/libxc/xc_cpuid_x86.c
+++ b/tools/libxc/xc_cpuid_x86.c
@@ -583,7 +583,9 @@ static void xc_cpuid_hvm_policy(xc_interface *xch,
 
     case 0x80000008:
         regs[0] &= 0x0000ffffu;
-        regs[1] = regs[3] = 0;
+        regs[1] = info->featureset[featureword_of(X86_FEATURE_CLZERO)];
+        /* regs[2] handled in the per-vendor logic. */
+        regs[3] = 0;
         break;
 
     case 0x00000002: /* Intel cache info (dumped by AMD policy) */
diff --git a/xen/arch/x86/cpuid.c b/xen/arch/x86/cpuid.c
index 7a7c8de489..451952cabe 100644
--- a/xen/arch/x86/cpuid.c
+++ b/xen/arch/x86/cpuid.c
@@ -154,6 +154,16 @@ static void __init calculate_pv_featureset(void)
     __set_bit(X86_FEATURE_X2APIC, pv_featureset);
     __set_bit(X86_FEATURE_CMP_LEGACY, pv_featureset);
 
+    /* On hardware with IBRS/IBPB support, there are further adjustments. */
+    if ( test_bit(X86_FEATURE_IBRSB, pv_featureset) )
+    {
+        /* Offer STIBP unconditionally.  It is a nop on non-HT hardware. */
+        __set_bit(X86_FEATURE_STIBP, pv_featureset);
+
+        /* AMD's IBPB is a subset of IBRS/IBPB. */
+        __set_bit(X86_FEATURE_IBPB, pv_featureset);
+    }
+
     sanitise_featureset(pv_featureset);
 }
 
@@ -210,6 +220,16 @@ static void __init calculate_hvm_featureset(void)
             __clear_bit(X86_FEATURE_XSAVES, hvm_featureset);
     }
 
+    /* On hardware with IBRS/IBPB support, there are further adjustments. */
+    if ( test_bit(X86_FEATURE_IBRSB, hvm_featureset) )
+    {
+        /* Offer STIBP unconditionally.  It is a nop on non-HT hardware. */
+        __set_bit(X86_FEATURE_STIBP, hvm_featureset);
+
+        /* AMD's IBPB is a subset of IBRS/IBPB. */
+        __set_bit(X86_FEATURE_IBPB, hvm_featureset);
+    }
+
     sanitise_featureset(hvm_featureset);
 }
 
diff --git a/xen/arch/x86/domctl.c b/xen/arch/x86/domctl.c
index 84e585613a..2b5cab27d2 100644
--- a/xen/arch/x86/domctl.c
+++ b/xen/arch/x86/domctl.c
@@ -49,7 +49,7 @@ static int gdbsx_guest_mem_io(domid_t domid, struct xen_domctl_gdbsx_memio *iop)
 }
 
 static void update_domain_cpuid_info(struct domain *d,
-                                     const xen_domctl_cpuid_t *ctl)
+                                     cpuid_input_t *ctl)
 {
     bool call_policy_changed = false; /* Avoid for_each_vcpu() unnecessarily */
 
@@ -169,6 +169,18 @@ static void update_domain_cpuid_info(struct domain *d,
 
             d->arch.pv_domain.cpuidmasks->_7ab0 = mask;
         }
+
+        /*
+         * Override STIBP to match IBRS.  Guests can safely use STIBP
+         * functionality on non-HT hardware, but can't necesserily protect
+         * themselves from SP2/Spectre/Branch Target Injection if STIBP is
+         * hidden on HT-capable hardware.
+         */
+        if ( ctl->edx & cpufeat_mask(X86_FEATURE_IBRSB) )
+            ctl->edx |= cpufeat_mask(X86_FEATURE_STIBP);
+        else
+            ctl->edx &= ~cpufeat_mask(X86_FEATURE_STIBP);
+
         break;
 
     case 0xd:
@@ -902,16 +914,18 @@ long arch_do_domctl(
         {
             if ( i < MAX_CPUID_INPUT )
                 cpuid->input[0] = XEN_CPUID_INPUT_UNUSED;
+            else
+                cpuid = NULL;
         }
         else if ( i < MAX_CPUID_INPUT )
             *cpuid = *ctl;
         else if ( unused )
-            *unused = *ctl;
+            *(cpuid = unused) = *ctl;
         else
             ret = -ENOENT;
 
-        if ( !ret )
-            update_domain_cpuid_info(d, ctl);
+        if ( !ret && cpuid )
+            update_domain_cpuid_info(d, cpuid);
 
         domain_unpause(d);
         break;
diff --git a/xen/include/public/arch-x86/cpufeatureset.h b/xen/include/public/arch-x86/cpufeatureset.h
index a6040b2cc8..5f96c1c23e 100644
--- a/xen/include/public/arch-x86/cpufeatureset.h
+++ b/xen/include/public/arch-x86/cpufeatureset.h
@@ -237,7 +237,7 @@ XEN_CPUFEATURE(IBPB,          8*32+12) /*   IBPB support only (no IBRS, used by
 
 /* Intel-defined CPU features, CPUID level 0x00000007:0.edx, word 9 */
 XEN_CPUFEATURE(IBRSB,         9*32+26) /*   IBRS and IBPB support (used by Intel) */
-XEN_CPUFEATURE(STIBP,         9*32+27) /*   STIBP */
+XEN_CPUFEATURE(STIBP,         9*32+27) /*!  STIBP */
 
 #endif /* XEN_CPUFEATURE */
 
-- 
2.14.3


From 29e7171e9dd0aa8e35f790157d781dff22f6a970 Mon Sep 17 00:00:00 2001
From: Andrew Cooper <andrew.cooper3@citrix.com>
Date: Thu, 8 Feb 2018 12:52:37 +0100
Subject: [PATCH 33/45] x86/msr: Emulation of MSR_{SPEC_CTRL,PRED_CMD} for
 guests

As per the spec currently available here:

https://software.intel.com/sites/default/files/managed/c5/63/336996-Speculative-Execution-Side-Channel-Mitigations.pdf

MSR_ARCH_CAPABILITIES will only come into existence on new hardware, but is
implemented as a straight #GP for now to avoid being leaky when new hardware
arrives.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
master commit: ea58a679a6190e714a592f1369b660769a48a80c
master date: 2018-01-26 14:10:21 +0000
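In summary, the emulated semantics reduce to the predicates below (a
condensed sketch of the checks implemented in the HVM and PV paths in
this patch; the helper names and bool parameters are illustrative only):

    #include <stdbool.h>
    #include <stdint.h>

    #define SPEC_CTRL_IBRS  (1ull << 0)
    #define SPEC_CTRL_STIBP (1ull << 1)

    /* WRMSR to MSR_SPEC_CTRL: #GP unless IBRSB is visible and no reserved
     * bit is set.  STIBP is accepted even without the STIBP CPUID bit, as
     * hardware specifies it as ignored in that case. */
    static bool wrmsr_spec_ctrl_ok(uint64_t val, bool guest_has_ibrsb)
    {
        return guest_has_ibrsb &&
               !(val & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP));
    }

    /* WRMSR to MSR_PRED_CMD: allowed if either IBRSB or IBPB is visible;
     * only bit 0 (IBPB) has defined behaviour.  Reads of MSR_PRED_CMD and
     * any access to MSR_ARCH_CAPABILITIES #GP unconditionally. */
    static bool wrmsr_pred_cmd_ok(bool guest_has_ibrsb, bool guest_has_ibpb)
    {
        return guest_has_ibrsb || guest_has_ibpb;
    }
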
---
 xen/arch/x86/domain.c           |  5 ++--
 xen/arch/x86/hvm/hvm.c          | 54 +++++++++++++++++++++++++++++++++++++++--
 xen/arch/x86/traps.c            | 49 +++++++++++++++++++++++++++++++++++++
 xen/include/asm-x86/domain.h    |  4 ++-
 xen/include/asm-x86/msr-index.h |  2 ++
 5 files changed, 108 insertions(+), 6 deletions(-)

diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index 7e74832af5..db2e57e7ce 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -2684,7 +2684,7 @@ void arch_dump_vcpu_info(struct vcpu *v)
 }
 
 void domain_cpuid(
-    struct domain *d,
+    const struct domain *d,
     unsigned int  input,
     unsigned int  sub_input,
     unsigned int  *eax,
@@ -2692,12 +2692,11 @@ void domain_cpuid(
     unsigned int  *ecx,
     unsigned int  *edx)
 {
-    cpuid_input_t *cpuid;
     int i;
 
     for ( i = 0; i < MAX_CPUID_INPUT; i++ )
     {
-        cpuid = &d->arch.cpuids[i];
+        const cpuid_input_t *cpuid = &d->arch.cpuids[i];
 
         if ( (cpuid->input[0] == input) &&
              ((cpuid->input[1] == XEN_CPUID_INPUT_UNUSED) ||
diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index a5ad7ca1c3..876dcfe662 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -3840,7 +3840,7 @@ int hvm_msr_read_intercept(unsigned int msr, uint64_t *msr_content)
 
     switch ( msr )
     {
-        unsigned int eax, ebx, ecx, index;
+        unsigned int eax, ebx, ecx, edx, index;
 
     case MSR_EFER:
         *msr_content = v->arch.hvm_vcpu.guest_efer;
@@ -3927,6 +3927,21 @@ int hvm_msr_read_intercept(unsigned int msr, uint64_t *msr_content)
             goto gp_fault;
         break;
 
+    case MSR_PRED_CMD:
+        /* Write-only */
+        goto gp_fault;
+
+    case MSR_SPEC_CTRL:
+        hvm_cpuid(7, NULL, NULL, NULL, &edx);
+        if ( !(edx & cpufeat_mask(X86_FEATURE_IBRSB)) )
+            goto gp_fault;
+        *msr_content = v->arch.spec_ctrl;
+        break;
+
+    case MSR_ARCH_CAPABILITIES:
+        /* Not implemented yet. */
+        goto gp_fault;
+
     case MSR_K8_ENABLE_C1E:
     case MSR_AMD64_NB_CFG:
          /*
@@ -3993,7 +4008,7 @@ int hvm_msr_write_intercept(unsigned int msr, uint64_t msr_content,
 
     switch ( msr )
     {
-        unsigned int eax, ebx, ecx, index;
+        unsigned int eax, ebx, ecx, edx, index;
 
     case MSR_EFER:
         if ( hvm_set_efer(msr_content) )
@@ -4094,6 +4109,41 @@ int hvm_msr_write_intercept(unsigned int msr, uint64_t msr_content,
             goto gp_fault;
         break;
 
+    case MSR_SPEC_CTRL:
+        hvm_cpuid(7, NULL, NULL, NULL, &edx);
+        if ( !(edx & cpufeat_mask(X86_FEATURE_IBRSB)) )
+            goto gp_fault; /* MSR available? */
+
+        /*
+         * Note: SPEC_CTRL_STIBP is specified as safe to use (i.e. ignored)
+         * when STIBP isn't enumerated in hardware.
+         */
+
+        if ( msr_content & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP) )
+            goto gp_fault; /* Rsvd bit set? */
+
+        v->arch.spec_ctrl = msr_content;
+        break;
+
+    case MSR_PRED_CMD:
+        hvm_cpuid(7, NULL, NULL, NULL, &edx);
+        hvm_cpuid(0x80000008, NULL, &ebx, NULL, NULL);
+        if ( !(edx & cpufeat_mask(X86_FEATURE_IBRSB)) &&
+             !(ebx & cpufeat_mask(X86_FEATURE_IBPB)) )
+            goto gp_fault; /* MSR available? */
+
+        /*
+         * The only defined behaviour is when writing PRED_CMD_IBPB.  In
+         * practice, real hardware accepts any value without faulting.
+         */
+        if ( msr_content & PRED_CMD_IBPB )
+            wrmsrl(MSR_PRED_CMD, PRED_CMD_IBPB);
+        break;
+
+    case MSR_ARCH_CAPABILITIES:
+        /* Read-only */
+        goto gp_fault;
+
     case MSR_AMD64_NB_CFG:
         /* ignore the write */
         break;
diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c
index 5d6ccde9f9..7c6af7069d 100644
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -2431,6 +2431,7 @@ static int priv_op_read_msr(unsigned int reg, uint64_t *val,
     switch ( reg )
     {
         int rc;
+        uint32_t edx, dummy;
 
     case MSR_FS_BASE:
         if ( is_pv_32bit_domain(currd) )
@@ -2504,6 +2505,17 @@ static int priv_op_read_msr(unsigned int reg, uint64_t *val,
         *val = 0;
         return X86EMUL_OKAY;
 
+    case MSR_PRED_CMD:
+        /* Write-only */
+        break;
+
+    case MSR_SPEC_CTRL:
+        domain_cpuid(currd, 7, 0, &dummy, &dummy, &dummy, &edx);
+        if ( !(edx & cpufeat_mask(X86_FEATURE_IBRSB)) )
+            break;
+        *val = curr->arch.spec_ctrl;
+        return X86EMUL_OKAY;
+
     case MSR_INTEL_PLATFORM_INFO:
         if ( !boot_cpu_has(X86_FEATURE_MSR_PLATFORM_INFO) )
             break;
@@ -2512,6 +2524,10 @@ static int priv_op_read_msr(unsigned int reg, uint64_t *val,
             *val |= MSR_PLATFORM_INFO_CPUID_FAULTING;
         return X86EMUL_OKAY;
 
+    case MSR_ARCH_CAPABILITIES:
+        /* Not implemented yet. */
+        break;
+
     case MSR_INTEL_MISC_FEATURES_ENABLES:
         if ( !boot_cpu_has(X86_FEATURE_MSR_MISC_FEATURES) )
             break;
@@ -2577,6 +2593,7 @@ static int priv_op_write_msr(unsigned int reg, uint64_t val,
     {
         uint64_t temp;
         int rc;
+        uint32_t ebx, edx, dummy;
 
     case MSR_FS_BASE:
         if ( is_pv_32bit_domain(currd) || !is_canonical_address(val) )
@@ -2716,9 +2733,41 @@ static int priv_op_write_msr(unsigned int reg, uint64_t val,
         return X86EMUL_OKAY;
 
     case MSR_INTEL_PLATFORM_INFO:
+    case MSR_ARCH_CAPABILITIES:
         /* The MSR is read-only. */
         break;
 
+    case MSR_SPEC_CTRL:
+        domain_cpuid(currd, 7, 0, &dummy, &dummy, &dummy, &edx);
+        if ( !(edx & cpufeat_mask(X86_FEATURE_IBRSB)) )
+            break; /* MSR available? */
+
+        /*
+         * Note: SPEC_CTRL_STIBP is specified as safe to use (i.e. ignored)
+         * when STIBP isn't enumerated in hardware.
+         */
+
+        if ( val & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP) )
+            break; /* Rsvd bit set? */
+
+        curr->arch.spec_ctrl = val;
+        return X86EMUL_OKAY;
+
+    case MSR_PRED_CMD:
+        domain_cpuid(currd, 7, 0, &dummy, &dummy, &dummy, &edx);
+        domain_cpuid(currd, 0x80000008, 0, &dummy, &ebx, &dummy, &dummy);
+        if ( !(edx & cpufeat_mask(X86_FEATURE_IBRSB)) &&
+             !(ebx & cpufeat_mask(X86_FEATURE_IBPB)) )
+            break; /* MSR available? */
+
+        /*
+         * The only defined behaviour is when writing PRED_CMD_IBPB.  In
+         * practice, real hardware accepts any value without faulting.
+         */
+        if ( val & PRED_CMD_IBPB )
+            wrmsrl(MSR_PRED_CMD, PRED_CMD_IBPB);
+        return X86EMUL_OKAY;
+
     case MSR_INTEL_MISC_FEATURES_ENABLES:
         if ( !boot_cpu_has(X86_FEATURE_MSR_MISC_FEATURES) ||
              (val & ~MSR_MISC_FEATURES_CPUID_FAULTING) )
diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h
index 33f1c88af9..8699fa31b8 100644
--- a/xen/include/asm-x86/domain.h
+++ b/xen/include/asm-x86/domain.h
@@ -576,6 +576,8 @@ struct arch_vcpu
 
     struct paging_vcpu paging;
 
+    uint32_t spec_ctrl;
+
     uint32_t gdbsx_vcpu_event;
 
     /* A secondary copy of the vcpu time info. */
@@ -615,7 +617,7 @@ unsigned long pv_guest_cr4_fixup(const struct vcpu *, unsigned long guest_cr4);
              X86_CR4_OSXSAVE | X86_CR4_SMEP |               \
              X86_CR4_FSGSBASE | X86_CR4_SMAP))
 
-void domain_cpuid(struct domain *d,
+void domain_cpuid(const struct domain *d,
                   unsigned int  input,
                   unsigned int  sub_input,
                   unsigned int  *eax,
diff --git a/xen/include/asm-x86/msr-index.h b/xen/include/asm-x86/msr-index.h
index fc5bf799fe..05ad0b979c 100644
--- a/xen/include/asm-x86/msr-index.h
+++ b/xen/include/asm-x86/msr-index.h
@@ -39,6 +39,8 @@
 #define MSR_PRED_CMD			0x00000049
 #define PRED_CMD_IBPB			(_AC(1, ULL) << 0)
 
+#define MSR_ARCH_CAPABILITIES		0x0000010a
+
 /* Intel MSRs. Some also available on other CPUs */
 #define MSR_IA32_PERFCTR0		0x000000c1
 #define MSR_IA32_A_PERFCTR0		0x000004c1
-- 
2.14.3


From e6bcb416a5f5489366fc20f45fd92a703ad96e15 Mon Sep 17 00:00:00 2001
From: Andrew Cooper <andrew.cooper3@citrix.com>
Date: Thu, 8 Feb 2018 12:53:11 +0100
Subject: [PATCH 34/45] x86/migrate: Move MSR_SPEC_CTRL on migrate

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: Wei Liu <wei.liu2@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
master commit: 0cf2a4eb769302b7d7d7835540e7b2f15006df30
master date: 2018-01-26 14:10:21 +0000
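A condensed sketch of the two sides (hypothetical helpers, mirroring the
domctl and vmx_load_msr() hunks below): the record is only emitted when
the guest can see IBRSB and the value is non-zero, and is validated on
load exactly as a guest WRMSR would be.

    #include <stdbool.h>
    #include <stdint.h>

    #define SPEC_CTRL_IBRS  (1ull << 0)
    #define SPEC_CTRL_STIBP (1ull << 1)

    static bool save_spec_ctrl(bool guest_has_ibrsb, uint64_t spec_ctrl)
    {
        return guest_has_ibrsb && spec_ctrl;  /* skip the zero default */
    }

    static bool load_spec_ctrl_ok(bool host_has_ibrsb, uint64_t val)
    {
        return host_has_ibrsb &&
               !(val & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP));
    }
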
---
 xen/arch/x86/domctl.c      | 31 +++++++++++++++++++++++++++++++
 xen/arch/x86/hvm/vmx/vmx.c | 23 +++++++++++++++++++++++
 2 files changed, 54 insertions(+)

diff --git a/xen/arch/x86/domctl.c b/xen/arch/x86/domctl.c
index 2b5cab27d2..7b2dddcf2a 100644
--- a/xen/arch/x86/domctl.c
+++ b/xen/arch/x86/domctl.c
@@ -1272,6 +1272,8 @@ long arch_do_domctl(
                 vmsrs->msr_count = nr_msrs;
             else
             {
+                uint32_t edx, dummy;
+
                 i = 0;
 
                 vcpu_pause(v);
@@ -1322,6 +1324,21 @@ long arch_do_domctl(
                     ++i;
                 }
 
+                domain_cpuid(d, 7, 0, &dummy, &dummy, &dummy, &edx);
+                if ( (edx & cpufeat_mask(X86_FEATURE_IBRSB)) &&
+                     v->arch.spec_ctrl )
+                {
+                    if ( i < vmsrs->msr_count && !ret )
+                    {
+                        msr.index = MSR_SPEC_CTRL;
+                        msr.reserved = 0;
+                        msr.value = v->arch.spec_ctrl;
+                        if ( copy_to_guest_offset(vmsrs->msrs, i, &msr, 1) )
+                            ret = -EFAULT;
+                    }
+                    ++i;
+                }
+
                 vcpu_unpause(v);
 
                 if ( i > vmsrs->msr_count && !ret )
@@ -1349,6 +1366,20 @@ long arch_do_domctl(
 
                 switch ( msr.index )
                 {
+                case MSR_SPEC_CTRL:
+                    if ( !boot_cpu_has(X86_FEATURE_IBRSB) )
+                        break; /* MSR available? */
+
+                    /*
+                     * Note: SPEC_CTRL_STIBP is specified as safe to use (i.e.
+                     * ignored) when STIBP isn't enumerated in hardware.
+                     */
+
+                    if ( msr.value & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP) )
+                        break;
+                    v->arch.spec_ctrl = msr.value;
+                    continue;
+
                 case MSR_INTEL_MISC_FEATURES_ENABLES:
                     v->arch.cpuid_faulting = !!(msr.value &
                                                 MSR_MISC_FEATURES_CPUID_FAULTING);
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index a7053527f1..e217a09bac 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -787,12 +787,15 @@ static int vmx_load_vmcs_ctxt(struct vcpu *v, struct hvm_hw_cpu *ctxt)
 static unsigned int __init vmx_init_msr(void)
 {
     return 1 /* MISC_FEATURES_ENABLES */ +
+           !!boot_cpu_has(X86_FEATURE_IBRSB) +
            (cpu_has_mpx && cpu_has_vmx_mpx) +
            (cpu_has_xsaves && cpu_has_vmx_xsaves);
 }
 
 static void vmx_save_msr(struct vcpu *v, struct hvm_msr *ctxt)
 {
+    uint32_t edx, dummy;
+
     vmx_vmcs_enter(v);
 
     if ( v->arch.cpuid_faulting )
@@ -801,6 +804,13 @@ static void vmx_save_msr(struct vcpu *v, struct hvm_msr *ctxt)
         ctxt->msr[ctxt->count++].val = MSR_MISC_FEATURES_CPUID_FAULTING;
     }
 
+    domain_cpuid(v->domain, 7, 0, &dummy, &dummy, &dummy, &edx);
+    if ( (edx & cpufeat_mask(X86_FEATURE_IBRSB)) && v->arch.spec_ctrl )
+    {
+        ctxt->msr[ctxt->count].index = MSR_SPEC_CTRL;
+        ctxt->msr[ctxt->count++].val = v->arch.spec_ctrl;
+    }
+
     if ( cpu_has_mpx && cpu_has_vmx_mpx )
     {
         __vmread(GUEST_BNDCFGS, &ctxt->msr[ctxt->count].val);
@@ -829,6 +839,19 @@ static int vmx_load_msr(struct vcpu *v, struct hvm_msr *ctxt)
     {
         switch ( ctxt->msr[i].index )
         {
+        case MSR_SPEC_CTRL:
+            if ( !boot_cpu_has(X86_FEATURE_IBRSB) )
+                err = -ENXIO; /* MSR available? */
+            /*
+             * Note: SPEC_CTRL_STIBP is specified as safe to use (i.e.
+             * ignored) when STIBP isn't enumerated in hardware.
+             */
+            else if ( ctxt->msr[i].val &
+                      ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP) )
+                err = -ENXIO;
+            else
+                v->arch.spec_ctrl = ctxt->msr[i].val;
+            break;
         case MSR_INTEL_MISC_FEATURES_ENABLES:
             v->arch.cpuid_faulting = !!(ctxt->msr[i].val &
                                         MSR_MISC_FEATURES_CPUID_FAULTING);
-- 
2.14.3


From ff570a3ee0b42a036df1e8c2b05730192ad4bd90 Mon Sep 17 00:00:00 2001
From: Andrew Cooper <andrew.cooper3@citrix.com>
Date: Thu, 8 Feb 2018 12:53:40 +0100
Subject: [PATCH 35/45] x86/hvm: Permit guests direct access to
 MSR_{SPEC_CTRL,PRED_CMD}

For performance reasons, HVM guests should have direct access to these MSRs
when possible.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
master commit: 5a2fe171144ebcc908ea1fca45058d6010f6a286
master date: 2018-01-26 14:10:21 +0000
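The pass-through decisions reduce to the following predicates (a
condensed sketch of the vmx/svm hunks below, with the feature masks
written as raw bit tests):

    #include <stdbool.h>
    #include <stdint.h>

    #define F_IBRSB (1u << 26)  /* CPUID.0x7:0.edx */
    #define F_IBPB  (1u << 12)  /* CPUID.0x80000008.ebx */

    static bool pass_through_spec_ctrl(uint32_t leaf7_edx)
    {
        /* Safe even without STIBP: SPEC_CTRL_STIBP is ignored by hw. */
        return leaf7_edx & F_IBRSB;
    }

    static bool pass_through_pred_cmd(uint32_t leaf7_edx, uint32_t e8b_ebx)
    {
        return (leaf7_edx & F_IBRSB) || (e8b_ebx & F_IBPB);
    }

The SVM hunk keys MSR_PRED_CMD off the IBPB bit alone, matching what AMD
hardware enumerates.
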
---
 xen/arch/x86/domctl.c      | 13 +++++++++++++
 xen/arch/x86/hvm/svm/svm.c |  7 +++++++
 xen/arch/x86/hvm/vmx/vmx.c | 21 +++++++++++++++++++++
 3 files changed, 41 insertions(+)

diff --git a/xen/arch/x86/domctl.c b/xen/arch/x86/domctl.c
index 7b2dddcf2a..0c9fe39a18 100644
--- a/xen/arch/x86/domctl.c
+++ b/xen/arch/x86/domctl.c
@@ -181,6 +181,11 @@ static void update_domain_cpuid_info(struct domain *d,
         else
             ctl->edx &= ~cpufeat_mask(X86_FEATURE_STIBP);
 
+        /*
+         * If the IBRS/IBPB policy has changed, we need to recalculate the MSR
+         * interception bitmaps.
+         */
+        call_policy_changed = is_hvm_domain(d);
         break;
 
     case 0xd:
@@ -241,6 +246,14 @@ static void update_domain_cpuid_info(struct domain *d,
             d->arch.pv_domain.cpuidmasks->e1cd = mask;
         }
         break;
+
+    case 0x80000008:
+        /*
+         * If the IBPB policy has changed, we need to recalculate the MSR
+         * interception bitmaps.
+         */
+        call_policy_changed = is_hvm_domain(d);
+        break;
     }
 
     if ( call_policy_changed )
diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c
index 1b84aa6f0a..19e02658be 100644
--- a/xen/arch/x86/hvm/svm/svm.c
+++ b/xen/arch/x86/hvm/svm/svm.c
@@ -612,6 +612,7 @@ static void svm_cpuid_policy_changed(struct vcpu *v)
     struct arch_svm_struct *arch_svm = &v->arch.hvm_svm;
     struct vmcb_struct *vmcb = arch_svm->vmcb;
     u32 bitmap = vmcb_get_exception_intercepts(vmcb);
+    uint32_t ebx, dummy;
 
     if ( opt_hvm_fep ||
          (v->domain->arch.x86_vendor != boot_cpu_data.x86_vendor) )
@@ -620,6 +621,12 @@ static void svm_cpuid_policy_changed(struct vcpu *v)
         bitmap &= ~(1U << TRAP_invalid_op);
 
     vmcb_set_exception_intercepts(vmcb, bitmap);
+
+    /* Give access to MSR_PRED_CMD if the guest has been told about it. */
+    domain_cpuid(v->domain, 0x80000008, 0, &dummy, &ebx, &dummy, &dummy);
+    svm_intercept_msr(v, MSR_PRED_CMD,
+                      ebx & cpufeat_mask(X86_FEATURE_IBPB) ? MSR_INTERCEPT_NONE
+                                                           : MSR_INTERCEPT_RW);
 }
 
 static void svm_sync_vmcb(struct vcpu *v)
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index e217a09bac..eb67cb9002 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -545,6 +545,8 @@ void vmx_update_exception_bitmap(struct vcpu *v)
 
 static void vmx_cpuid_policy_changed(struct vcpu *v)
 {
+    uint32_t _7d0, e8b, dummy;
+
     if ( opt_hvm_fep ||
          (v->domain->arch.x86_vendor != boot_cpu_data.x86_vendor) )
         v->arch.hvm_vmx.exception_bitmap |= (1U << TRAP_invalid_op);
@@ -554,6 +556,25 @@ static void vmx_cpuid_policy_changed(struct vcpu *v)
     vmx_vmcs_enter(v);
     vmx_update_exception_bitmap(v);
     vmx_vmcs_exit(v);
+
+    domain_cpuid(v->domain, 7, 0, &dummy, &dummy, &dummy, &_7d0);
+    domain_cpuid(v->domain, 0x80000008, 0, &dummy, &e8b, &dummy, &dummy);
+
+    /*
+     * We can safely pass MSR_SPEC_CTRL through to the guest, even if STIBP
+     * isn't enumerated in hardware, as SPEC_CTRL_STIBP is ignored.
+     */
+    if ( _7d0 & cpufeat_mask(X86_FEATURE_IBRSB) )
+        vmx_disable_intercept_for_msr(v, MSR_SPEC_CTRL, MSR_TYPE_R | MSR_TYPE_W);
+    else
+        vmx_enable_intercept_for_msr(v, MSR_SPEC_CTRL, MSR_TYPE_R | MSR_TYPE_W);
+
+    /* MSR_PRED_CMD is safe to pass through if the guest knows about it. */
+    if ( (_7d0 & cpufeat_mask(X86_FEATURE_IBRSB)) ||
+         (e8b & cpufeat_mask(X86_FEATURE_IBPB)) )
+        vmx_disable_intercept_for_msr(v, MSR_PRED_CMD, MSR_TYPE_R | MSR_TYPE_W);
+    else
+        vmx_enable_intercept_for_msr(v, MSR_PRED_CMD, MSR_TYPE_R | MSR_TYPE_W);
 }
 
 static int vmx_guest_x86_mode(struct vcpu *v)
-- 
2.14.3


From e09a5c2917506cf9d95d85f65b2df158a494649c Mon Sep 17 00:00:00 2001
From: Andrew Cooper <andrew.cooper3@citrix.com>
Date: Thu, 8 Feb 2018 12:54:28 +0100
Subject: [PATCH 36/45] x86/entry: Organise the use of MSR_SPEC_CTRL at each
 entry/exit point

We need to be able to either set or clear IBRS in Xen context, as well as
restore appropriate guest values in guest context.  See the documentation in
asm-x86/spec_ctrl_asm.h for details.

With the contemporary microcode, writes to %cr3 are slower when SPEC_CTRL.IBRS
is set.  Therefore, the positioning of SPEC_CTRL_{ENTRY/EXIT}* is important.

Ideally, the IBRS_SET/IBRS_CLEAR hunks might be positioned on either side of
the %cr3 change, but that is rather more complicated to arrange, and could
still result in a guest-controlled value in SPEC_CTRL during the %cr3 change,
negating the saving if the guest chose to have IBRS set.

Therefore, we optimise for the pre-Skylake case (being far more common in the
field than Skylake and later, at the moment), where we have a Xen-preferred
value of IBRS clear when switching %cr3.

There is a semi-unrelated bugfix, where various asm_defns.h macros have a
hidden dependency on PAGE_SIZE, which results in an assembler error if used
in a .macro definition.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
master commit: 5e7962901131186d3514528ed57c7a9901a15a3e
master date: 2018-01-26 14:10:21 +0000
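The shadowing protocol documented in spec_ctrl_asm.h can be rendered in C
roughly as follows (a sketch only, mirroring the numbered steps in the
header's comment; wrmsr_spec_ctrl() here is a stand-in for the MSR write,
and the real implementation is the asm macros below):

    #include <stdbool.h>

    struct shadow_state {
        unsigned int shadow_spec_ctrl;
        bool use_shadow_spec_ctrl;
    };

    /* Stand-in for the MSR_SPEC_CTRL write; stubbed for this sketch. */
    static void wrmsr_spec_ctrl(unsigned int val) { (void)val; }

    static void exit_to_guest(struct shadow_state *c, unsigned int guest_val)
    {
        c->shadow_spec_ctrl = guest_val;   /* 1) store guest value */
        c->use_shadow_spec_ctrl = true;    /* 2) then enable shadowing */
        wrmsr_spec_ctrl(guest_val);        /* 3) load guest value */
    }

    static void entry_from_guest(struct shadow_state *c, unsigned int xen_val)
    {
        c->use_shadow_spec_ctrl = false;   /* 6) disable shadowing first */
        wrmsr_spec_ctrl(xen_val);          /* 7) then load Xen's value */
    }

    /* Async path: restore the guest value if an NMI/MCE interrupted
     * between steps 2 and 6 and loaded Xen's value over it. */
    static void exit_to_xen(const struct shadow_state *c)
    {
        if ( c->use_shadow_spec_ctrl )
            wrmsr_spec_ctrl(c->shadow_spec_ctrl);
    }

    int main(void)
    {
        struct shadow_state s = { 0, false };

        exit_to_guest(&s, 0);      /* steps 1-3 */
        exit_to_xen(&s);           /* NMI hitting inside the window */
        entry_from_guest(&s, 1);   /* steps 6-7, IBRS set for Xen */
        return 0;
    }
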
---
 xen/arch/x86/hvm/svm/entry.S        |  10 +-
 xen/arch/x86/hvm/vmx/entry.S        |  18 +++
 xen/arch/x86/setup.c                |   1 +
 xen/arch/x86/smpboot.c              |   2 +
 xen/arch/x86/x86_64/asm-offsets.c   |   3 +
 xen/arch/x86/x86_64/compat/entry.S  |  13 +++
 xen/arch/x86/x86_64/entry.S         |  47 +++++++-
 xen/include/asm-x86/asm_defns.h     |   3 +
 xen/include/asm-x86/current.h       |   6 +
 xen/include/asm-x86/nops.h          |   6 +
 xen/include/asm-x86/spec_ctrl.h     |   9 ++
 xen/include/asm-x86/spec_ctrl_asm.h | 224 ++++++++++++++++++++++++++++++++++++
 12 files changed, 336 insertions(+), 6 deletions(-)
 create mode 100644 xen/include/asm-x86/spec_ctrl_asm.h

diff --git a/xen/arch/x86/hvm/svm/entry.S b/xen/arch/x86/hvm/svm/entry.S
index 64f0dcd909..289e94639c 100644
--- a/xen/arch/x86/hvm/svm/entry.S
+++ b/xen/arch/x86/hvm/svm/entry.S
@@ -78,6 +78,11 @@ UNLIKELY_END(svm_trace)
         or   $X86_EFLAGS_MBS,%rax
         mov  %rax,VMCB_rflags(%rcx)
 
+        mov VCPU_arch_spec_ctrl(%rbx), %eax
+
+        /* WARNING! `ret`, `call *`, `jmp *` not safe beyond this point. */
+        SPEC_CTRL_EXIT_TO_GUEST /* Req: a=spec_ctrl %rsp=regs/cpuinfo, Clob: cd */
+
         pop  %r15
         pop  %r14
         pop  %r13
@@ -100,8 +105,11 @@ UNLIKELY_END(svm_trace)
         SAVE_ALL
 
         GET_CURRENT(bx)
-        mov  VCPU_svm_vmcb(%rbx),%rcx
 
+        SPEC_CTRL_ENTRY_FROM_VMEXIT /* Req: b=curr %rsp=regs/cpuinfo, Clob: acd */
+        /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */
+
+        mov  VCPU_svm_vmcb(%rbx),%rcx
         movb $0,VCPU_svm_vmcb_in_sync(%rbx)
         mov  VMCB_rax(%rcx),%rax
         mov  %rax,UREGS_rax(%rsp)
diff --git a/xen/arch/x86/hvm/vmx/entry.S b/xen/arch/x86/hvm/vmx/entry.S
index cc6188e7db..7aa0e852ee 100644
--- a/xen/arch/x86/hvm/vmx/entry.S
+++ b/xen/arch/x86/hvm/vmx/entry.S
@@ -37,6 +37,9 @@ ENTRY(vmx_asm_vmexit_handler)
         movb $1,VCPU_vmx_launched(%rbx)
         mov  %rax,VCPU_hvm_guest_cr2(%rbx)
 
+        SPEC_CTRL_ENTRY_FROM_VMEXIT /* Req: b=curr %rsp=regs/cpuinfo, Clob: acd */
+        /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */
+
         mov  %rsp,%rdi
         call vmx_vmexit_handler
 
@@ -65,6 +68,12 @@ UNLIKELY_END(realmode)
 
         mov  %rsp,%rdi
         call vmx_vmenter_helper
+
+        mov VCPU_arch_spec_ctrl(%rbx), %eax
+
+        /* WARNING! `ret`, `call *`, `jmp *` not safe beyond this point. */
+        SPEC_CTRL_EXIT_TO_GUEST /* Req: a=spec_ctrl %rsp=regs/cpuinfo, Clob: cd */
+
         mov  VCPU_hvm_guest_cr2(%rbx),%rax
 
         pop  %r15
@@ -96,6 +105,15 @@ UNLIKELY_END(realmode)
 .Lvmx_vmentry_fail:
         sti
         SAVE_ALL
+
+        /*
+         * PV variant needed here as no guest code has executed (so
+         * MSR_SPEC_CTRL can't have changed value), and NMIs/MCEs are liable
+         * to hit (in which case the HVM variant might corrupt things).
+         */
+        SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo Clob: acd */
+        /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */
+
         call vmx_vmentry_failure
         BUG  /* vmx_vmentry_failure() shouldn't return. */
 
diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c
index 930230d4b9..160b36b0ea 100644
--- a/xen/arch/x86/setup.c
+++ b/xen/arch/x86/setup.c
@@ -661,6 +661,7 @@ void __init noreturn __start_xen(unsigned long mbi_p)
     set_processor_id(0);
     set_current((struct vcpu *)0xfffff000); /* debug sanity. */
     idle_vcpu[0] = current;
+    init_shadow_spec_ctrl_state();
 
     percpu_init_areas();
 
diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c
index c19508f960..21a8685739 100644
--- a/xen/arch/x86/smpboot.c
+++ b/xen/arch/x86/smpboot.c
@@ -41,6 +41,7 @@
 #include <asm/flushtlb.h>
 #include <asm/msr.h>
 #include <asm/mtrr.h>
+#include <asm/spec_ctrl.h>
 #include <asm/time.h>
 #include <asm/tboot.h>
 #include <mach_apic.h>
@@ -300,6 +301,7 @@ void start_secondary(void *unused)
     set_current(idle_vcpu[cpu]);
     this_cpu(curr_vcpu) = idle_vcpu[cpu];
     rdmsrl(MSR_EFER, this_cpu(efer));
+    init_shadow_spec_ctrl_state();
 
     /*
      * Just as during early bootstrap, it is convenient here to disable
diff --git a/xen/arch/x86/x86_64/asm-offsets.c b/xen/arch/x86/x86_64/asm-offsets.c
index 325abdc4f2..128d8f70a5 100644
--- a/xen/arch/x86/x86_64/asm-offsets.c
+++ b/xen/arch/x86/x86_64/asm-offsets.c
@@ -88,6 +88,7 @@ void __dummy__(void)
     OFFSET(VCPU_kernel_ss, struct vcpu, arch.pv_vcpu.kernel_ss);
     OFFSET(VCPU_iopl, struct vcpu, arch.pv_vcpu.iopl);
     OFFSET(VCPU_guest_context_flags, struct vcpu, arch.vgc_flags);
+    OFFSET(VCPU_arch_spec_ctrl, struct vcpu, arch.spec_ctrl);
     OFFSET(VCPU_nmi_pending, struct vcpu, nmi_pending);
     OFFSET(VCPU_mce_pending, struct vcpu, mce_pending);
     OFFSET(VCPU_nmi_old_mask, struct vcpu, nmi_state.old_mask);
@@ -139,6 +140,8 @@ void __dummy__(void)
     OFFSET(CPUINFO_cr4, struct cpu_info, cr4);
     OFFSET(CPUINFO_xen_cr3, struct cpu_info, xen_cr3);
     OFFSET(CPUINFO_pv_cr3, struct cpu_info, pv_cr3);
+    OFFSET(CPUINFO_shadow_spec_ctrl, struct cpu_info, shadow_spec_ctrl);
+    OFFSET(CPUINFO_use_shadow_spec_ctrl, struct cpu_info, use_shadow_spec_ctrl);
     DEFINE(CPUINFO_sizeof, sizeof(struct cpu_info));
     BLANK();
 
diff --git a/xen/arch/x86/x86_64/compat/entry.S b/xen/arch/x86/x86_64/compat/entry.S
index c8f68a01a0..9f65e91d7b 100644
--- a/xen/arch/x86/x86_64/compat/entry.S
+++ b/xen/arch/x86/x86_64/compat/entry.S
@@ -17,6 +17,10 @@ ENTRY(compat_hypercall)
         pushq $0
         movl  $TRAP_syscall, 4(%rsp)
         SAVE_ALL compat=1 /* DPL1 gate, restricted to 32bit PV guests only. */
+
+        SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, Clob: acd */
+        /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */
+
         CR4_PV32_RESTORE
 
         cmpb  $0,untrusted_msi(%rip)
@@ -148,6 +152,12 @@ ENTRY(compat_restore_all_guest)
         .popsection
         or    $X86_EFLAGS_IF,%r11
         mov   %r11d,UREGS_eflags(%rsp)
+
+        mov VCPU_arch_spec_ctrl(%rbx), %eax
+
+        /* WARNING! `ret`, `call *`, `jmp *` not safe beyond this point. */
+        SPEC_CTRL_EXIT_TO_GUEST /* Req: a=spec_ctrl %rsp=regs/cpuinfo, Clob: cd */
+
         RESTORE_ALL adj=8 compat=1
 .Lft0:  iretq
         _ASM_PRE_EXTABLE(.Lft0, handle_exception)
@@ -206,6 +216,9 @@ ENTRY(cstar_enter)
         movl  $TRAP_syscall, 4(%rsp)
         SAVE_ALL
 
+        SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, Clob: acd */
+        /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */
+
         GET_STACK_END(bx)
         mov   STACK_CPUINFO_FIELD(xen_cr3)(%rbx), %rcx
         neg   %rcx
diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S
index 20536d44aa..083318e0b8 100644
--- a/xen/arch/x86/x86_64/entry.S
+++ b/xen/arch/x86/x86_64/entry.S
@@ -37,6 +37,9 @@ ENTRY(switch_to_kernel)
 restore_all_guest:
         ASSERT_INTERRUPTS_DISABLED
 
+        /* Stash guest SPEC_CTRL value while we can read struct vcpu. */
+        mov   VCPU_arch_spec_ctrl(%rbx), %r15d
+
         /* Copy guest mappings and switch to per-CPU root page table. */
         mov   %cr3, %r9
         GET_STACK_END(dx)
@@ -64,6 +67,12 @@ restore_all_guest:
         write_cr3 rax, rdi, rsi
 .Lrag_keep_cr3:
 
+        /* Restore stashed SPEC_CTRL value. */
+        mov   %r15d, %eax
+
+        /* WARNING! `ret`, `call *`, `jmp *` not safe beyond this point. */
+        SPEC_CTRL_EXIT_TO_GUEST /* Req: a=spec_ctrl %rsp=regs/cpuinfo, Clob: cd */
+
         RESTORE_ALL
         testw $TRAP_syscall,4(%rsp)
         jz    iret_exit_to_guest
@@ -102,9 +111,9 @@ restore_all_xen:
          * Check whether we need to switch to the per-CPU page tables, in
          * case we return to late PV exit code (from an NMI or #MC).
          */
-        GET_STACK_END(ax)
-        mov   STACK_CPUINFO_FIELD(xen_cr3)(%rax), %rdx
-        mov   STACK_CPUINFO_FIELD(pv_cr3)(%rax), %rax
+        GET_STACK_END(bx)
+        mov   STACK_CPUINFO_FIELD(xen_cr3)(%rbx), %rdx
+        mov   STACK_CPUINFO_FIELD(pv_cr3)(%rbx), %rax
         test  %rdx, %rdx
         /*
          * Ideally the condition would be "nsz", but such doesn't exist,
@@ -114,6 +123,9 @@ UNLIKELY_START(g, exit_cr3)
         write_cr3 rax, rdi, rsi
 UNLIKELY_END(exit_cr3)
 
+        /* WARNING! `ret`, `call *`, `jmp *` not safe beyond this point. */
+        SPEC_CTRL_EXIT_TO_XEN /* Req: %rbx=end, Clob: acd */
+
         RESTORE_ALL adj=8
         iretq
 
@@ -144,6 +156,9 @@ ENTRY(lstar_enter)
         movl  $TRAP_syscall, 4(%rsp)
         SAVE_ALL
 
+        SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, Clob: acd */
+        /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */
+
         GET_STACK_END(bx)
         mov   STACK_CPUINFO_FIELD(xen_cr3)(%rbx), %rcx
         neg   %rcx
@@ -247,6 +262,9 @@ GLOBAL(sysenter_eflags_saved)
         movl  $TRAP_syscall, 4(%rsp)
         SAVE_ALL
 
+        SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, Clob: acd */
+        /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */
+
         GET_STACK_END(bx)
         mov   STACK_CPUINFO_FIELD(xen_cr3)(%rbx), %rcx
         neg   %rcx
@@ -293,6 +311,9 @@ ENTRY(int80_direct_trap)
         movl  $0x80, 4(%rsp)
         SAVE_ALL
 
+        SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, Clob: acd */
+        /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */
+
         GET_STACK_END(bx)
         mov   STACK_CPUINFO_FIELD(xen_cr3)(%rbx), %rcx
         neg   %rcx
@@ -462,6 +483,10 @@ ENTRY(common_interrupt)
         SAVE_ALL CLAC
 
         GET_STACK_END(14)
+
+        SPEC_CTRL_ENTRY_FROM_INTR /* Req: %rsp=regs, %r14=end, Clob: acd */
+        /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */
+
         mov   STACK_CPUINFO_FIELD(xen_cr3)(%r14), %rcx
         mov   %rcx, %r15
         neg   %rcx
@@ -500,6 +525,10 @@ GLOBAL(handle_exception)
         SAVE_ALL CLAC
 
         GET_STACK_END(14)
+
+        SPEC_CTRL_ENTRY_FROM_INTR /* Req: %rsp=regs, %r14=end, Clob: acd */
+        /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */
+
         mov   STACK_CPUINFO_FIELD(xen_cr3)(%r14), %rcx
         mov   %rcx, %r15
         neg   %rcx
@@ -693,8 +722,12 @@ ENTRY(double_fault)
         /* Set AC to reduce chance of further SMAP faults */
         SAVE_ALL STAC
 
-        GET_STACK_END(bx)
-        mov   STACK_CPUINFO_FIELD(xen_cr3)(%rbx), %rbx
+        GET_STACK_END(14)
+
+        SPEC_CTRL_ENTRY_FROM_INTR /* Req: %rsp=regs, %r14=end, Clob: acd */
+        /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */
+
+        mov   STACK_CPUINFO_FIELD(xen_cr3)(%r14), %rbx
         test  %rbx, %rbx
         jz    .Ldblf_cr3_okay
         jns   .Ldblf_cr3_load
@@ -723,6 +756,10 @@ handle_ist_exception:
         SAVE_ALL CLAC
 
         GET_STACK_END(14)
+
+        SPEC_CTRL_ENTRY_FROM_INTR /* Req: %rsp=regs, %r14=end, Clob: acd */
+        /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */
+
         mov   STACK_CPUINFO_FIELD(xen_cr3)(%r14), %rcx
         mov   %rcx, %r15
         neg   %rcx
diff --git a/xen/include/asm-x86/asm_defns.h b/xen/include/asm-x86/asm_defns.h
index edd7f05acc..d42c85899d 100644
--- a/xen/include/asm-x86/asm_defns.h
+++ b/xen/include/asm-x86/asm_defns.h
@@ -7,6 +7,7 @@
 #include <asm/asm-offsets.h>
 #endif
 #include <asm/bug.h>
+#include <asm/page.h>
 #include <asm/processor.h>
 #include <asm/percpu.h>
 #include <xen/stringify.h>
@@ -376,4 +377,6 @@ static always_inline void stac(void)
 #define REX64_PREFIX "rex64/"
 #endif
 
+#include <asm/spec_ctrl_asm.h>
+
 #endif /* __X86_ASM_DEFNS_H__ */
diff --git a/xen/include/asm-x86/current.h b/xen/include/asm-x86/current.h
index 397fa4c38f..25415922af 100644
--- a/xen/include/asm-x86/current.h
+++ b/xen/include/asm-x86/current.h
@@ -54,6 +54,12 @@ struct cpu_info {
      */
     unsigned long xen_cr3;
     unsigned long pv_cr3;
+
+    /* See asm-x86/spec_ctrl_asm.h for usage. */
+    unsigned int shadow_spec_ctrl;
+    bool         use_shadow_spec_ctrl;
+
+    unsigned long __pad;
     /* get_stack_bottom() must be 16-byte aligned */
 };
 
diff --git a/xen/include/asm-x86/nops.h b/xen/include/asm-x86/nops.h
index 1368a971df..f2c709995c 100644
--- a/xen/include/asm-x86/nops.h
+++ b/xen/include/asm-x86/nops.h
@@ -63,6 +63,12 @@
 #define ASM_NOP7 _ASM_MK_NOP(K8_NOP7)
 #define ASM_NOP8 _ASM_MK_NOP(K8_NOP8)
 
+#define ASM_NOP17 ASM_NOP8; ASM_NOP7; ASM_NOP2
+#define ASM_NOP21 ASM_NOP8; ASM_NOP8; ASM_NOP5
+#define ASM_NOP24 ASM_NOP8; ASM_NOP8; ASM_NOP8
+#define ASM_NOP29 ASM_NOP8; ASM_NOP8; ASM_NOP8; ASM_NOP5
+#define ASM_NOP32 ASM_NOP8; ASM_NOP8; ASM_NOP8; ASM_NOP8
+
 #define ASM_NOP_MAX 8
 
 #endif /* __X86_ASM_NOPS_H__ */
diff --git a/xen/include/asm-x86/spec_ctrl.h b/xen/include/asm-x86/spec_ctrl.h
index e088a551da..b451250282 100644
--- a/xen/include/asm-x86/spec_ctrl.h
+++ b/xen/include/asm-x86/spec_ctrl.h
@@ -20,8 +20,17 @@
 #ifndef __X86_SPEC_CTRL_H__
 #define __X86_SPEC_CTRL_H__
 
+#include <asm/current.h>
+
 void init_speculation_mitigations(void);
 
+static inline void init_shadow_spec_ctrl_state(void)
+{
+    struct cpu_info *info = get_cpu_info();
+
+    info->shadow_spec_ctrl = info->use_shadow_spec_ctrl = 0;
+}
+
 #endif /* !__X86_SPEC_CTRL_H__ */
 
 /*
diff --git a/xen/include/asm-x86/spec_ctrl_asm.h b/xen/include/asm-x86/spec_ctrl_asm.h
new file mode 100644
index 0000000000..ecf33a50e4
--- /dev/null
+++ b/xen/include/asm-x86/spec_ctrl_asm.h
@@ -0,0 +1,224 @@
+/******************************************************************************
+ * include/asm-x86/spec_ctrl_asm.h
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Copyright (c) 2017-2018 Citrix Systems Ltd.
+ */
+
+#ifndef __X86_SPEC_CTRL_ASM_H__
+#define __X86_SPEC_CTRL_ASM_H__
+
+#ifdef __ASSEMBLY__
+#include <asm/msr-index.h>
+
+/*
+ * Saving and restoring MSR_SPEC_CTRL state is a little tricky.
+ *
+ * We want the guest's choice of SPEC_CTRL while in guest context, and Xen's
+ * choice (set or clear, depending on the hardware) while running in Xen
+ * context.  Therefore, a simplistic algorithm is:
+ *
+ *  - Set/clear IBRS on entry to Xen
+ *  - Set the guests' choice on exit to guest
+ *  - Leave SPEC_CTRL unchanged on exit to xen
+ *
+ * There are two complicating factors:
+ *  1) HVM guests can have direct access to the MSR, so it can change
+ *     behind Xen's back.
+ *  2) An NMI or MCE can interrupt at any point, including early in the entry
+ *     path, or late in the exit path after restoring the guest value.  This
+ *     will corrupt the guest value.
+ *
+ * Factor 1 is dealt with by relying on NMIs/MCEs being blocked immediately
+ * after VMEXIT.  The VMEXIT-specific code reads MSR_SPEC_CTRL and updates
+ * current before loading Xen's MSR_SPEC_CTRL setting.
+ *
+ * Factor 2 is harder.  We maintain a shadow_spec_ctrl value, and
+ * use_shadow_spec_ctrl boolean per cpu.  The synchronous use is:
+ *
+ *  1) Store guest value in shadow_spec_ctrl
+ *  2) Set use_shadow_spec_ctrl boolean
+ *  3) Load guest value into MSR_SPEC_CTRL
+ *  4) Exit to guest
+ *  5) Entry from guest
+ *  6) Clear use_shadow_spec_ctrl boolean
+ *  7) Load Xen's value into MSR_SPEC_CTRL
+ *
+ * The asynchronous use for interrupts/exceptions is:
+ *  -  Set/clear IBRS on entry to Xen
+ *  -  On exit to Xen, check use_shadow_spec_ctrl
+ *  -  If set, load shadow_spec_ctrl
+ *
+ * Therefore, an interrupt/exception which hits the synchronous path between
+ * steps 2 and 6 will restore the shadow value rather than leaving Xen's value
+ * loaded and corrupting the value used in guest context.
+ *
+ * The following ASM fragments implement this algorithm.  See their local
+ * comments for further details.
+ *  - SPEC_CTRL_ENTRY_FROM_VMEXIT
+ *  - SPEC_CTRL_ENTRY_FROM_PV
+ *  - SPEC_CTRL_ENTRY_FROM_INTR
+ *  - SPEC_CTRL_EXIT_TO_XEN
+ *  - SPEC_CTRL_EXIT_TO_GUEST
+ */
+
+.macro DO_SPEC_CTRL_ENTRY_FROM_VMEXIT ibrs_val:req
+/*
+ * Requires %rbx=current, %rsp=regs/cpuinfo
+ * Clobbers %rax, %rcx, %rdx
+ *
+ * The common case is that a guest has direct access to MSR_SPEC_CTRL, at
+ * which point we need to save the guest value before setting IBRS for Xen.
+ * Unilaterally saving the guest value is shorter and faster than checking.
+ */
+    mov $MSR_SPEC_CTRL, %ecx
+    rdmsr
+
+    /* Stash the value from hardware. */
+    mov %eax, VCPU_arch_spec_ctrl(%rbx)
+    xor %edx, %edx
+
+    /* Clear SPEC_CTRL shadowing *before* loading Xen's value. */
+    movb %dl, CPUINFO_use_shadow_spec_ctrl(%rsp)
+
+    /* Load Xen's intended value. */
+    mov $\ibrs_val, %eax
+    wrmsr
+.endm
+
+.macro DO_SPEC_CTRL_ENTRY maybexen:req ibrs_val:req
+/*
+ * Requires %rsp=regs (also cpuinfo if !maybexen)
+ * Requires %r14=stack_end (if maybexen)
+ * Clobbers %rax, %rcx, %rdx
+ *
+ * PV guests can't update MSR_SPEC_CTRL behind Xen's back, so no need to read
+ * it back.  Entries from guest context need to clear SPEC_CTRL shadowing,
+ * while entries from Xen must leave shadowing in its current state.
+ */
+    mov $MSR_SPEC_CTRL, %ecx
+    xor %edx, %edx
+
+    /*
+     * Clear SPEC_CTRL shadowing *before* loading Xen's value.  If entering
+     * from a possibly-xen context, %rsp doesn't necessarily alias the cpuinfo
+     * block so calculate the position directly.
+     */
+    .if \maybexen
+        /* Branchless `if ( !xen ) clear_shadowing` */
+        testb $3, UREGS_cs(%rsp)
+        setz %al
+        and %al, STACK_CPUINFO_FIELD(use_shadow_spec_ctrl)(%r14)
+    .else
+        movb %dl, CPUINFO_use_shadow_spec_ctrl(%rsp)
+    .endif
+
+    /* Load Xen's intended value. */
+    mov $\ibrs_val, %eax
+    wrmsr
+.endm
+
+.macro DO_SPEC_CTRL_EXIT_TO_XEN
+/*
+ * Requires %rbx=stack_end
+ * Clobbers %rax, %rcx, %rdx
+ *
+ * When returning to Xen context, look to see whether SPEC_CTRL shadowing is
+ * in effect, and reload the shadow value.  This covers race conditions which
+ * exist with an NMI/MCE/etc hitting late in the return-to-guest path.
+ */
+    xor %edx, %edx
+
+    cmpb %dl, STACK_CPUINFO_FIELD(use_shadow_spec_ctrl)(%rbx)
+    je .L\@_skip
+
+    mov STACK_CPUINFO_FIELD(shadow_spec_ctrl)(%rbx), %eax
+    mov $MSR_SPEC_CTRL, %ecx
+    wrmsr
+
+.L\@_skip:
+.endm
+
+.macro DO_SPEC_CTRL_EXIT_TO_GUEST
+/*
+ * Requires %eax=spec_ctrl, %rsp=regs/cpuinfo
+ * Clobbers %rcx, %rdx
+ *
+ * When returning to guest context, set up SPEC_CTRL shadowing and load the
+ * guest value.
+ */
+    /* Set up shadow value *before* enabling shadowing. */
+    mov %eax, CPUINFO_shadow_spec_ctrl(%rsp)
+
+    /* Set SPEC_CTRL shadowing *before* loading the guest value. */
+    movb $1, CPUINFO_use_shadow_spec_ctrl(%rsp)
+
+    mov $MSR_SPEC_CTRL, %ecx
+    xor %edx, %edx
+    wrmsr
+.endm
+
+/* Use after a VMEXIT from an HVM guest. */
+#define SPEC_CTRL_ENTRY_FROM_VMEXIT                                     \
+    ALTERNATIVE_2 __stringify(ASM_NOP32),                               \
+        __stringify(DO_SPEC_CTRL_ENTRY_FROM_VMEXIT                      \
+                    ibrs_val=SPEC_CTRL_IBRS),                           \
+        X86_FEATURE_XEN_IBRS_SET,                                       \
+        __stringify(DO_SPEC_CTRL_ENTRY_FROM_VMEXIT                      \
+                    ibrs_val=0),                                        \
+        X86_FEATURE_XEN_IBRS_CLEAR
+
+/* Use after an entry from PV context (syscall/sysenter/int80/int82/etc). */
+#define SPEC_CTRL_ENTRY_FROM_PV                                         \
+    ALTERNATIVE_2 __stringify(ASM_NOP21),                               \
+        __stringify(DO_SPEC_CTRL_ENTRY maybexen=0                       \
+                    ibrs_val=SPEC_CTRL_IBRS),                           \
+        X86_FEATURE_XEN_IBRS_SET,                                       \
+        __stringify(DO_SPEC_CTRL_ENTRY maybexen=0 ibrs_val=0),          \
+        X86_FEATURE_XEN_IBRS_CLEAR
+
+/* Use in interrupt/exception context.  May interrupt Xen or PV context. */
+#define SPEC_CTRL_ENTRY_FROM_INTR                                       \
+    ALTERNATIVE_2 __stringify(ASM_NOP29),                               \
+        __stringify(DO_SPEC_CTRL_ENTRY maybexen=1                       \
+                    ibrs_val=SPEC_CTRL_IBRS),                           \
+        X86_FEATURE_XEN_IBRS_SET,                                       \
+        __stringify(DO_SPEC_CTRL_ENTRY maybexen=1 ibrs_val=0),          \
+        X86_FEATURE_XEN_IBRS_CLEAR
+
+/* Use when exiting to Xen context. */
+#define SPEC_CTRL_EXIT_TO_XEN                                           \
+    ALTERNATIVE_2 __stringify(ASM_NOP17),                               \
+        DO_SPEC_CTRL_EXIT_TO_XEN, X86_FEATURE_XEN_IBRS_SET,             \
+        DO_SPEC_CTRL_EXIT_TO_XEN, X86_FEATURE_XEN_IBRS_CLEAR
+
+/* Use when exiting to guest context. */
+#define SPEC_CTRL_EXIT_TO_GUEST                                         \
+    ALTERNATIVE_2 __stringify(ASM_NOP24),                               \
+        DO_SPEC_CTRL_EXIT_TO_GUEST, X86_FEATURE_XEN_IBRS_SET,           \
+        DO_SPEC_CTRL_EXIT_TO_GUEST, X86_FEATURE_XEN_IBRS_CLEAR
+
+#endif /* __ASSEMBLY__ */
+#endif /* !__X86_SPEC_CTRL_ASM_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
-- 
2.14.3


From 86153856f857f786b95ecc4f81260477d75dc15c Mon Sep 17 00:00:00 2001
From: Andrew Cooper <andrew.cooper3@citrix.com>
Date: Thu, 8 Feb 2018 12:55:17 +0100
Subject: [PATCH 37/45] x86/entry: Organise the clobbering of the RSB/RAS on
 entry to Xen

ret instructions are speculated directly to values recorded in the Return
Stack Buffer/Return Address Stack, as there is no uncertainty in well-formed
code.  Guests can take advantage of this in two ways:

  1) If they can find a path in Xen which executes more ret instructions than
     call instructions.  (At least one in the waitqueue infrastructure,
     probably others.)

  2) Use the fact that the RSB/RAS in hardware is actually a circular stack
     without a concept of empty.  (When it logically empties, stale values
     will start being used.)

To mitigate, overwrite the RSB on entry to Xen with gadgets which will capture
and contain rogue speculation.
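
Purely as an illustration, here is a minimal C sketch of the same stuffing
idea, assuming GCC extended inline asm and no red zone (true for hypervisor
code built with -mno-red-zone); the real mitigation is the DO_OVERWRITE_RSB
assembly macro in the diff below:

    static void stuff_rsb_sketch(void)
    {
        unsigned int i;

        /* 32 calls, each pushing a benign return target into the RSB. */
        for ( i = 0; i < 32; i++ )
            asm volatile ( "call 2f\n\t"
                           /* Speculation trap: a mispredicted ret lands here. */
                           "1: pause; lfence; jmp 1b\n\t"
                           /* Architectural path: discard the return address. */
                           "2: add $8, %%rsp"
                           ::: "memory", "cc" );
    }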

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
master commit: e6c0128e9ab25bf66df11377a33ee5584d7f99e3
master date: 2018-01-26 14:10:21 +0000
---
 xen/include/asm-x86/cpufeature.h    |  2 ++
 xen/include/asm-x86/nops.h          |  1 +
 xen/include/asm-x86/spec_ctrl_asm.h | 44 +++++++++++++++++++++++++++++++++++++
 3 files changed, 47 insertions(+)

diff --git a/xen/include/asm-x86/cpufeature.h b/xen/include/asm-x86/cpufeature.h
index dc3a3357ef..1074f4b0c9 100644
--- a/xen/include/asm-x86/cpufeature.h
+++ b/xen/include/asm-x86/cpufeature.h
@@ -27,6 +27,8 @@ XEN_CPUFEATURE(IND_THUNK_JMP,   (FSCAPINTS+0)*32+16) /* Use IND_THUNK_JMP */
 XEN_CPUFEATURE(XEN_IBPB,        (FSCAPINTS+0)*32+17) /* IBRSB || IBPB */
 XEN_CPUFEATURE(XEN_IBRS_SET,    (FSCAPINTS+0)*32+18) /* IBRSB && IBRS set in Xen */
 XEN_CPUFEATURE(XEN_IBRS_CLEAR,  (FSCAPINTS+0)*32+19) /* IBRSB && IBRS clear in Xen */
+XEN_CPUFEATURE(RSB_NATIVE,      (FSCAPINTS+0)*32+20) /* RSB overwrite needed for native */
+XEN_CPUFEATURE(RSB_VMEXIT,      (FSCAPINTS+0)*32+21) /* RSB overwrite needed for vmexit */
 
 #define NCAPINTS (FSCAPINTS + 1) /* N 32-bit words worth of info */
 
diff --git a/xen/include/asm-x86/nops.h b/xen/include/asm-x86/nops.h
index f2c709995c..f00bd16a70 100644
--- a/xen/include/asm-x86/nops.h
+++ b/xen/include/asm-x86/nops.h
@@ -68,6 +68,7 @@
 #define ASM_NOP24 ASM_NOP8; ASM_NOP8; ASM_NOP8
 #define ASM_NOP29 ASM_NOP8; ASM_NOP8; ASM_NOP8; ASM_NOP5
 #define ASM_NOP32 ASM_NOP8; ASM_NOP8; ASM_NOP8; ASM_NOP8
+#define ASM_NOP40 ASM_NOP8; ASM_NOP8; ASM_NOP8; ASM_NOP8; ASM_NOP8
 
 #define ASM_NOP_MAX 8
 
diff --git a/xen/include/asm-x86/spec_ctrl_asm.h b/xen/include/asm-x86/spec_ctrl_asm.h
index ecf33a50e4..56dc65e3dc 100644
--- a/xen/include/asm-x86/spec_ctrl_asm.h
+++ b/xen/include/asm-x86/spec_ctrl_asm.h
@@ -74,6 +74,44 @@
  *  - SPEC_CTRL_EXIT_TO_GUEST
  */
 
+.macro DO_OVERWRITE_RSB
+/*
+ * Requires nothing
+ * Clobbers %rax, %rcx
+ *
+ * Requires 256 bytes of stack space, but %rsp has no net change. Based on
+ * Google's performance numbers, the loop is unrolled to 16 iterations and two
+ * calls per iteration.
+ *
+ * The call filling the RSB needs a nonzero displacement.  A nop would do, but
+ * we use "1: pause; lfence; jmp 1b" to safely contain any ret-based
+ * speculation, even if the loop is speculatively executed prematurely.
+ *
+ * %rsp is preserved by using an extra GPR because a) we've got plenty spare,
+ * b) the two movs are shorter to encode than `add $32*8, %rsp`, and c) they
+ * can be optimised with mov-elimination in modern cores.
+ */
+    mov $16, %ecx                   /* 16 iterations, two calls per loop */
+    mov %rsp, %rax                  /* Store the current %rsp */
+
+.L\@_fill_rsb_loop:
+
+    .irp n, 1, 2                    /* Unrolled twice. */
+    call .L\@_insert_rsb_entry_\n   /* Create an RSB entry. */
+
+.L\@_capture_speculation_\n:
+    pause
+    lfence
+    jmp .L\@_capture_speculation_\n /* Capture rogue speculation. */
+
+.L\@_insert_rsb_entry_\n:
+    .endr
+
+    sub $1, %ecx
+    jnz .L\@_fill_rsb_loop
+    mov %rax, %rsp                  /* Restore old %rsp */
+.endm
+
 .macro DO_SPEC_CTRL_ENTRY_FROM_VMEXIT ibrs_val:req
 /*
  * Requires %rbx=current, %rsp=regs/cpuinfo
@@ -172,6 +210,8 @@
 
 /* Use after a VMEXIT from an HVM guest. */
 #define SPEC_CTRL_ENTRY_FROM_VMEXIT                                     \
+    ALTERNATIVE __stringify(ASM_NOP40),                                 \
+        DO_OVERWRITE_RSB, X86_FEATURE_RSB_VMEXIT;                       \
     ALTERNATIVE_2 __stringify(ASM_NOP32),                               \
         __stringify(DO_SPEC_CTRL_ENTRY_FROM_VMEXIT                      \
                     ibrs_val=SPEC_CTRL_IBRS),                           \
@@ -182,6 +222,8 @@
 
 /* Use after an entry from PV context (syscall/sysenter/int80/int82/etc). */
 #define SPEC_CTRL_ENTRY_FROM_PV                                         \
+    ALTERNATIVE __stringify(ASM_NOP40),                                 \
+        DO_OVERWRITE_RSB, X86_FEATURE_RSB_NATIVE;                       \
     ALTERNATIVE_2 __stringify(ASM_NOP21),                               \
         __stringify(DO_SPEC_CTRL_ENTRY maybexen=0                       \
                     ibrs_val=SPEC_CTRL_IBRS),                           \
@@ -191,6 +233,8 @@
 
 /* Use in interrupt/exception context.  May interrupt Xen or PV context. */
 #define SPEC_CTRL_ENTRY_FROM_INTR                                       \
+    ALTERNATIVE __stringify(ASM_NOP40),                                 \
+        DO_OVERWRITE_RSB, X86_FEATURE_RSB_NATIVE;                       \
     ALTERNATIVE_2 __stringify(ASM_NOP29),                               \
         __stringify(DO_SPEC_CTRL_ENTRY maybexen=1                       \
                     ibrs_val=SPEC_CTRL_IBRS),                           \
-- 
2.14.3


From 64c1742b206344c51db130b0bb47fc299a1462ca Mon Sep 17 00:00:00 2001
From: Andrew Cooper <andrew.cooper3@citrix.com>
Date: Thu, 8 Feb 2018 12:55:50 +0100
Subject: [PATCH 38/45] x86/entry: Avoid using alternatives in NMI/#MC paths

This patch is deliberately arranged to be easy to revert if/when alternatives
patching becomes NMI/#MC safe.

For safety, there must be a dispatch serialising instruction in (what is
logically) DO_SPEC_CTRL_ENTRY so that, in the case that Xen needs IBRS set in
context, an attacker can't speculate around the WRMSR and reach an indirect
branch within the speculation window.

Using conditionals opens this attack vector up, so the else clause gets an
LFENCE to force the pipeline to catch up before continuing.  This also covers
the safety of the RSB conditional, as execution is guaranteed to hit either
the WRMSR or the LFENCE.

One downside of not using alternatives is that there is unconditionally an
LFENCE in the IST path in cases where we are not using the features from
IBRS-capable microcode.
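
Condensed into C, the rule reads roughly as follows (a sketch only: the
function name and need_ibrs flag are illustrative; wrmsrl() and the MSR
constants are as used elsewhere in this series, and the compiler is assumed
to emit the if/else as a conditional branch):

    static void set_spec_ctrl_serialised(bool need_ibrs)
    {
        if ( need_ibrs )
            wrmsrl(MSR_SPEC_CTRL, SPEC_CTRL_IBRS); /* WRMSR serialises dispatch */
        else
            asm volatile ( "lfence" ::: "memory" ); /* force the pipeline to catch up */
    }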

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
master commit: 3fffaf9c13e9502f09ad4ab1aac3f8b7b9398f6f
master date: 2018-01-26 14:10:21 +0000
---
 xen/arch/x86/spec_ctrl.c            |  8 +++++
 xen/arch/x86/x86_64/asm-offsets.c   |  1 +
 xen/arch/x86/x86_64/entry.S         |  6 ++--
 xen/include/asm-x86/current.h       |  1 +
 xen/include/asm-x86/spec_ctrl.h     |  1 +
 xen/include/asm-x86/spec_ctrl_asm.h | 67 +++++++++++++++++++++++++++++++++++++
 6 files changed, 81 insertions(+), 3 deletions(-)

diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c
index 89e7287e43..cc1c972845 100644
--- a/xen/arch/x86/spec_ctrl.c
+++ b/xen/arch/x86/spec_ctrl.c
@@ -20,8 +20,10 @@
 #include <xen/init.h>
 #include <xen/lib.h>
 
+#include <asm/msr-index.h>
 #include <asm/processor.h>
 #include <asm/spec_ctrl.h>
+#include <asm/spec_ctrl_asm.h>
 
 static enum ind_thunk {
     THUNK_DEFAULT, /* Decide which thunk to use at boot time. */
@@ -150,6 +152,12 @@ void __init init_speculation_mitigations(void)
     print_details(thunk);
 }
 
+static void __init __maybe_unused build_assertions(void)
+{
+    /* The optimised assembly relies on this alias. */
+    BUILD_BUG_ON(BTI_IST_IBRS != SPEC_CTRL_IBRS);
+}
+
 /*
  * Local variables:
  * mode: C
diff --git a/xen/arch/x86/x86_64/asm-offsets.c b/xen/arch/x86/x86_64/asm-offsets.c
index 128d8f70a5..fe69a680ca 100644
--- a/xen/arch/x86/x86_64/asm-offsets.c
+++ b/xen/arch/x86/x86_64/asm-offsets.c
@@ -142,6 +142,7 @@ void __dummy__(void)
     OFFSET(CPUINFO_pv_cr3, struct cpu_info, pv_cr3);
     OFFSET(CPUINFO_shadow_spec_ctrl, struct cpu_info, shadow_spec_ctrl);
     OFFSET(CPUINFO_use_shadow_spec_ctrl, struct cpu_info, use_shadow_spec_ctrl);
+    OFFSET(CPUINFO_bti_ist_info, struct cpu_info, bti_ist_info);
     DEFINE(CPUINFO_sizeof, sizeof(struct cpu_info));
     BLANK();
 
diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S
index 083318e0b8..f24bffb574 100644
--- a/xen/arch/x86/x86_64/entry.S
+++ b/xen/arch/x86/x86_64/entry.S
@@ -124,7 +124,7 @@ UNLIKELY_START(g, exit_cr3)
 UNLIKELY_END(exit_cr3)
 
         /* WARNING! `ret`, `call *`, `jmp *` not safe beyond this point. */
-        SPEC_CTRL_EXIT_TO_XEN /* Req: %rbx=end, Clob: acd */
+        SPEC_CTRL_EXIT_TO_XEN_IST /* Req: %rbx=end, Clob: acd */
 
         RESTORE_ALL adj=8
         iretq
@@ -724,7 +724,7 @@ ENTRY(double_fault)
 
         GET_STACK_END(14)
 
-        SPEC_CTRL_ENTRY_FROM_INTR /* Req: %rsp=regs, %r14=end, Clob: acd */
+        SPEC_CTRL_ENTRY_FROM_INTR_IST /* Req: %rsp=regs, %r14=end, Clob: acd */
         /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */
 
         mov   STACK_CPUINFO_FIELD(xen_cr3)(%r14), %rbx
@@ -757,7 +757,7 @@ handle_ist_exception:
 
         GET_STACK_END(14)
 
-        SPEC_CTRL_ENTRY_FROM_INTR /* Req: %rsp=regs, %r14=end, Clob: acd */
+        SPEC_CTRL_ENTRY_FROM_INTR_IST /* Req: %rsp=regs, %r14=end, Clob: acd */
         /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */
 
         mov   STACK_CPUINFO_FIELD(xen_cr3)(%r14), %rcx
diff --git a/xen/include/asm-x86/current.h b/xen/include/asm-x86/current.h
index 25415922af..5f8f687b27 100644
--- a/xen/include/asm-x86/current.h
+++ b/xen/include/asm-x86/current.h
@@ -58,6 +58,7 @@ struct cpu_info {
     /* See asm-x86/spec_ctrl_asm.h for usage. */
     unsigned int shadow_spec_ctrl;
     bool         use_shadow_spec_ctrl;
+    uint8_t      bti_ist_info;
 
     unsigned long __pad;
     /* get_stack_bottom() must be 16-byte aligned */
diff --git a/xen/include/asm-x86/spec_ctrl.h b/xen/include/asm-x86/spec_ctrl.h
index b451250282..c454b02b66 100644
--- a/xen/include/asm-x86/spec_ctrl.h
+++ b/xen/include/asm-x86/spec_ctrl.h
@@ -29,6 +29,7 @@ static inline void init_shadow_spec_ctrl_state(void)
     struct cpu_info *info = get_cpu_info();
 
     info->shadow_spec_ctrl = info->use_shadow_spec_ctrl = 0;
+    info->bti_ist_info = 0;
 }
 
 #endif /* !__X86_SPEC_CTRL_H__ */
diff --git a/xen/include/asm-x86/spec_ctrl_asm.h b/xen/include/asm-x86/spec_ctrl_asm.h
index 56dc65e3dc..7a43daf231 100644
--- a/xen/include/asm-x86/spec_ctrl_asm.h
+++ b/xen/include/asm-x86/spec_ctrl_asm.h
@@ -20,6 +20,11 @@
 #ifndef __X86_SPEC_CTRL_ASM_H__
 #define __X86_SPEC_CTRL_ASM_H__
 
+/* Encoding of the bottom bits in cpuinfo.bti_ist_info */
+#define BTI_IST_IBRS  (1 << 0)
+#define BTI_IST_WRMSR (1 << 1)
+#define BTI_IST_RSB   (1 << 2)
+
 #ifdef __ASSEMBLY__
 #include <asm/msr-index.h>
 
@@ -254,6 +259,68 @@
         DO_SPEC_CTRL_EXIT_TO_GUEST, X86_FEATURE_XEN_IBRS_SET,           \
         DO_SPEC_CTRL_EXIT_TO_GUEST, X86_FEATURE_XEN_IBRS_CLEAR
 
+/* TODO: Drop these when the alternatives infrastructure is NMI/#MC safe. */
+.macro SPEC_CTRL_ENTRY_FROM_INTR_IST
+/*
+ * Requires %rsp=regs, %r14=stack_end
+ * Clobbers %rax, %rcx, %rdx
+ *
+ * This is a logical merge of DO_OVERWRITE_RSB and DO_SPEC_CTRL_ENTRY
+ * maybexen=1, but with conditionals rather than alternatives.
+ */
+    movzbl STACK_CPUINFO_FIELD(bti_ist_info)(%r14), %eax
+
+    testb $BTI_IST_RSB, %al
+    jz .L\@_skip_rsb
+
+    DO_OVERWRITE_RSB
+
+.L\@_skip_rsb:
+
+    testb $BTI_IST_WRMSR, %al
+    jz .L\@_skip_wrmsr
+
+    xor %edx, %edx
+    testb $3, UREGS_cs(%rsp)
+    setz %dl
+    and %dl, STACK_CPUINFO_FIELD(use_shadow_spec_ctrl)(%r14)
+
+.L\@_entry_from_xen:
+    /*
+     * Load Xen's intended value.  SPEC_CTRL_IBRS vs 0 is encoded in the
+     * bottom bit of bti_ist_info, via a deliberate alias with BTI_IST_IBRS.
+     */
+    mov $MSR_SPEC_CTRL, %ecx
+    and $BTI_IST_IBRS, %eax
+    wrmsr
+
+    /* Opencoded UNLIKELY_START() with no condition. */
+UNLIKELY_DISPATCH_LABEL(\@_serialise):
+    .subsection 1
+    /*
+     * In the case that we might need to set SPEC_CTRL.IBRS for safety, we
+     * need to ensure that an attacker can't poison the `jz .L\@_skip_wrmsr`
+     * to speculate around the WRMSR.  As a result, we need a dispatch
+     * serialising instruction in the else clause.
+     */
+.L\@_skip_wrmsr:
+    lfence
+    UNLIKELY_END(\@_serialise)
+.endm
+
+.macro SPEC_CTRL_EXIT_TO_XEN_IST
+/*
+ * Requires %rbx=stack_end
+ * Clobbers %rax, %rcx, %rdx
+ */
+    testb $BTI_IST_WRMSR, STACK_CPUINFO_FIELD(bti_ist_info)(%rbx)
+    jz .L\@_skip
+
+    DO_SPEC_CTRL_EXIT_TO_XEN
+
+.L\@_skip:
+.endm
+
 #endif /* __ASSEMBLY__ */
 #endif /* !__X86_SPEC_CTRL_ASM_H__ */
 
-- 
2.14.3


From c0bfde68ccd941b14a2f0ca54c61a83796156ea6 Mon Sep 17 00:00:00 2001
From: Andrew Cooper <andrew.cooper3@citrix.com>
Date: Thu, 8 Feb 2018 12:56:25 +0100
Subject: [PATCH 39/45] x86/boot: Calculate the most appropriate BTI mitigation
 to use

See the logic and comments in init_speculation_mitigations() for further
details.

There are two controls for RSB overwriting, because in principle there are
cases where it might be safe to forego rsb_native (Off the top of my head,
SMEP active, no 32bit PV guests at all, no use of vmevent/paging subsystems
for HVM guests, but I make no guarantees that this list of restrictions is
exhaustive).
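
A hypothetical example (values chosen purely for illustration) of combining
the new controls on the Xen command line, per the documentation hunk below:

    bti=thunk=retpoline,ibrs=0,rsb_native=1,rsb_vmexit=1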

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>

x86/spec_ctrl: Fix determination of when to use IBRS

The original version of this logic was:

    /*
     * On Intel hardware, we'd like to use retpoline in preference to
     * IBRS, but only if it is safe on this hardware.
     */
    else if ( boot_cpu_has(X86_FEATURE_IBRSB) )
    {
        if ( retpoline_safe() )
            thunk = THUNK_RETPOLINE;
        else
            ibrs = true;
    }

but it was changed by a request during review.  Sadly, the result is buggy as
it breaks the later fallback logic by allowing IBRS to appear as available
when in fact it isn't.

This in practice means that on retpoline-unsafe hardware without IBRS, we
select THUNK_JUMP despite intending to select THUNK_RETPOLINE.

Reported-by: Zhenzhong Duan <zhenzhong.duan@oracle.com>
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
master commit: 2713715305ca516f698d58cec5e0b322c3b2c4eb
master date: 2018-01-26 14:10:21 +0000
master commit: 30cbd0c83ef3d0edac2d5bcc41a9a2b7a843ae58
master date: 2018-02-06 18:32:58 +0000
---
 docs/misc/xen-command-line.markdown |  10 ++-
 xen/arch/x86/spec_ctrl.c            | 141 +++++++++++++++++++++++++++++++++++-
 xen/include/asm-x86/spec_ctrl.h     |   4 +-
 3 files changed, 149 insertions(+), 6 deletions(-)

diff --git a/docs/misc/xen-command-line.markdown b/docs/misc/xen-command-line.markdown
index 709e4de66a..d29b001801 100644
--- a/docs/misc/xen-command-line.markdown
+++ b/docs/misc/xen-command-line.markdown
@@ -253,7 +253,7 @@ enough. Setting this to a high value may cause boot failure, particularly if
 the NMI watchdog is also enabled.
 
 ### bti (x86)
-> `= List of [ thunk=retpoline|lfence|jmp ]`
+> `= List of [ thunk=retpoline|lfence|jmp, ibrs=<bool>, rsb_{vmexit,native}=<bool> ]`
 
 Branch Target Injection controls.  By default, Xen will pick the most
 appropriate BTI mitigations based on compiled in support, loaded microcode,
@@ -268,6 +268,14 @@ locations.  The default thunk is `retpoline` (generally preferred for Intel
 hardware), with the alternatives being `jmp` (a `jmp *%reg` gadget, minimal
 overhead), and `lfence` (an `lfence; jmp *%reg` gadget, preferred for AMD).
 
+On hardware supporting IBRS, the `ibrs=` option can be used to force or
+prevent Xen using the feature itself.  If Xen is not using IBRS itself,
+functionality is still set up so IBRS can be virtualised for guests.
+
+The `rsb_vmexit=` and `rsb_native=` options can be used to fine tune when the
+RSB gets overwritten.  There are individual controls for an entry from HVM
+context, and an entry from a native (PV or Xen) context.
+
 ### xenheap\_megabytes (arm32)
 > `= <size>`
 
diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c
index cc1c972845..8aefe29968 100644
--- a/xen/arch/x86/spec_ctrl.c
+++ b/xen/arch/x86/spec_ctrl.c
@@ -20,6 +20,7 @@
 #include <xen/init.h>
 #include <xen/lib.h>
 
+#include <asm/microcode.h>
 #include <asm/msr-index.h>
 #include <asm/processor.h>
 #include <asm/spec_ctrl.h>
@@ -33,11 +34,15 @@ static enum ind_thunk {
     THUNK_LFENCE,
     THUNK_JMP,
 } opt_thunk __initdata = THUNK_DEFAULT;
+static int8_t __initdata opt_ibrs = -1;
+static bool __initdata opt_rsb_native = true;
+static bool __initdata opt_rsb_vmexit = true;
+uint8_t __read_mostly default_bti_ist_info;
 
 static int __init parse_bti(const char *s)
 {
     const char *ss;
-    int rc = 0;
+    int val, rc = 0;
 
     do {
         ss = strchr(s, ',');
@@ -57,6 +62,12 @@ static int __init parse_bti(const char *s)
             else
                 rc = -EINVAL;
         }
+        else if ( (val = parse_boolean("ibrs", s, ss)) >= 0 )
+            opt_ibrs = val;
+        else if ( (val = parse_boolean("rsb_native", s, ss)) >= 0 )
+            opt_rsb_native = val;
+        else if ( (val = parse_boolean("rsb_vmexit", s, ss)) >= 0 )
+            opt_rsb_vmexit = val;
         else
             rc = -EINVAL;
 
@@ -93,24 +104,84 @@ static void __init print_details(enum ind_thunk thunk)
         printk(XENLOG_DEBUG "  Compiled-in support: INDIRECT_THUNK\n");
 
     printk(XENLOG_INFO
-           "BTI mitigations: Thunk %s\n",
+           "BTI mitigations: Thunk %s, Others:%s%s%s\n",
            thunk == THUNK_NONE      ? "N/A" :
            thunk == THUNK_RETPOLINE ? "RETPOLINE" :
            thunk == THUNK_LFENCE    ? "LFENCE" :
-           thunk == THUNK_JMP       ? "JMP" : "?");
+           thunk == THUNK_JMP       ? "JMP" : "?",
+           boot_cpu_has(X86_FEATURE_XEN_IBRS_SET)    ? " IBRS+" :
+           boot_cpu_has(X86_FEATURE_XEN_IBRS_CLEAR)  ? " IBRS-"      : "",
+           boot_cpu_has(X86_FEATURE_RSB_NATIVE)      ? " RSB_NATIVE" : "",
+           boot_cpu_has(X86_FEATURE_RSB_VMEXIT)      ? " RSB_VMEXIT" : "");
+}
+
+/* Calculate whether Retpoline is known-safe on this CPU. */
+static bool __init retpoline_safe(void)
+{
+    unsigned int ucode_rev = this_cpu(ucode_cpu_info).cpu_sig.rev;
+
+    if ( boot_cpu_data.x86_vendor == X86_VENDOR_AMD )
+        return true;
+
+    if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
+         boot_cpu_data.x86 != 6 )
+        return false;
+
+    switch ( boot_cpu_data.x86_model )
+    {
+    case 0x17: /* Penryn */
+    case 0x1d: /* Dunnington */
+    case 0x1e: /* Nehalem */
+    case 0x1f: /* Auburndale / Havendale */
+    case 0x1a: /* Nehalem EP */
+    case 0x2e: /* Nehalem EX */
+    case 0x25: /* Westmere */
+    case 0x2c: /* Westmere EP */
+    case 0x2f: /* Westmere EX */
+    case 0x2a: /* SandyBridge */
+    case 0x2d: /* SandyBridge EP/EX */
+    case 0x3a: /* IvyBridge */
+    case 0x3e: /* IvyBridge EP/EX */
+    case 0x3c: /* Haswell */
+    case 0x3f: /* Haswell EX/EP */
+    case 0x45: /* Haswell D */
+    case 0x46: /* Haswell H */
+        return true;
+
+        /*
+         * Broadwell processors are retpoline-safe after specific microcode
+         * versions.
+         */
+    case 0x3d: /* Broadwell */
+        return ucode_rev >= 0x28;
+    case 0x47: /* Broadwell H */
+        return ucode_rev >= 0x1b;
+    case 0x4f: /* Broadwell EP/EX */
+        return ucode_rev >= 0xb000025;
+    case 0x56: /* Broadwell D */
+        return false; /* TBD. */
+
+        /*
+         * Skylake and later processors are not retpoline-safe.
+         */
+    default:
+        return false;
+    }
 }
 
 void __init init_speculation_mitigations(void)
 {
     enum ind_thunk thunk = THUNK_DEFAULT;
+    bool ibrs = false;
 
     /*
      * Has the user specified any custom BTI mitigations?  If so, follow their
      * instructions exactly and disable all heuristics.
      */
-    if ( opt_thunk != THUNK_DEFAULT )
+    if ( opt_thunk != THUNK_DEFAULT || opt_ibrs != -1 )
     {
         thunk = opt_thunk;
+        ibrs  = !!opt_ibrs;
     }
     else
     {
@@ -126,7 +197,18 @@ void __init init_speculation_mitigations(void)
              */
             if ( cpu_has_lfence_dispatch )
                 thunk = THUNK_LFENCE;
+            /*
+             * On Intel hardware, we'd like to use retpoline in preference to
+             * IBRS, but only if it is safe on this hardware.
+             */
+            else if ( retpoline_safe() )
+                thunk = THUNK_RETPOLINE;
+            else if ( boot_cpu_has(X86_FEATURE_IBRSB) )
+                ibrs = true;
         }
+        /* Without compiler thunk support, use IBRS if available. */
+        else if ( boot_cpu_has(X86_FEATURE_IBRSB) )
+            ibrs = true;
     }
 
     /*
@@ -136,6 +218,13 @@ void __init init_speculation_mitigations(void)
     if ( !IS_ENABLED(CONFIG_INDIRECT_THUNK) )
         thunk = THUNK_NONE;
 
+    /*
+     * If IBRS is in use and thunks are compiled in, there is no point
+     * suffering extra overhead.  Switch to the least-overhead thunk.
+     */
+    if ( ibrs && thunk == THUNK_DEFAULT )
+        thunk = THUNK_JMP;
+
     /*
      * If there are still no thunk preferences, the compiled default is
      * actually retpoline, and it is better than nothing.
@@ -149,6 +238,50 @@ void __init init_speculation_mitigations(void)
     else if ( thunk == THUNK_JMP )
         setup_force_cpu_cap(X86_FEATURE_IND_THUNK_JMP);
 
+    if ( boot_cpu_has(X86_FEATURE_IBRSB) )
+    {
+        /*
+         * Even if we've chosen to not have IBRS set in Xen context, we still
+         * need the IBRS entry/exit logic to virtualise IBRS support for
+         * guests.
+         */
+        if ( ibrs )
+            setup_force_cpu_cap(X86_FEATURE_XEN_IBRS_SET);
+        else
+            setup_force_cpu_cap(X86_FEATURE_XEN_IBRS_CLEAR);
+
+        default_bti_ist_info |= BTI_IST_WRMSR | ibrs;
+    }
+
+    /*
+     * PV guests can poison the RSB to any virtual address from which
+     * they can execute a call instruction.  This is necessarily outside
+     * of the Xen supervisor mappings.
+     *
+     * With SMEP enabled, the processor won't speculate into user mappings.
+     * Therefore, in this case, we don't need to worry about poisoned entries
+     * from 64bit PV guests.
+     *
+     * 32bit PV guest kernels run in ring 1, so use supervisor mappings.
+     * If a processor speculates to 32bit PV guest kernel mappings, it is
+     * speculating in 64bit supervisor mode, and can leak data.
+     */
+    if ( opt_rsb_native )
+    {
+        setup_force_cpu_cap(X86_FEATURE_RSB_NATIVE);
+        default_bti_ist_info |= BTI_IST_RSB;
+    }
+
+    /*
+     * HVM guests can always poison the RSB to point at Xen supervisor
+     * mappings.
+     */
+    if ( opt_rsb_vmexit )
+        setup_force_cpu_cap(X86_FEATURE_RSB_VMEXIT);
+
+    /* (Re)init BSP state now that default_bti_ist_info has been calculated. */
+    init_shadow_spec_ctrl_state();
+
     print_details(thunk);
 }
 
diff --git a/xen/include/asm-x86/spec_ctrl.h b/xen/include/asm-x86/spec_ctrl.h
index c454b02b66..6120e4f561 100644
--- a/xen/include/asm-x86/spec_ctrl.h
+++ b/xen/include/asm-x86/spec_ctrl.h
@@ -24,12 +24,14 @@
 
 void init_speculation_mitigations(void);
 
+extern uint8_t default_bti_ist_info;
+
 static inline void init_shadow_spec_ctrl_state(void)
 {
     struct cpu_info *info = get_cpu_info();
 
     info->shadow_spec_ctrl = info->use_shadow_spec_ctrl = 0;
-    info->bti_ist_info = 0;
+    info->bti_ist_info = default_bti_ist_info;
 }
 
 #endif /* !__X86_SPEC_CTRL_H__ */
-- 
2.14.3


From fee4689c5c60b699f4dea21a21a2ba17887d2f49 Mon Sep 17 00:00:00 2001
From: Andrew Cooper <andrew.cooper3@citrix.com>
Date: Thu, 8 Feb 2018 12:56:54 +0100
Subject: [PATCH 40/45] x86/ctxt: Issue a speculation barrier between vcpu
 contexts

Issuing an IBPB command flushes the Branch Target Buffer, so that any poison
left by one vcpu won't remain when beginning to execute the next.

The cost of IBPB is substantial, so it is skipped on transitions to idle, as
Xen's idle code is already robust.  All transitions into vcpu context are fully
serialising in practice (and under consideration for being retroactively
declared architecturally serialising), so a cunning attacker cannot use SP1 to
try and skip the flush.
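
Condensed, the logic added to context_switch() below amounts to the
following sketch (identifiers as in the patch; the per-CPU bookkeeping is
omitted):

    if ( opt_ibpb && !is_idle_domain(nextd) && *last_id != next_id )
    {
        wrmsrl(MSR_PRED_CMD, PRED_CMD_IBPB); /* flush branch predictor state */
        *last_id = next_id;                  /* remember the new security context */
    }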

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
master commit: a2ed643ed783020f885035432e9c0919756921d1
master date: 2018-01-26 14:10:21 +0000
---
 docs/misc/xen-command-line.markdown |  5 ++++-
 xen/arch/x86/domain.c               | 29 +++++++++++++++++++++++++++++
 xen/arch/x86/spec_ctrl.c            | 10 +++++++++-
 xen/include/asm-x86/spec_ctrl.h     |  1 +
 4 files changed, 43 insertions(+), 2 deletions(-)

diff --git a/docs/misc/xen-command-line.markdown b/docs/misc/xen-command-line.markdown
index d29b001801..de5b79e41e 100644
--- a/docs/misc/xen-command-line.markdown
+++ b/docs/misc/xen-command-line.markdown
@@ -253,7 +253,7 @@ enough. Setting this to a high value may cause boot failure, particularly if
 the NMI watchdog is also enabled.
 
 ### bti (x86)
-> `= List of [ thunk=retpoline|lfence|jmp, ibrs=<bool>, rsb_{vmexit,native}=<bool> ]`
+> `= List of [ thunk=retpoline|lfence|jmp, ibrs=<bool>, ibpb=<bool>, rsb_{vmexit,native}=<bool> ]`
 
 Branch Target Injection controls.  By default, Xen will pick the most
 appropriate BTI mitigations based on compiled in support, loaded microcode,
@@ -272,6 +272,9 @@ On hardware supporting IBRS, the `ibrs=` option can be used to force or
 prevent Xen using the feature itself.  If Xen is not using IBRS itself,
 functionality is still set up so IBRS can be virtualised for guests.
 
+On hardware supporting IBPB, the `ibpb=` option can be used to prevent Xen
+from issuing Branch Prediction Barriers on vcpu context switches.
+
 The `rsb_vmexit=` and `rsb_native=` options can be used to fine tune when the
 RSB gets overwritten.  There are individual controls for an entry from HVM
 context, and an entry from a native (PV or Xen) context.
diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index db2e57e7ce..0f2716c06d 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -63,6 +63,7 @@
 #include <xen/iommu.h>
 #include <compat/vcpu.h>
 #include <asm/psr.h>
+#include <asm/spec_ctrl.h>
 
 DEFINE_PER_CPU(struct vcpu *, curr_vcpu);
 
@@ -2181,6 +2182,34 @@ void context_switch(struct vcpu *prev, struct vcpu *next)
         }
 
         ctxt_switch_levelling(next);
+
+        if ( opt_ibpb && !is_idle_domain(nextd) )
+        {
+            static DEFINE_PER_CPU(unsigned int, last);
+            unsigned int *last_id = &this_cpu(last);
+
+            /*
+             * Squash the domid and vcpu id together for comparison
+             * efficiency.  We could in principle stash and compare the struct
+             * vcpu pointer, but this risks a false alias if a domain has died
+             * and the same 4k page gets reused for a new vcpu.
+             */
+            unsigned int next_id = (((unsigned int)nextd->domain_id << 16) |
+                                    (uint16_t)next->vcpu_id);
+            BUILD_BUG_ON(MAX_VIRT_CPUS > 0xffff);
+
+            /*
+             * When scheduling from a vcpu, to idle, and back to the same vcpu
+             * (which might be common in a lightly loaded system, or when
+             * using vcpu pinning), there is no need to issue IBPB, as we are
+             * returning to the same security context.
+             */
+            if ( *last_id != next_id )
+            {
+                wrmsrl(MSR_PRED_CMD, PRED_CMD_IBPB);
+                *last_id = next_id;
+            }
+        }
     }
 
     context_saved(prev);
diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c
index 8aefe29968..8ad992a700 100644
--- a/xen/arch/x86/spec_ctrl.c
+++ b/xen/arch/x86/spec_ctrl.c
@@ -37,6 +37,7 @@ static enum ind_thunk {
 static int8_t __initdata opt_ibrs = -1;
 static bool __initdata opt_rsb_native = true;
 static bool __initdata opt_rsb_vmexit = true;
+bool __read_mostly opt_ibpb = true;
 uint8_t __read_mostly default_bti_ist_info;
 
 static int __init parse_bti(const char *s)
@@ -64,6 +65,8 @@ static int __init parse_bti(const char *s)
         }
         else if ( (val = parse_boolean("ibrs", s, ss)) >= 0 )
             opt_ibrs = val;
+        else if ( (val = parse_boolean("ibpb", s, ss)) >= 0 )
+            opt_ibpb = val;
         else if ( (val = parse_boolean("rsb_native", s, ss)) >= 0 )
             opt_rsb_native = val;
         else if ( (val = parse_boolean("rsb_vmexit", s, ss)) >= 0 )
@@ -104,13 +107,14 @@ static void __init print_details(enum ind_thunk thunk)
         printk(XENLOG_DEBUG "  Compiled-in support: INDIRECT_THUNK\n");
 
     printk(XENLOG_INFO
-           "BTI mitigations: Thunk %s, Others:%s%s%s\n",
+           "BTI mitigations: Thunk %s, Others:%s%s%s%s\n",
            thunk == THUNK_NONE      ? "N/A" :
            thunk == THUNK_RETPOLINE ? "RETPOLINE" :
            thunk == THUNK_LFENCE    ? "LFENCE" :
            thunk == THUNK_JMP       ? "JMP" : "?",
            boot_cpu_has(X86_FEATURE_XEN_IBRS_SET)    ? " IBRS+" :
            boot_cpu_has(X86_FEATURE_XEN_IBRS_CLEAR)  ? " IBRS-"      : "",
+           opt_ibpb                                  ? " IBPB"       : "",
            boot_cpu_has(X86_FEATURE_RSB_NATIVE)      ? " RSB_NATIVE" : "",
            boot_cpu_has(X86_FEATURE_RSB_VMEXIT)      ? " RSB_VMEXIT" : "");
 }
@@ -279,6 +283,10 @@ void __init init_speculation_mitigations(void)
     if ( opt_rsb_vmexit )
         setup_force_cpu_cap(X86_FEATURE_RSB_VMEXIT);
 
+    /* Check we have hardware IBPB support before using it... */
+    if ( !boot_cpu_has(X86_FEATURE_IBRSB) && !boot_cpu_has(X86_FEATURE_IBPB) )
+        opt_ibpb = false;
+
     /* (Re)init BSP state now that default_bti_ist_info has been calculated. */
     init_shadow_spec_ctrl_state();
 
diff --git a/xen/include/asm-x86/spec_ctrl.h b/xen/include/asm-x86/spec_ctrl.h
index 6120e4f561..e328b0f509 100644
--- a/xen/include/asm-x86/spec_ctrl.h
+++ b/xen/include/asm-x86/spec_ctrl.h
@@ -24,6 +24,7 @@
 
 void init_speculation_mitigations(void);
 
+extern bool opt_ibpb;
 extern uint8_t default_bti_ist_info;
 
 static inline void init_shadow_spec_ctrl_state(void)
-- 
2.14.3


From 76bdfe894ab2205f597e52448d620982b84565c4 Mon Sep 17 00:00:00 2001
From: Andrew Cooper <andrew.cooper3@citrix.com>
Date: Thu, 8 Feb 2018 12:57:19 +0100
Subject: [PATCH 41/45] x86/cpuid: Offer Indirect Branch Controls to guests

With all infrastructure in place, it is now safe to let guests see and use
these features.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Acked-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Wei Liu <wei.liu2@citrix.com>
master commit: 67c6838ddacfa646f9d1ae802bd0f16a935665b8
master date: 2018-01-26 14:10:21 +0000
---
 xen/include/public/arch-x86/cpufeatureset.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/xen/include/public/arch-x86/cpufeatureset.h b/xen/include/public/arch-x86/cpufeatureset.h
index 5f96c1c23e..7457cb8a4c 100644
--- a/xen/include/public/arch-x86/cpufeatureset.h
+++ b/xen/include/public/arch-x86/cpufeatureset.h
@@ -233,11 +233,11 @@ XEN_CPUFEATURE(EFRO,          7*32+10) /*   APERF/MPERF Read Only interface */
 
 /* AMD-defined CPU features, CPUID level 0x80000008.ebx, word 8 */
 XEN_CPUFEATURE(CLZERO,        8*32+ 0) /*A  CLZERO instruction */
-XEN_CPUFEATURE(IBPB,          8*32+12) /*   IBPB support only (no IBRS, used by AMD) */
+XEN_CPUFEATURE(IBPB,          8*32+12) /*A  IBPB support only (no IBRS, used by AMD) */
 
 /* Intel-defined CPU features, CPUID level 0x00000007:0.edx, word 9 */
-XEN_CPUFEATURE(IBRSB,         9*32+26) /*   IBRS and IBPB support (used by Intel) */
-XEN_CPUFEATURE(STIBP,         9*32+27) /*!  STIBP */
+XEN_CPUFEATURE(IBRSB,         9*32+26) /*A  IBRS and IBPB support (used by Intel) */
+XEN_CPUFEATURE(STIBP,         9*32+27) /*A! STIBP */
 
 #endif /* XEN_CPUFEATURE */
 
-- 
2.14.3


From 99ed7863b29ea170e50749fe22991b964cbce6ba Mon Sep 17 00:00:00 2001
From: Andrew Cooper <andrew.cooper3@citrix.com>
Date: Thu, 8 Feb 2018 12:57:50 +0100
Subject: [PATCH 42/45] x86/idle: Clear SPEC_CTRL while idle

On contemporary hardware, setting IBRS/STIBP has a performance impact on
adjacent hyperthreads.  It is therefore recommended to clear the setting
before becoming idle, to avoid an idle core preventing adjacent userspace
execution from running at full performance.

Care must be taken to ensure there are no ret or indirect branch instructions
between spec_ctrl_{enter,exit}_idle() invocations, which are forced always
inline.  Care must also be taken to avoid using spec_ctrl_enter_idle() between
flushing caches and becoming idle, in cases where that matters.
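
The resulting pattern at each idle entry point, condensed from the hunks
below:

    struct cpu_info *info = get_cpu_info();

    spec_ctrl_enter_idle(info); /* clear IBRS; no ret/`call *` beyond here */
    safe_halt();                /* sleep until the next interrupt */
    spec_ctrl_exit_idle(info);  /* restore Xen's IBRS setting */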

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
master commit: 4c7e478d597b0346eef3a256cfd6794ac778b608
master date: 2018-01-26 14:10:21 +0000
---
 xen/arch/x86/acpi/cpu_idle.c    | 21 +++++++++++++++++++++
 xen/arch/x86/cpu/mwait-idle.c   |  7 +++++++
 xen/arch/x86/domain.c           |  8 ++++++++
 xen/include/asm-x86/spec_ctrl.h | 34 ++++++++++++++++++++++++++++++++++
 4 files changed, 70 insertions(+)

diff --git a/xen/arch/x86/acpi/cpu_idle.c b/xen/arch/x86/acpi/cpu_idle.c
index 536f236e6f..244667993d 100644
--- a/xen/arch/x86/acpi/cpu_idle.c
+++ b/xen/arch/x86/acpi/cpu_idle.c
@@ -56,6 +56,7 @@
 #include <asm/mwait.h>
 #include <xen/notifier.h>
 #include <xen/cpu.h>
+#include <asm/spec_ctrl.h>
 
 /*#define DEBUG_PM_CX*/
 
@@ -408,8 +409,14 @@ void mwait_idle_with_hints(unsigned int eax, unsigned int ecx)
      */
     if ( (expires > NOW() || expires == 0) && !softirq_pending(cpu) )
     {
+        struct cpu_info *info = get_cpu_info();
+
         cpumask_set_cpu(cpu, &cpuidle_mwait_flags);
+
+        spec_ctrl_enter_idle(info);
         __mwait(eax, ecx);
+        spec_ctrl_exit_idle(info);
+
         cpumask_clear_cpu(cpu, &cpuidle_mwait_flags);
     }
 
@@ -424,6 +431,8 @@ static void acpi_processor_ffh_cstate_enter(struct acpi_processor_cx *cx)
 
 static void acpi_idle_do_entry(struct acpi_processor_cx *cx)
 {
+    struct cpu_info *info = get_cpu_info();
+
     switch ( cx->entry_method )
     {
     case ACPI_CSTATE_EM_FFH:
@@ -431,15 +440,19 @@ static void acpi_idle_do_entry(struct acpi_processor_cx *cx)
         acpi_processor_ffh_cstate_enter(cx);
         return;
     case ACPI_CSTATE_EM_SYSIO:
+        spec_ctrl_enter_idle(info);
         /* IO port based C-state */
         inb(cx->address);
         /* Dummy wait op - must do something useless after P_LVL2 read
            because chipsets cannot guarantee that STPCLK# signal
            gets asserted in time to freeze execution properly. */
         inl(pmtmr_ioport);
+        spec_ctrl_exit_idle(info);
         return;
     case ACPI_CSTATE_EM_HALT:
+        spec_ctrl_enter_idle(info);
         safe_halt();
+        spec_ctrl_exit_idle(info);
         local_irq_disable();
         return;
     }
@@ -567,7 +580,13 @@ static void acpi_processor_idle(void)
         if ( pm_idle_save )
             pm_idle_save();
         else
+        {
+            struct cpu_info *info = get_cpu_info();
+
+            spec_ctrl_enter_idle(info);
             safe_halt();
+            spec_ctrl_exit_idle(info);
+        }
         return;
     }
 
@@ -746,6 +765,7 @@ void acpi_dead_idle(void)
          * Otherwise, CPU may still hold dirty data, breaking cache coherency,
          * leading to strange errors.
          */
+        spec_ctrl_enter_idle(get_cpu_info());
         wbinvd();
 
         while ( 1 )
@@ -775,6 +795,7 @@ void acpi_dead_idle(void)
         u32 address = cx->address;
         u32 pmtmr_ioport_local = pmtmr_ioport;
 
+        spec_ctrl_enter_idle(get_cpu_info());
         wbinvd();
 
         while ( 1 )
diff --git a/xen/arch/x86/cpu/mwait-idle.c b/xen/arch/x86/cpu/mwait-idle.c
index 290c57baa3..5f9737b6c1 100644
--- a/xen/arch/x86/cpu/mwait-idle.c
+++ b/xen/arch/x86/cpu/mwait-idle.c
@@ -58,6 +58,7 @@
 #include <asm/hpet.h>
 #include <asm/mwait.h>
 #include <asm/msr.h>
+#include <asm/spec_ctrl.h>
 #include <acpi/cpufreq/cpufreq.h>
 
 #define MWAIT_IDLE_VERSION "0.4.1"
@@ -702,7 +703,13 @@ static void mwait_idle(void)
 		if (pm_idle_save)
 			pm_idle_save();
 		else
+		{
+			struct cpu_info *info = get_cpu_info();
+
+			spec_ctrl_enter_idle(info);
 			safe_halt();
+			spec_ctrl_exit_idle(info);
+		}
 		return;
 	}
 
diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index 0f2716c06d..013a1d2453 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -55,6 +55,7 @@
 #include <asm/hvm/viridian.h>
 #include <asm/debugreg.h>
 #include <asm/msr.h>
+#include <asm/spec_ctrl.h>
 #include <asm/traps.h>
 #include <asm/nmi.h>
 #include <asm/mce.h>
@@ -76,9 +77,15 @@ static void paravirt_ctxt_switch_to(struct vcpu *v);
 
 static void default_idle(void)
 {
+    struct cpu_info *info = get_cpu_info();
+
     local_irq_disable();
     if ( cpu_is_haltable(smp_processor_id()) )
+    {
+        spec_ctrl_enter_idle(info);
         safe_halt();
+        spec_ctrl_exit_idle(info);
+    }
     else
         local_irq_enable();
 }
@@ -90,6 +97,7 @@ void default_dead_idle(void)
      * held by the CPUs spinning here indefinitely, and get discarded by
      * a subsequent INIT.
      */
+    spec_ctrl_enter_idle(get_cpu_info());
     wbinvd();
     for ( ; ; )
         halt();
diff --git a/xen/include/asm-x86/spec_ctrl.h b/xen/include/asm-x86/spec_ctrl.h
index e328b0f509..5ab4ff3f68 100644
--- a/xen/include/asm-x86/spec_ctrl.h
+++ b/xen/include/asm-x86/spec_ctrl.h
@@ -20,7 +20,9 @@
 #ifndef __X86_SPEC_CTRL_H__
 #define __X86_SPEC_CTRL_H__
 
+#include <asm/alternative.h>
 #include <asm/current.h>
+#include <asm/msr-index.h>
 
 void init_speculation_mitigations(void);
 
@@ -35,6 +37,38 @@ static inline void init_shadow_spec_ctrl_state(void)
     info->bti_ist_info = default_bti_ist_info;
 }
 
+/* WARNING! `ret`, `call *`, `jmp *` not safe after this call. */
+static always_inline void spec_ctrl_enter_idle(struct cpu_info *info)
+{
+    uint32_t val = 0;
+
+    /*
+     * Latch the new shadow value, then enable shadowing, then update the MSR.
+     * There are no SMP issues here; only local processor ordering concerns.
+     */
+    info->shadow_spec_ctrl = val;
+    barrier();
+    info->use_shadow_spec_ctrl = true;
+    barrier();
+    asm volatile ( ALTERNATIVE(ASM_NOP3, "wrmsr", X86_FEATURE_XEN_IBRS_SET)
+                   :: "a" (val), "c" (MSR_SPEC_CTRL), "d" (0) : "memory" );
+}
+
+/* WARNING! `ret`, `call *`, `jmp *` not safe before this call. */
+static always_inline void spec_ctrl_exit_idle(struct cpu_info *info)
+{
+    uint32_t val = SPEC_CTRL_IBRS;
+
+    /*
+     * Disable shadowing before updating the MSR.  There are no SMP issues
+     * here; only local processor ordering concerns.
+     */
+    info->use_shadow_spec_ctrl = false;
+    barrier();
+    asm volatile ( ALTERNATIVE(ASM_NOP3, "wrmsr", X86_FEATURE_XEN_IBRS_SET)
+                   :: "a" (val), "c" (MSR_SPEC_CTRL), "d" (0) : "memory" );
+}
+
 #endif /* !__X86_SPEC_CTRL_H__ */
 
 /*
-- 
2.14.3


From 5938aa17b49595150cade3ddc2c1929ecd0df39a Mon Sep 17 00:00:00 2001
From: Jan Beulich <jbeulich@suse.com>
Date: Fri, 23 Feb 2018 10:17:56 +0100
Subject: [PATCH 45/45] x86/PV: correctly count MSRs to migrate

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
 xen/arch/x86/domctl.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/xen/arch/x86/domctl.c b/xen/arch/x86/domctl.c
index 0c9fe39a18..659dc9f4a2 100644
--- a/xen/arch/x86/domctl.c
+++ b/xen/arch/x86/domctl.c
@@ -1260,7 +1260,8 @@ long arch_do_domctl(
         struct xen_domctl_vcpu_msrs *vmsrs = &domctl->u.vcpu_msrs;
         struct xen_domctl_vcpu_msr msr;
         struct vcpu *v;
-        uint32_t nr_msrs = 0;
+        uint32_t nr_msrs = 0, edx, dummy;
+        bool has_ibrsb;
 
         ret = -ESRCH;
         if ( (vmsrs->vcpu >= d->max_vcpus) ||
@@ -1276,6 +1277,10 @@ long arch_do_domctl(
         if ( boot_cpu_has(X86_FEATURE_DBEXT) )
             nr_msrs += 4;
 
+        domain_cpuid(d, 7, 0, &dummy, &dummy, &dummy, &edx);
+        has_ibrsb = (edx & cpufeat_mask(X86_FEATURE_IBRSB));
+        nr_msrs += !!cpu_has_cpuid_faulting + has_ibrsb;
+
         if ( domctl->cmd == XEN_DOMCTL_get_vcpu_msrs )
         {
             ret = 0; copyback = 1;
@@ -1285,8 +1290,6 @@ long arch_do_domctl(
                 vmsrs->msr_count = nr_msrs;
             else
             {
-                uint32_t edx, dummy;
-
                 i = 0;
 
                 vcpu_pause(v);
@@ -1337,9 +1340,7 @@ long arch_do_domctl(
                     ++i;
                 }
 
-                domain_cpuid(d, 7, 0, &dummy, &dummy, &dummy, &edx);
-                if ( (edx & cpufeat_mask(X86_FEATURE_IBRSB)) &&
-                     v->arch.spec_ctrl )
+                if ( has_ibrsb && v->arch.spec_ctrl )
                 {
                     if ( i < vmsrs->msr_count && !ret )
                     {
-- 
2.14.3