diff --git a/xen.gcc8.fix.patch b/xen.gcc8.fix.patch index 7a5cc49..558d9b4 100644 --- a/xen.gcc8.fix.patch +++ b/xen.gcc8.fix.patch @@ -94,3 +94,40 @@ if (xch) { const xc_error *error = xc_get_last_error(xch); if (error->code == XC_ERROR_NONE) +--- xen-4.10.0/tools/libxl/libxl_arm_acpi.c.orig 2017-12-13 11:37:59.000000000 +0000 ++++ xen-4.10.0/tools/libxl/libxl_arm_acpi.c 2018-02-28 12:37:08.887221211 +0000 +@@ -190,7 +190,7 @@ + struct acpi_table_rsdp *rsdp = (void *)dom->acpi_modules[0].data + offset; + + memcpy(rsdp->signature, "RSD PTR ", sizeof(rsdp->signature)); +- memcpy(rsdp->oem_id, ACPI_OEM_ID, sizeof(rsdp->oem_id)); ++ memcpy(rsdp->oem_id, ACPI_OEM_ID, sizeof(ACPI_OEM_ID)); + rsdp->length = acpitables[RSDP].size; + rsdp->revision = 0x02; + rsdp->xsdt_physical_address = acpitables[XSDT].addr; +@@ -205,11 +205,11 @@ + memcpy(h->signature, sig, 4); + h->length = len; + h->revision = rev; +- memcpy(h->oem_id, ACPI_OEM_ID, sizeof(h->oem_id)); +- memcpy(h->oem_table_id, ACPI_OEM_TABLE_ID, sizeof(h->oem_table_id)); ++ memcpy(h->oem_id, ACPI_OEM_ID, sizeof(ACPI_OEM_ID)); ++ memcpy(h->oem_table_id, ACPI_OEM_TABLE_ID, sizeof(ACPI_OEM_TABLE_ID)); + h->oem_revision = 0; + memcpy(h->asl_compiler_id, ACPI_ASL_COMPILER_ID, +- sizeof(h->asl_compiler_id)); ++ sizeof(ACPI_ASL_COMPILER_ID)); + h->asl_compiler_revision = 0; + h->checksum = 0; + } +--- xen-4.10.0/tools/xenpmd/xenpmd.c.orig 2018-02-28 16:18:50.377726049 +0000 ++++ xen-4.10.0/tools/xenpmd/xenpmd.c 2018-02-28 16:20:31.502426829 +0000 +@@ -352,7 +352,7 @@ + strlen(info->model_number) + + strlen(info->serial_number) + + strlen(info->battery_type) + +- strlen(info->oem_info) + 4)); ++ strlen(info->oem_info) + 4) & 0xff); + write_ulong_lsb_first(val+2, info->present); + write_ulong_lsb_first(val+10, info->design_capacity); + write_ulong_lsb_first(val+18, info->last_full_capacity); diff --git a/xen.gcc8.temp.fix.patch b/xen.gcc8.temp.fix.patch new file mode 100644 index 0000000..f1ec802 --- /dev/null +++ b/xen.gcc8.temp.fix.patch @@ -0,0 +1,44 @@ +--- xen-4.10.0/tools/Makefile.orig 2017-12-13 11:37:59.000000000 +0000 ++++ xen-4.10.0/tools/Makefile 2018-02-27 12:04:44.376192357 +0000 +@@ -8,7 +8,7 @@ + SUBDIRS-y += libs + SUBDIRS-y += libxc + SUBDIRS-y += flask +-SUBDIRS-y += fuzz ++#SUBDIRS-y += fuzz + SUBDIRS-y += xenstore + SUBDIRS-y += misc + SUBDIRS-y += examples +--- xen-4.10.0/tools/libacpi/Makefile.orig 2017-12-13 11:37:59.000000000 +0000 ++++ xen-4.10.0/tools/libacpi/Makefile 2018-02-27 21:12:56.928470227 +0000 +@@ -89,7 +89,7 @@ + @echo + @exit 1 + +-build.o: ssdt_s3.h ssdt_s4.h ssdt_pm.h ssdt_tpm.h ssdt_laptop_slate.h ++build.o: ssdt_s3.h ssdt_s4.h ssdt_pm.h ssdt_tpm.h ssdt_laptop_slate.h $(H_SRC) + + acpi.a: $(OBJS) + $(AR) rc $@ $(OBJS) +--- xen-4.10.0/tools/debugger/kdd/kdd.c.orig 2018-02-22 12:31:57.007039159 +0000 ++++ xen-4.10.0/tools/debugger/kdd/kdd.c 2018-02-22 18:27:37.213653422 +0000 +@@ -687,7 +687,7 @@ + } + } else { + /* 32-bit control-register space starts at 0x[2]cc, for 84 bytes */ +- uint64_t offset = addr; ++/* uint64_t offset = addr; + if (offset > 0x200) + offset -= 0x200; + offset -= 0xcc; +@@ -696,7 +696,9 @@ + len = 0; + } else { + memcpy(buf, ((uint8_t *)&ctrl.c32) + offset, len); +- } ++ } */ ++ /* disable above code due to compile issue for now */ ++ len = 0; + } + + s->txp.cmd.mem.addr = addr; diff --git a/xen.spec b/xen.spec index 6b1177c..d8493d2 100644 --- a/xen.spec +++ b/xen.spec @@ -60,7 +60,7 @@ Summary: Xen is a virtual machine monitor Name: xen Version: 4.10.0 -Release: 5%{?dist} +Release: 
6%{?dist} Group: Development/Libraries License: GPLv2+ and LGPLv2+ and BSD URL: http://xen.org/ @@ -121,6 +121,12 @@ Patch42: 4.10.0-shim-comet-3.patch Patch43: xen.comet.fixes.patch Patch44: xen.xsa254.pti.patch Patch45: xen.gcc8.fix.patch +Patch46: xen.xsa254.bti.patch +Patch47: xsa252.patch +Patch48: xsa255-1.patch +Patch49: xsa255-2.patch +Patch50: xsa256.patch +Patch51: xen.gcc8.temp.fix.patch %if %build_qemutrad @@ -333,6 +339,12 @@ manage Xen virtual machines. %patch2 -p1 %patch3 -p1 %patch45 -p1 +%patch46 -p1 +%patch47 -p1 +%patch48 -p1 +%patch49 -p1 +%patch50 -p1 +%patch51 -p1 # qemu-xen-traditional patches pushd tools/qemu-xen-traditional @@ -379,7 +391,7 @@ export EXTRA_CFLAGS_QEMU_XEN="$RPM_OPT_FLAGS" %if %build_hyp %if %build_crosshyp %define efi_flags LD_EFI=false -XEN_TARGET_ARCH=x86_64 make %{?_smp_mflags} %{?efi_flags} prefix=/usr xen CC="/usr/bin/x86_64-linux-gnu-gcc `echo $RPM_OPT_FLAGS | sed -e 's/-m32//g' -e 's/-march=i686//g' -e 's/-mtune=atom//g' -e 's/-specs=\/usr\/lib\/rpm\/redhat\/redhat-annobin-cc1//g' -e 's/-fstack-clash-protection//g'`" +XEN_TARGET_ARCH=x86_64 make %{?_smp_mflags} %{?efi_flags} prefix=/usr xen CC="/usr/bin/x86_64-linux-gnu-gcc `echo $RPM_OPT_FLAGS | sed -e 's/-m32//g' -e 's/-march=i686//g' -e 's/-mtune=atom//g' -e 's/-specs=\/usr\/lib\/rpm\/redhat\/redhat-annobin-cc1//g' -e 's/-fstack-clash-protection//g' -e 's/-mcet//g' -e 's/-fcf-protection//g'`" %else %ifarch armv7hl make %{?_smp_mflags} %{?efi_flags} prefix=/usr xen CC="gcc `echo $RPM_OPT_FLAGS | sed -e 's/-mfloat-abi=hard//g' -e 's/-march=armv7-a//g'`" @@ -881,6 +893,19 @@ fi %endif %changelog +* Wed Feb 28 2018 Michael Young - 4.10.0-6 +- update patch for XPTI mitigation for XSA-254 +- add Branch Target Injection (BTI) mitigation for XSA-254 +- DoS via non-preemptable L3/L4 pagetable freeing [XSA-252, CVE-2018-7540] + (#1549568) +- grant table v2 -> v1 transition may crash Xen [XSA-255, CVE-2018-7541] + (#1549570) +- x86 PVH guest without LAPIC may DoS the host [XSA-256, CVE-2018-7542] + (#1549572) +- further build issue fixes with gcc8 (some temporary workarounds) +- -mcet and -fcf-protection aren't recognized in hypervisor build x86_64 on + i686 either + * Fri Feb 23 2018 Michael Young - fix some build issues with gcc8 diff --git a/xen.xsa254.bti.patch b/xen.xsa254.bti.patch new file mode 100644 index 0000000..5d4a18c --- /dev/null +++ b/xen.xsa254.bti.patch @@ -0,0 +1,5669 @@ +From b829d42829c1ff626a02756acae4dd482fc20c9a Mon Sep 17 00:00:00 2001 +From: Julien Grall +Date: Tue, 16 Jan 2018 14:23:33 +0000 +Subject: [PATCH 06/49] xen/arm: Introduce enable callback to enable a + capabilities on each online CPU + +Once Xen knows what features/workarounds present on the platform, it +might be necessary to configure each online CPU. + +Introduce a new callback "enable" that will be called on each online CPU to +configure the "capability". + +The code is based on Linux v4.14 (where cpufeature.c comes from), the +explanation of why using stop_machine_run is kept as we have similar +problem in the future. + +Lastly introduce enable_errata_workaround that will be called once CPUs +have booted and before the hardware domain is created. + +This is part of XSA-254. 
+ +Signed-of-by: Julien Grall +Reviewed-by: Stefano Stabellini +(cherry picked from commit 7500495155aacce437878cb576f45224ae984f40) +--- + xen/arch/arm/cpuerrata.c | 6 ++++++ + xen/arch/arm/cpufeature.c | 29 +++++++++++++++++++++++++++++ + xen/arch/arm/setup.c | 1 + + xen/include/asm-arm/cpuerrata.h | 1 + + xen/include/asm-arm/cpufeature.h | 3 +++ + 5 files changed, 40 insertions(+) + +diff --git a/xen/arch/arm/cpuerrata.c b/xen/arch/arm/cpuerrata.c +index fe9e9facbe..772587c05a 100644 +--- a/xen/arch/arm/cpuerrata.c ++++ b/xen/arch/arm/cpuerrata.c +@@ -64,6 +64,12 @@ void check_local_cpu_errata(void) + { + update_cpu_capabilities(arm_errata, "enabled workaround for"); + } ++ ++void __init enable_errata_workarounds(void) ++{ ++ enable_cpu_capabilities(arm_errata); ++} ++ + /* + * Local variables: + * mode: C +diff --git a/xen/arch/arm/cpufeature.c b/xen/arch/arm/cpufeature.c +index 479c9fb011..525b45e22f 100644 +--- a/xen/arch/arm/cpufeature.c ++++ b/xen/arch/arm/cpufeature.c +@@ -19,6 +19,7 @@ + #include + #include + #include ++#include + #include + + DECLARE_BITMAP(cpu_hwcaps, ARM_NCAPS); +@@ -39,6 +40,34 @@ void update_cpu_capabilities(const struct arm_cpu_capabilities *caps, + } + } + ++/* ++ * Run through the enabled capabilities and enable() it on all active ++ * CPUs. ++ */ ++void __init enable_cpu_capabilities(const struct arm_cpu_capabilities *caps) ++{ ++ for ( ; caps->matches; caps++ ) ++ { ++ if ( !cpus_have_cap(caps->capability) ) ++ continue; ++ ++ if ( caps->enable ) ++ { ++ int ret; ++ ++ /* ++ * Use stop_machine_run() as it schedules the work allowing ++ * us to modify PSTATE, instead of on_each_cpu() which uses ++ * an IPI, giving us a PSTATE that disappears when we ++ * return. ++ */ ++ ret = stop_machine_run(caps->enable, (void *)caps, NR_CPUS); ++ /* stop_machine_run should never fail at this stage of the boot. */ ++ BUG_ON(ret); ++ } ++ } ++} ++ + /* + * Local variables: + * mode: C +diff --git a/xen/arch/arm/setup.c b/xen/arch/arm/setup.c +index 16a3b1be8e..032a6a882d 100644 +--- a/xen/arch/arm/setup.c ++++ b/xen/arch/arm/setup.c +@@ -849,6 +849,7 @@ void __init start_xen(unsigned long boot_phys_offset, + * stop_machine (tasklets initialized via an initcall). + */ + apply_alternatives_all(); ++ enable_errata_workarounds(); + + /* Create initial domain 0. 
*/ + /* The vGIC for DOM0 is exactly emulating the hardware GIC */ +diff --git a/xen/include/asm-arm/cpuerrata.h b/xen/include/asm-arm/cpuerrata.h +index 8b158429c7..7de68361ff 100644 +--- a/xen/include/asm-arm/cpuerrata.h ++++ b/xen/include/asm-arm/cpuerrata.h +@@ -5,6 +5,7 @@ + #include + + void check_local_cpu_errata(void); ++void enable_errata_workarounds(void); + + #ifdef CONFIG_HAS_ALTERNATIVE + +diff --git a/xen/include/asm-arm/cpufeature.h b/xen/include/asm-arm/cpufeature.h +index f00b6dbd39..21c65e198c 100644 +--- a/xen/include/asm-arm/cpufeature.h ++++ b/xen/include/asm-arm/cpufeature.h +@@ -74,6 +74,7 @@ struct arm_cpu_capabilities { + const char *desc; + u16 capability; + bool (*matches)(const struct arm_cpu_capabilities *); ++ int (*enable)(void *); /* Called on every active CPUs */ + union { + struct { /* To be used for eratum handling only */ + u32 midr_model; +@@ -85,6 +86,8 @@ struct arm_cpu_capabilities { + void update_cpu_capabilities(const struct arm_cpu_capabilities *caps, + const char *info); + ++void enable_cpu_capabilities(const struct arm_cpu_capabilities *caps); ++ + #endif /* __ASSEMBLY__ */ + + #endif +-- +2.14.3 + + +From 0f7a4faafb2d79920cc63457cfca3e03990af4cc Mon Sep 17 00:00:00 2001 +From: Julien Grall +Date: Tue, 16 Jan 2018 14:23:34 +0000 +Subject: [PATCH 07/49] xen/arm64: Add missing MIDR values for Cortex-A72, A73 + and A75 + +Cortex-A72, A73 and A75 MIDR will be used to a follow-up for hardening +the branch predictor. + +This is part of XSA-254. + +Signed-off-by: Julien Grall +Acked-by: Stefano Stabellini +(cherry picked from commit 7975bff524c4e2c30efbf144de753f151d974e53) +--- + xen/include/asm-arm/processor.h | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/xen/include/asm-arm/processor.h b/xen/include/asm-arm/processor.h +index 65eb1071e1..3edab1b893 100644 +--- a/xen/include/asm-arm/processor.h ++++ b/xen/include/asm-arm/processor.h +@@ -47,10 +47,16 @@ + #define ARM_CPU_PART_CORTEX_A15 0xC0F + #define ARM_CPU_PART_CORTEX_A53 0xD03 + #define ARM_CPU_PART_CORTEX_A57 0xD07 ++#define ARM_CPU_PART_CORTEX_A72 0xD08 ++#define ARM_CPU_PART_CORTEX_A73 0xD09 ++#define ARM_CPU_PART_CORTEX_A75 0xD0A + + #define MIDR_CORTEX_A15 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A15) + #define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53) + #define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57) ++#define MIDR_CORTEX_A72 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A72) ++#define MIDR_CORTEX_A73 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A73) ++#define MIDR_CORTEX_A75 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A75) + + /* MPIDR Multiprocessor Affinity Register */ + #define _MPIDR_UP (30) +-- +2.14.3 + + +From d1f4283a1d8405a480b4121e1efcfaec8bbdbffa Mon Sep 17 00:00:00 2001 +From: Julien Grall +Date: Tue, 16 Jan 2018 14:23:35 +0000 +Subject: [PATCH 08/49] xen/arm: cpuerrata: Add MIDR_ALL_VERSIONS + +Introduce a new macro MIDR_ALL_VERSIONS to match all variant/revision of a +given CPU model. + +This is part of XSA-254. 
+ +Signed-off-by: Julien Grall +Reviewed-by: Stefano Stabellini +(cherry picked from commit ba73070af43a38d200413f446d6a718e108867b6) +--- + xen/arch/arm/cpuerrata.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/xen/arch/arm/cpuerrata.c b/xen/arch/arm/cpuerrata.c +index 772587c05a..c50d3331f2 100644 +--- a/xen/arch/arm/cpuerrata.c ++++ b/xen/arch/arm/cpuerrata.c +@@ -7,6 +7,12 @@ + .midr_range_min = min, \ + .midr_range_max = max + ++#define MIDR_ALL_VERSIONS(model) \ ++ .matches = is_affected_midr_range, \ ++ .midr_model = model, \ ++ .midr_range_min = 0, \ ++ .midr_range_max = (MIDR_VARIANT_MASK | MIDR_REVISION_MASK) ++ + static bool __maybe_unused + is_affected_midr_range(const struct arm_cpu_capabilities *entry) + { +-- +2.14.3 + + +From cae6e1572f39a1906be0fc3bdaf49fe514c6a9c0 Mon Sep 17 00:00:00 2001 +From: Julien Grall +Date: Tue, 16 Jan 2018 14:23:36 +0000 +Subject: [PATCH 09/49] xen/arm64: Add skeleton to harden the branch predictor + aliasing attacks + +Aliasing attacked against CPU branch predictors can allow an attacker to +redirect speculative control flow on some CPUs and potentially divulge +information from one context to another. + +This patch adds initial skeleton code behind a new Kconfig option to +enable implementation-specific mitigations against these attacks for +CPUs that are affected. + +Most of the mitigations will have to be applied when entering to the +hypervisor from the guest context. For safety, it is applied at every +exception entry. So there are potential for optimizing when receiving +an exception at the same level. + +Because the attack is against branch predictor, it is not possible to +safely use branch instruction before the mitigation is applied. +Therefore, this has to be done in the vector entry before jump to the +helper handling a given exception. + +On Arm64, each vector can hold 32 instructions. This leave us 31 +instructions for the mitigation. The last one is the branch instruction +to the helper. + +Because a platform may have CPUs with different micro-architectures, +per-CPU vector table needs to be provided. Realistically, only a few +different mitigations will be necessary. So provide a small set of +vector tables. They will be re-used and patch with the mitigations +on-demand. + +This is based on the work done in Linux (see [1]). + +This is part of XSA-254. 
+ +[1] git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux.git +branch ktpi + +Signed-off-by: Julien Grall +Reviewed-by: Stefano Stabellini +(cherry picked from commit 4c4fddc166cf528aca49540bcc9ee4f196b01dac) +--- + xen/arch/arm/Kconfig | 20 ++++++ + xen/arch/arm/arm64/Makefile | 1 + + xen/arch/arm/arm64/bpi.S | 64 ++++++++++++++++++ + xen/arch/arm/cpuerrata.c | 142 +++++++++++++++++++++++++++++++++++++++ + xen/arch/arm/traps.c | 5 +- + xen/include/asm-arm/cpuerrata.h | 1 + + xen/include/asm-arm/cpufeature.h | 3 +- + xen/include/asm-arm/processor.h | 5 +- + 8 files changed, 237 insertions(+), 4 deletions(-) + create mode 100644 xen/arch/arm/arm64/bpi.S + +diff --git a/xen/arch/arm/Kconfig b/xen/arch/arm/Kconfig +index f58019d6ed..06fd85cc77 100644 +--- a/xen/arch/arm/Kconfig ++++ b/xen/arch/arm/Kconfig +@@ -171,6 +171,26 @@ config ARM64_ERRATUM_834220 + + endmenu + ++config HARDEN_BRANCH_PREDICTOR ++ bool "Harden the branch predictor against aliasing attacks" if EXPERT ++ default y ++ help ++ Speculation attacks against some high-performance processors rely on ++ being able to manipulate the branch predictor for a victim context by ++ executing aliasing branches in the attacker context. Such attacks ++ can be partially mitigated against by clearing internal branch ++ predictor state and limiting the prediction logic in some situations. ++ ++ This config option will take CPU-specific actions to harden the ++ branch predictor against aliasing attacks and may rely on specific ++ instruction sequences or control bits being set by the system ++ firmware. ++ ++ If unsure, say Y. ++ ++config ARM64_HARDEN_BRANCH_PREDICTOR ++ def_bool y if ARM_64 && HARDEN_BRANCH_PREDICTOR ++ + source "common/Kconfig" + + source "drivers/Kconfig" +diff --git a/xen/arch/arm/arm64/Makefile b/xen/arch/arm/arm64/Makefile +index 718fe44455..bb5c610b2a 100644 +--- a/xen/arch/arm/arm64/Makefile ++++ b/xen/arch/arm/arm64/Makefile +@@ -1,6 +1,7 @@ + subdir-y += lib + + obj-y += cache.o ++obj-$(CONFIG_HARDEN_BRANCH_PREDICTOR) += bpi.o + obj-$(EARLY_PRINTK) += debug.o + obj-y += domctl.o + obj-y += domain.o +diff --git a/xen/arch/arm/arm64/bpi.S b/xen/arch/arm/arm64/bpi.S +new file mode 100644 +index 0000000000..6cc2f17529 +--- /dev/null ++++ b/xen/arch/arm/arm64/bpi.S +@@ -0,0 +1,64 @@ ++/* ++ * Contains CPU specific branch predictor invalidation sequences ++ * ++ * Copyright (C) 2018 ARM Ltd. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program. If not, see . 
++ */ ++ ++.macro ventry target ++ .rept 31 ++ nop ++ .endr ++ b \target ++.endm ++ ++.macro vectors target ++ ventry \target + 0x000 ++ ventry \target + 0x080 ++ ventry \target + 0x100 ++ ventry \target + 0x180 ++ ++ ventry \target + 0x200 ++ ventry \target + 0x280 ++ ventry \target + 0x300 ++ ventry \target + 0x380 ++ ++ ventry \target + 0x400 ++ ventry \target + 0x480 ++ ventry \target + 0x500 ++ ventry \target + 0x580 ++ ++ ventry \target + 0x600 ++ ventry \target + 0x680 ++ ventry \target + 0x700 ++ ventry \target + 0x780 ++.endm ++ ++/* ++ * Populate 4 vector tables. This will cover up to 4 different ++ * micro-architectures in a system. ++ */ ++ .align 11 ++ENTRY(__bp_harden_hyp_vecs_start) ++ .rept 4 ++ vectors hyp_traps_vector ++ .endr ++ENTRY(__bp_harden_hyp_vecs_end) ++ ++/* ++ * Local variables: ++ * mode: ASM ++ * indent-tabs-mode: nil ++ * End: ++ */ +diff --git a/xen/arch/arm/cpuerrata.c b/xen/arch/arm/cpuerrata.c +index c50d3331f2..76d98e771d 100644 +--- a/xen/arch/arm/cpuerrata.c ++++ b/xen/arch/arm/cpuerrata.c +@@ -1,6 +1,148 @@ ++#include ++#include ++#include ++#include ++#include ++#include + #include + #include + ++/* Override macros from asm/page.h to make them work with mfn_t */ ++#undef virt_to_mfn ++#define virt_to_mfn(va) _mfn(__virt_to_mfn(va)) ++ ++/* Hardening Branch predictor code for Arm64 */ ++#ifdef CONFIG_ARM64_HARDEN_BRANCH_PREDICTOR ++ ++#define VECTOR_TABLE_SIZE SZ_2K ++ ++/* ++ * Number of available table vectors (this should be in-sync with ++ * arch/arm64/bpi.S ++ */ ++#define NR_BPI_HYP_VECS 4 ++ ++extern char __bp_harden_hyp_vecs_start[], __bp_harden_hyp_vecs_end[]; ++ ++/* ++ * Key for each slot. This is used to find whether a specific workaround ++ * had a slot assigned. ++ * ++ * The key is virtual address of the vector workaround ++ */ ++static uintptr_t bp_harden_slot_key[NR_BPI_HYP_VECS]; ++ ++/* ++ * [hyp_vec_start, hyp_vec_end[ corresponds to the first 31 instructions ++ * of each vector. The last (i.e 32th) instruction is used to branch to ++ * the original entry. ++ * ++ * Those instructions will be copied on each vector to harden them. ++ */ ++static bool copy_hyp_vect_bpi(unsigned int slot, const char *hyp_vec_start, ++ const char *hyp_vec_end) ++{ ++ void *dst_remapped; ++ const void *dst = __bp_harden_hyp_vecs_start + slot * VECTOR_TABLE_SIZE; ++ unsigned int i; ++ mfn_t dst_mfn = virt_to_mfn(dst); ++ ++ BUG_ON(((hyp_vec_end - hyp_vec_start) / 4) > 31); ++ ++ /* ++ * Vectors are part of the text that are mapped read-only. So re-map ++ * the vector table to be able to update vectors. ++ */ ++ dst_remapped = __vmap(&dst_mfn, ++ 1UL << get_order_from_bytes(VECTOR_TABLE_SIZE), ++ 1, 1, PAGE_HYPERVISOR, VMAP_DEFAULT); ++ if ( !dst_remapped ) ++ return false; ++ ++ dst_remapped += (vaddr_t)dst & ~PAGE_MASK; ++ ++ for ( i = 0; i < VECTOR_TABLE_SIZE; i += 0x80 ) ++ { ++ memcpy(dst_remapped + i, hyp_vec_start, hyp_vec_end - hyp_vec_start); ++ } ++ ++ clean_dcache_va_range(dst_remapped, VECTOR_TABLE_SIZE); ++ invalidate_icache(); ++ ++ vunmap(dst_remapped); ++ ++ return true; ++} ++ ++static bool __maybe_unused ++install_bp_hardening_vec(const struct arm_cpu_capabilities *entry, ++ const char *hyp_vec_start, ++ const char *hyp_vec_end) ++{ ++ static int last_slot = -1; ++ static DEFINE_SPINLOCK(bp_lock); ++ unsigned int i, slot = -1; ++ bool ret = true; ++ ++ /* ++ * Enable callbacks are called on every CPU based on the ++ * capabilities. So double-check whether the CPU matches the ++ * entry. 
++ */ ++ if ( !entry->matches(entry) ) ++ return true; ++ ++ /* ++ * No need to install hardened vector when the processor has ++ * ID_AA64PRF0_EL1.CSV2 set. ++ */ ++ if ( cpu_data[smp_processor_id()].pfr64.csv2 ) ++ return true; ++ ++ spin_lock(&bp_lock); ++ ++ /* ++ * Look up whether the hardening vector had a slot already ++ * assigned. ++ */ ++ for ( i = 0; i < 4; i++ ) ++ { ++ if ( bp_harden_slot_key[i] == (uintptr_t)hyp_vec_start ) ++ { ++ slot = i; ++ break; ++ } ++ } ++ ++ if ( slot == -1 ) ++ { ++ last_slot++; ++ /* Check we don't overrun the number of slots available. */ ++ BUG_ON(NR_BPI_HYP_VECS <= last_slot); ++ ++ slot = last_slot; ++ ret = copy_hyp_vect_bpi(slot, hyp_vec_start, hyp_vec_end); ++ ++ /* Only update the slot if the copy succeeded. */ ++ if ( ret ) ++ bp_harden_slot_key[slot] = (uintptr_t)hyp_vec_start; ++ } ++ ++ if ( ret ) ++ { ++ /* Install the new vector table. */ ++ WRITE_SYSREG((vaddr_t)(__bp_harden_hyp_vecs_start + slot * VECTOR_TABLE_SIZE), ++ VBAR_EL2); ++ isb(); ++ } ++ ++ spin_unlock(&bp_lock); ++ ++ return ret; ++} ++ ++#endif /* CONFIG_ARM64_HARDEN_BRANCH_PREDICTOR */ ++ + #define MIDR_RANGE(model, min, max) \ + .matches = is_affected_midr_range, \ + .midr_model = model, \ +diff --git a/xen/arch/arm/traps.c b/xen/arch/arm/traps.c +index f6f6de3691..9ec5b93dc2 100644 +--- a/xen/arch/arm/traps.c ++++ b/xen/arch/arm/traps.c +@@ -161,7 +161,10 @@ __initcall(update_serrors_cpu_caps); + + void init_traps(void) + { +- /* Setup Hyp vector base */ ++ /* ++ * Setup Hyp vector base. Note they might get updated with the ++ * branch predictor hardening. ++ */ + WRITE_SYSREG((vaddr_t)hyp_traps_vector, VBAR_EL2); + + /* Trap Debug and Performance Monitor accesses */ +diff --git a/xen/include/asm-arm/cpuerrata.h b/xen/include/asm-arm/cpuerrata.h +index 7de68361ff..23ebf367ea 100644 +--- a/xen/include/asm-arm/cpuerrata.h ++++ b/xen/include/asm-arm/cpuerrata.h +@@ -1,6 +1,7 @@ + #ifndef __ARM_CPUERRATA_H__ + #define __ARM_CPUERRATA_H__ + ++#include + #include + #include + +diff --git a/xen/include/asm-arm/cpufeature.h b/xen/include/asm-arm/cpufeature.h +index 21c65e198c..e557a095af 100644 +--- a/xen/include/asm-arm/cpufeature.h ++++ b/xen/include/asm-arm/cpufeature.h +@@ -42,8 +42,9 @@ + #define LIVEPATCH_FEATURE 4 + #define SKIP_SYNCHRONIZE_SERROR_ENTRY_EXIT 5 + #define SKIP_CTXT_SWITCH_SERROR_SYNC 6 ++#define ARM_HARDEN_BRANCH_PREDICTOR 7 + +-#define ARM_NCAPS 7 ++#define ARM_NCAPS 8 + + #ifndef __ASSEMBLY__ + +diff --git a/xen/include/asm-arm/processor.h b/xen/include/asm-arm/processor.h +index 3edab1b893..466da5da86 100644 +--- a/xen/include/asm-arm/processor.h ++++ b/xen/include/asm-arm/processor.h +@@ -385,8 +385,9 @@ struct cpuinfo_arm { + unsigned long fp:4; /* Floating Point */ + unsigned long simd:4; /* Advanced SIMD */ + unsigned long gic:4; /* GIC support */ +- unsigned long __res0:4; +- unsigned long __res1; ++ unsigned long __res0:28; ++ unsigned long csv2:4; ++ unsigned long __res1:4; + }; + } pfr64; + +-- +2.14.3 + + +From 928112900e5b4a92ccebb2eea11665fd76aa0f0d Mon Sep 17 00:00:00 2001 +From: Julien Grall +Date: Tue, 16 Jan 2018 14:23:37 +0000 +Subject: [PATCH 10/49] xen/arm64: Implement branch predictor hardening for + affected Cortex-A CPUs + +Cortex-A57, A72, A73 and A75 are susceptible to branch predictor +aliasing and can theoritically be attacked by malicious code. + +This patch implements a PSCI-based mitigation for these CPUs when +available. 
The call into firmware will invalidate the branch predictor +state, preventing any malicious entries from affection other victim +contexts. + +Ported from Linux git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux.git +branch kpti. + + Signed-off-by: Marc Zyngier + Signed-off-by: Will Deacon + +This is part of XSA-254. + +Signed-off-by: Julien Grall +Reviewed-by: Stefano Stabellini +(cherry picked from commit e730f8e41e8537f1db9770b9464f9523c28857b9) +--- + xen/arch/arm/arm64/bpi.S | 25 ++++++++++++++++++++++++ + xen/arch/arm/cpuerrata.c | 49 ++++++++++++++++++++++++++++++++++++++++++++++++ + 2 files changed, 74 insertions(+) + +diff --git a/xen/arch/arm/arm64/bpi.S b/xen/arch/arm/arm64/bpi.S +index 6cc2f17529..4b7f1dc21f 100644 +--- a/xen/arch/arm/arm64/bpi.S ++++ b/xen/arch/arm/arm64/bpi.S +@@ -56,6 +56,31 @@ ENTRY(__bp_harden_hyp_vecs_start) + .endr + ENTRY(__bp_harden_hyp_vecs_end) + ++ENTRY(__psci_hyp_bp_inval_start) ++ sub sp, sp, #(8 * 18) ++ stp x16, x17, [sp, #(16 * 0)] ++ stp x14, x15, [sp, #(16 * 1)] ++ stp x12, x13, [sp, #(16 * 2)] ++ stp x10, x11, [sp, #(16 * 3)] ++ stp x8, x9, [sp, #(16 * 4)] ++ stp x6, x7, [sp, #(16 * 5)] ++ stp x4, x5, [sp, #(16 * 6)] ++ stp x2, x3, [sp, #(16 * 7)] ++ stp x0, x1, [sp, #(16 * 8)] ++ mov x0, #0x84000000 ++ smc #0 ++ ldp x16, x17, [sp, #(16 * 0)] ++ ldp x14, x15, [sp, #(16 * 1)] ++ ldp x12, x13, [sp, #(16 * 2)] ++ ldp x10, x11, [sp, #(16 * 3)] ++ ldp x8, x9, [sp, #(16 * 4)] ++ ldp x6, x7, [sp, #(16 * 5)] ++ ldp x4, x5, [sp, #(16 * 6)] ++ ldp x2, x3, [sp, #(16 * 7)] ++ ldp x0, x1, [sp, #(16 * 8)] ++ add sp, sp, #(8 * 18) ++ENTRY(__psci_hyp_bp_inval_end) ++ + /* + * Local variables: + * mode: ASM +diff --git a/xen/arch/arm/cpuerrata.c b/xen/arch/arm/cpuerrata.c +index 76d98e771d..f1ea7f3c5b 100644 +--- a/xen/arch/arm/cpuerrata.c ++++ b/xen/arch/arm/cpuerrata.c +@@ -4,8 +4,10 @@ + #include + #include + #include ++#include + #include + #include ++#include + + /* Override macros from asm/page.h to make them work with mfn_t */ + #undef virt_to_mfn +@@ -141,6 +143,31 @@ install_bp_hardening_vec(const struct arm_cpu_capabilities *entry, + return ret; + } + ++extern char __psci_hyp_bp_inval_start[], __psci_hyp_bp_inval_end[]; ++ ++static int enable_psci_bp_hardening(void *data) ++{ ++ bool ret = true; ++ static bool warned = false; ++ ++ /* ++ * The mitigation is using PSCI version function to invalidate the ++ * branch predictor. This function is only available with PSCI 0.2 ++ * and later. 
++ */ ++ if ( psci_ver >= PSCI_VERSION(0, 2) ) ++ ret = install_bp_hardening_vec(data, __psci_hyp_bp_inval_start, ++ __psci_hyp_bp_inval_end); ++ else if ( !warned ) ++ { ++ ASSERT(system_state < SYS_STATE_active); ++ warning_add("PSCI 0.2 or later is required for the branch predictor hardening.\n"); ++ warned = true; ++ } ++ ++ return !ret; ++} ++ + #endif /* CONFIG_ARM64_HARDEN_BRANCH_PREDICTOR */ + + #define MIDR_RANGE(model, min, max) \ +@@ -204,6 +231,28 @@ static const struct arm_cpu_capabilities arm_errata[] = { + MIDR_RANGE(MIDR_CORTEX_A57, 0x00, + (1 << MIDR_VARIANT_SHIFT) | 2), + }, ++#endif ++#ifdef CONFIG_ARM64_HARDEN_BRANCH_PREDICTOR ++ { ++ .capability = ARM_HARDEN_BRANCH_PREDICTOR, ++ MIDR_ALL_VERSIONS(MIDR_CORTEX_A57), ++ .enable = enable_psci_bp_hardening, ++ }, ++ { ++ .capability = ARM_HARDEN_BRANCH_PREDICTOR, ++ MIDR_ALL_VERSIONS(MIDR_CORTEX_A72), ++ .enable = enable_psci_bp_hardening, ++ }, ++ { ++ .capability = ARM_HARDEN_BRANCH_PREDICTOR, ++ MIDR_ALL_VERSIONS(MIDR_CORTEX_A73), ++ .enable = enable_psci_bp_hardening, ++ }, ++ { ++ .capability = ARM_HARDEN_BRANCH_PREDICTOR, ++ MIDR_ALL_VERSIONS(MIDR_CORTEX_A75), ++ .enable = enable_psci_bp_hardening, ++ }, + #endif + {}, + }; +-- +2.14.3 + + +From 728fadb586a2a14a244dabd70463bcc1654ecc85 Mon Sep 17 00:00:00 2001 +From: Julien Grall +Date: Mon, 22 Jan 2018 14:35:42 +0000 +Subject: [PATCH 11/49] xen/arm: cpuerrata: Remove percpu.h include + +The include percpu.h was added by mistake in cpuerrata.h (see commit +4c4fddc166 "xen/arm64: Add skeleton to harden the branch aliasing +attacks"). So remove it. + +Signed-off-by: Julien Grall +Reviewed-by: Stefano Stabellini +(cherry picked from commit a5e7ce9560b408dbdc2f7fb8a58f6209601cc054) +--- + xen/include/asm-arm/cpuerrata.h | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/xen/include/asm-arm/cpuerrata.h b/xen/include/asm-arm/cpuerrata.h +index 23ebf367ea..7de68361ff 100644 +--- a/xen/include/asm-arm/cpuerrata.h ++++ b/xen/include/asm-arm/cpuerrata.h +@@ -1,7 +1,6 @@ + #ifndef __ARM_CPUERRATA_H__ + #define __ARM_CPUERRATA_H__ + +-#include + #include + #include + +-- +2.14.3 + + +From df7be94f26757a77747bf4fbfb84bbe2a3da3b4f Mon Sep 17 00:00:00 2001 +From: Julien Grall +Date: Fri, 2 Feb 2018 14:19:19 +0000 +Subject: [PATCH 13/49] xen/arm32: entry: Consolidate DEFINE_TRAP_ENTRY_* + macros + +The only difference between all the DEFINE_TRAP_ENTRY_* macros are the +interrupts (Asynchronous Abort, IRQ, FIQ) unmasked. + +Rather than duplicating the code, introduce __DEFINE_TRAP_ENTRY macro +that will take the list of interrupts to unmask. + +This is part of XSA-254. + +Signed-off-by: Julien Grall +Reviewed-by: Stefano Stabellini +(cherry picked from commit 3bd8fd751e50dd981b7055fb33cdc8aa29537673) +--- + xen/arch/arm/arm32/entry.S | 36 +++++++++++++----------------------- + 1 file changed, 13 insertions(+), 23 deletions(-) + +diff --git a/xen/arch/arm/arm32/entry.S b/xen/arch/arm/arm32/entry.S +index 120922e64e..c6490d2847 100644 +--- a/xen/arch/arm/arm32/entry.S ++++ b/xen/arch/arm/arm32/entry.S +@@ -111,39 +111,29 @@ abort_guest_exit_end: + skip_check: + mov pc, lr + +-#define DEFINE_TRAP_ENTRY(trap) \ ++/* ++ * Macro to define trap entry. The iflags corresponds to the list of ++ * interrupts (Asynchronous Abort, IRQ, FIQ) to unmask. 
++ */ ++#define __DEFINE_TRAP_ENTRY(trap, iflags) \ + ALIGN; \ + trap_##trap: \ + SAVE_ALL; \ +- cpsie i; /* local_irq_enable */ \ +- cpsie a; /* asynchronous abort enable */ \ ++ cpsie iflags; \ + adr lr, return_from_trap; \ + mov r0, sp; \ + mov r11, sp; \ + bic sp, #7; /* Align the stack pointer (noop on guest trap) */ \ + b do_trap_##trap + +-#define DEFINE_TRAP_ENTRY_NOIRQ(trap) \ +- ALIGN; \ +-trap_##trap: \ +- SAVE_ALL; \ +- cpsie a; /* asynchronous abort enable */ \ +- adr lr, return_from_trap; \ +- mov r0, sp; \ +- mov r11, sp; \ +- bic sp, #7; /* Align the stack pointer (noop on guest trap) */ \ +- b do_trap_##trap ++/* Trap handler which unmask IRQ/Abort, keep FIQ masked */ ++#define DEFINE_TRAP_ENTRY(trap) __DEFINE_TRAP_ENTRY(trap, ai) + +-#define DEFINE_TRAP_ENTRY_NOABORT(trap) \ +- ALIGN; \ +-trap_##trap: \ +- SAVE_ALL; \ +- cpsie i; /* local_irq_enable */ \ +- adr lr, return_from_trap; \ +- mov r0, sp; \ +- mov r11, sp; \ +- bic sp, #7; /* Align the stack pointer (noop on guest trap) */ \ +- b do_trap_##trap ++/* Trap handler which unmask Abort, keep IRQ/FIQ masked */ ++#define DEFINE_TRAP_ENTRY_NOIRQ(trap) __DEFINE_TRAP_ENTRY(trap, a) ++ ++/* Trap handler which unmask IRQ, keep Abort/FIQ masked */ ++#define DEFINE_TRAP_ENTRY_NOABORT(trap) __DEFINE_TRAP_ENTRY(trap, i) + + .align 5 + GLOBAL(hyp_traps_vector) +-- +2.14.3 + + +From 3caf32c470f2f7eb3452c8a61d6224d10e56f9a3 Mon Sep 17 00:00:00 2001 +From: Julien Grall +Date: Fri, 2 Feb 2018 14:19:20 +0000 +Subject: [PATCH 14/49] xen/arm32: Add missing MIDR values for Cortex-A17 and + A12 + +Cortex-A17 and A12 MIDR will be used in a follow-up patch for hardening +the branch predictor. + +This is part of XSA-254. + +Signed-off-by: Julien Grall +Reviewed-by: Stefano Stabellini +(cherry picked from commit 340367bca5360f3e3d263341b58234d0efe5ced2) +--- + xen/include/asm-arm/processor.h | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/xen/include/asm-arm/processor.h b/xen/include/asm-arm/processor.h +index 466da5da86..c0f79d0093 100644 +--- a/xen/include/asm-arm/processor.h ++++ b/xen/include/asm-arm/processor.h +@@ -44,6 +44,8 @@ + + #define ARM_CPU_IMP_ARM 0x41 + ++#define ARM_CPU_PART_CORTEX_A12 0xC0D ++#define ARM_CPU_PART_CORTEX_A17 0xC0E + #define ARM_CPU_PART_CORTEX_A15 0xC0F + #define ARM_CPU_PART_CORTEX_A53 0xD03 + #define ARM_CPU_PART_CORTEX_A57 0xD07 +@@ -51,6 +53,8 @@ + #define ARM_CPU_PART_CORTEX_A73 0xD09 + #define ARM_CPU_PART_CORTEX_A75 0xD0A + ++#define MIDR_CORTEX_A12 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A12) ++#define MIDR_CORTEX_A17 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A17) + #define MIDR_CORTEX_A15 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A15) + #define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53) + #define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57) +-- +2.14.3 + + +From 19ad8a7287298f701b557e55e4be689a702194c0 Mon Sep 17 00:00:00 2001 +From: Julien Grall +Date: Fri, 2 Feb 2018 14:19:21 +0000 +Subject: [PATCH 15/49] xen/arm32: entry: Add missing trap_reset entry + +At the moment, the reset vector is defined as .word 0 (e.g andeq r0, r0, +r0). + +This is rather unintuitive and will result to execute the trap +undefined. Instead introduce trap helpers for reset and will generate an +error message in the unlikely case that reset will be called. + +This is part of XSA-254. 
+ +Signed-off-by: Julien Grall +Reviewed-by: Stefano Stabellini +(cherry picked from commit 00268cc91270c7b0aa3a1906bf7e7702db9c61c1) +--- + xen/arch/arm/arm32/entry.S | 3 ++- + xen/arch/arm/arm32/traps.c | 5 +++++ + 2 files changed, 7 insertions(+), 1 deletion(-) + +diff --git a/xen/arch/arm/arm32/entry.S b/xen/arch/arm/arm32/entry.S +index c6490d2847..64876c1184 100644 +--- a/xen/arch/arm/arm32/entry.S ++++ b/xen/arch/arm/arm32/entry.S +@@ -137,7 +137,7 @@ trap_##trap: \ + + .align 5 + GLOBAL(hyp_traps_vector) +- .word 0 /* 0x00 - Reset */ ++ b trap_reset /* 0x00 - Reset */ + b trap_undefined_instruction /* 0x04 - Undefined Instruction */ + b trap_hypervisor_call /* 0x08 - Hypervisor Call */ + b trap_prefetch_abort /* 0x0c - Prefetch Abort */ +@@ -146,6 +146,7 @@ GLOBAL(hyp_traps_vector) + b trap_irq /* 0x18 - IRQ */ + b trap_fiq /* 0x1c - FIQ */ + ++DEFINE_TRAP_ENTRY(reset) + DEFINE_TRAP_ENTRY(undefined_instruction) + DEFINE_TRAP_ENTRY(hypervisor_call) + DEFINE_TRAP_ENTRY(prefetch_abort) +diff --git a/xen/arch/arm/arm32/traps.c b/xen/arch/arm/arm32/traps.c +index 705255883e..4f27543dec 100644 +--- a/xen/arch/arm/arm32/traps.c ++++ b/xen/arch/arm/arm32/traps.c +@@ -23,6 +23,11 @@ + + #include + ++void do_trap_reset(struct cpu_user_regs *regs) ++{ ++ do_unexpected_trap("Reset", regs); ++} ++ + void do_trap_undefined_instruction(struct cpu_user_regs *regs) + { + uint32_t pc = regs->pc; +-- +2.14.3 + + +From c4c0187839bacadc82a5729cea739e8c485f6c60 Mon Sep 17 00:00:00 2001 +From: Julien Grall +Date: Fri, 2 Feb 2018 14:19:22 +0000 +Subject: [PATCH 16/49] xen/arm32: Add skeleton to harden branch predictor + aliasing attacks + +Aliasing attacked against CPU branch predictors can allow an attacker to +redirect speculative control flow on some CPUs and potentially divulge +information from one context to another. + +This patch adds initiatial skeleton code behind a new Kconfig option +to enable implementation-specific mitigations against these attacks +for CPUs that are affected. + +Most of mitigations will have to be applied when entering to the +hypervisor from the guest context. + +Because the attack is against branch predictor, it is not possible to +safely use branch instruction before the mitigation is applied. +Therefore this has to be done in the vector entry before jump to the +helper handling a given exception. + +However, on arm32, each vector contain a single instruction. This means +that the hardened vector tables may rely on the state of registers that +does not hold when in the hypervisor (e.g SP is 8 bytes aligned). +Therefore hypervisor code running with guest vectors table should be +minimized and always have IRQs and SErrors masked to reduce the risk to +use them. + +This patch provides an infrastructure to switch vector tables before +entering to the guest and when leaving it. + +Note that alternative could have been used, but older Xen (4.8 or +earlier) doesn't have support. So avoid using alternative to ease +backporting. + +This is part of XSA-254. 
+ +Signed-off-by: Julien Grall +Reviewed-by: Stefano Stabellini +(cherry picked from commit 9bd4463b5c7cc026a07b9bbd41a6a7122a95647e) +--- + xen/arch/arm/Kconfig | 3 +++ + xen/arch/arm/arm32/entry.S | 41 ++++++++++++++++++++++++++++++++++++++++- + xen/arch/arm/cpuerrata.c | 30 ++++++++++++++++++++++++++++++ + 3 files changed, 73 insertions(+), 1 deletion(-) + +diff --git a/xen/arch/arm/Kconfig b/xen/arch/arm/Kconfig +index 06fd85cc77..2782ee6589 100644 +--- a/xen/arch/arm/Kconfig ++++ b/xen/arch/arm/Kconfig +@@ -191,6 +191,9 @@ config HARDEN_BRANCH_PREDICTOR + config ARM64_HARDEN_BRANCH_PREDICTOR + def_bool y if ARM_64 && HARDEN_BRANCH_PREDICTOR + ++config ARM32_HARDEN_BRANCH_PREDICTOR ++ def_bool y if ARM_32 && HARDEN_BRANCH_PREDICTOR ++ + source "common/Kconfig" + + source "drivers/Kconfig" +diff --git a/xen/arch/arm/arm32/entry.S b/xen/arch/arm/arm32/entry.S +index 64876c1184..828e52c25c 100644 +--- a/xen/arch/arm/arm32/entry.S ++++ b/xen/arch/arm/arm32/entry.S +@@ -34,6 +34,20 @@ + blne save_guest_regs + + save_guest_regs: ++#ifdef CONFIG_ARM32_HARDEN_BRANCH_PREDICTOR ++ /* ++ * Restore vectors table to the default as it may have been ++ * changed when returning to the guest (see ++ * return_to_hypervisor). We need to do that early (e.g before ++ * any interrupts are unmasked) because hardened vectors requires ++ * SP to be 8 bytes aligned. This does not hold when running in ++ * the hypervisor. ++ */ ++ ldr r1, =hyp_traps_vector ++ mcr p15, 4, r1, c12, c0, 0 ++ isb ++#endif ++ + ldr r11, =0xffffffff /* Clobber SP which is only valid for hypervisor frames. */ + str r11, [sp, #UREGS_sp] + SAVE_ONE_BANKED(SP_usr) +@@ -179,12 +193,37 @@ return_to_guest: + RESTORE_ONE_BANKED(R11_fiq); RESTORE_ONE_BANKED(R12_fiq); + /* Fall thru */ + return_to_hypervisor: +- cpsid i ++ cpsid ai + ldr lr, [sp, #UREGS_lr] + ldr r11, [sp, #UREGS_pc] + msr ELR_hyp, r11 + ldr r11, [sp, #UREGS_cpsr] + msr SPSR_hyp, r11 ++#ifdef CONFIG_ARM32_HARDEN_BRANCH_PREDICTOR ++ /* ++ * Hardening branch predictor may require to setup a different ++ * vector tables before returning to the guests. Those vectors ++ * may rely on the state of registers that does not hold when ++ * running in the hypervisor (e.g SP is 8 bytes aligned). So setup ++ * HVBAR very late. ++ * ++ * Default vectors table will be restored on exit (see ++ * save_guest_regs). ++ */ ++ mov r9, #0 /* vector tables = NULL */ ++ /* ++ * Load vector tables pointer from the per-cpu bp_harden_vecs ++ * when returning to the guest only. ++ */ ++ and r11, #PSR_MODE_MASK ++ cmp r11, #PSR_MODE_HYP ++ ldrne r11, =per_cpu__bp_harden_vecs ++ mrcne p15, 4, r10, c13, c0, 2 /* r10 = per-cpu offset (HTPIDR) */ ++ addne r11, r11, r10 /* r11 = offset of the vector tables */ ++ ldrne r9, [r11] /* r9 = vector tables */ ++ cmp r9, #0 /* Only update HVBAR when the vector */ ++ mcrne p15, 4, r9, c12, c0, 0 /* tables is not NULL. */ ++#endif + pop {r0-r12} + add sp, #(UREGS_SP_usr - UREGS_sp); /* SP, LR, SPSR, PC */ + clrex +diff --git a/xen/arch/arm/cpuerrata.c b/xen/arch/arm/cpuerrata.c +index f1ea7f3c5b..0a138fa735 100644 +--- a/xen/arch/arm/cpuerrata.c ++++ b/xen/arch/arm/cpuerrata.c +@@ -170,6 +170,36 @@ static int enable_psci_bp_hardening(void *data) + + #endif /* CONFIG_ARM64_HARDEN_BRANCH_PREDICTOR */ + ++/* Hardening Branch predictor code for Arm32 */ ++#ifdef CONFIG_ARM32_HARDEN_BRANCH_PREDICTOR ++ ++/* ++ * Per-CPU vector tables to use when returning to the guests. They will ++ * only be used on platform requiring to harden the branch predictor. 
++ */ ++DEFINE_PER_CPU_READ_MOSTLY(const char *, bp_harden_vecs); ++ ++extern char hyp_traps_vector_bp_inv[]; ++ ++static void __maybe_unused ++install_bp_hardening_vecs(const struct arm_cpu_capabilities *entry, ++ const char *hyp_vecs, const char *desc) ++{ ++ /* ++ * Enable callbacks are called on every CPU based on the ++ * capabilities. So double-check whether the CPU matches the ++ * entry. ++ */ ++ if ( !entry->matches(entry) ) ++ return; ++ ++ printk(XENLOG_INFO "CPU%u will %s on guest exit\n", ++ smp_processor_id(), desc); ++ this_cpu(bp_harden_vecs) = hyp_vecs; ++} ++ ++#endif ++ + #define MIDR_RANGE(model, min, max) \ + .matches = is_affected_midr_range, \ + .midr_model = model, \ +-- +2.14.3 + + +From f167ebf6b33c4dbdb0135c350c0d927980191ac5 Mon Sep 17 00:00:00 2001 +From: Julien Grall +Date: Fri, 2 Feb 2018 14:19:23 +0000 +Subject: [PATCH 17/49] xen/arm32: Invalidate BTB on guest exit for Cortex A17 + and 12 + +In order to avoid aliasing attackes agains the branch predictor, let's +invalidate the BTB on guest exist. This is made complicated by the fact +that we cannot take a branch invalidating the BTB. + +This is based on the fourth version posted by Marc Zyngier on Linux-arm +mailing list (see [1]). + +This is part of XSA-254. + +[1] https://www.spinics.net/lists/arm-kernel/msg632062.html + +Signed-off-by: Marc Zyngier +Signed-off-by: Julien Grall +Reviewed-by: Stefano Stabellini +(cherry picked from commit 05e0690d03dc6177e614e060ae78001d4f2abde2) +--- + xen/arch/arm/arm32/entry.S | 38 ++++++++++++++++++++++++++++++++++++++ + xen/arch/arm/cpuerrata.c | 19 +++++++++++++++++++ + 2 files changed, 57 insertions(+) + +diff --git a/xen/arch/arm/arm32/entry.S b/xen/arch/arm/arm32/entry.S +index 828e52c25c..1ebbe4b065 100644 +--- a/xen/arch/arm/arm32/entry.S ++++ b/xen/arch/arm/arm32/entry.S +@@ -160,6 +160,44 @@ GLOBAL(hyp_traps_vector) + b trap_irq /* 0x18 - IRQ */ + b trap_fiq /* 0x1c - FIQ */ + ++#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR ++ ++ .align 5 ++GLOBAL(hyp_traps_vector_bp_inv) ++ /* ++ * We encode the exception entry in the bottom 3 bits of ++ * SP, and we have to guarantee to be 8 bytes aligned. 
++ */ ++ add sp, sp, #1 /* Reset 7 */ ++ add sp, sp, #1 /* Undef 6 */ ++ add sp, sp, #1 /* Hypervisor Call 5 */ ++ add sp, sp, #1 /* Prefetch abort 4 */ ++ add sp, sp, #1 /* Data abort 3 */ ++ add sp, sp, #1 /* Hypervisor 2 */ ++ add sp, sp, #1 /* IRQ 1 */ ++ nop /* FIQ 0 */ ++ ++ mcr p15, 0, r0, c7, c5, 6 /* BPIALL */ ++ isb ++ ++.macro vect_br val, targ ++ eor sp, sp, #\val ++ tst sp, #7 ++ eorne sp, sp, #\val ++ beq \targ ++.endm ++ ++ vect_br 0, trap_fiq ++ vect_br 1, trap_irq ++ vect_br 2, trap_guest_sync ++ vect_br 3, trap_data_abort ++ vect_br 4, trap_prefetch_abort ++ vect_br 5, trap_hypervisor_call ++ vect_br 6, trap_undefined_instruction ++ vect_br 7, trap_reset ++ ++#endif /* CONFIG_HARDEN_BRANCH_PREDICTOR */ ++ + DEFINE_TRAP_ENTRY(reset) + DEFINE_TRAP_ENTRY(undefined_instruction) + DEFINE_TRAP_ENTRY(hypervisor_call) +diff --git a/xen/arch/arm/cpuerrata.c b/xen/arch/arm/cpuerrata.c +index 0a138fa735..c79e6d65d3 100644 +--- a/xen/arch/arm/cpuerrata.c ++++ b/xen/arch/arm/cpuerrata.c +@@ -198,6 +198,13 @@ install_bp_hardening_vecs(const struct arm_cpu_capabilities *entry, + this_cpu(bp_harden_vecs) = hyp_vecs; + } + ++static int enable_bp_inv_hardening(void *data) ++{ ++ install_bp_hardening_vecs(data, hyp_traps_vector_bp_inv, ++ "execute BPIALL"); ++ return 0; ++} ++ + #endif + + #define MIDR_RANGE(model, min, max) \ +@@ -283,6 +290,18 @@ static const struct arm_cpu_capabilities arm_errata[] = { + MIDR_ALL_VERSIONS(MIDR_CORTEX_A75), + .enable = enable_psci_bp_hardening, + }, ++#endif ++#ifdef CONFIG_ARM32_HARDEN_BRANCH_PREDICTOR ++ { ++ .capability = ARM_HARDEN_BRANCH_PREDICTOR, ++ MIDR_ALL_VERSIONS(MIDR_CORTEX_A12), ++ .enable = enable_bp_inv_hardening, ++ }, ++ { ++ .capability = ARM_HARDEN_BRANCH_PREDICTOR, ++ MIDR_ALL_VERSIONS(MIDR_CORTEX_A17), ++ .enable = enable_bp_inv_hardening, ++ }, + #endif + {}, + }; +-- +2.14.3 + + +From a69a8b5fdc9cc90aa4faf522c355abd849f11001 Mon Sep 17 00:00:00 2001 +From: Julien Grall +Date: Fri, 2 Feb 2018 14:19:24 +0000 +Subject: [PATCH 18/49] xen/arm32: Invalidate icache on guest exist for + Cortex-A15 + +In order to avoid aliasing attacks against the branch predictor on +Cortex A-15, let's invalidate the BTB on guest exit, which can only be +done by invalidating the icache (with ACTLR[0] being set). + +We use the same hack as for A12/A17 to perform the vector decoding. + +This is based on Linux patch from the kpti branch in [1]. + +[1] https://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux.git + +Signed-off-by: Marc Zyngier +Signed-off-by: Julien Grall +Reviewed-by: Stefano Stabellini +(cherry picked from commit 665c4b6aa79eb21b1aada9f7f98fb5cb7f03743a) +--- + xen/arch/arm/arm32/entry.S | 21 +++++++++++++++++++++ + xen/arch/arm/cpuerrata.c | 13 +++++++++++++ + 2 files changed, 34 insertions(+) + +diff --git a/xen/arch/arm/arm32/entry.S b/xen/arch/arm/arm32/entry.S +index 1ebbe4b065..2f8b7cb7b8 100644 +--- a/xen/arch/arm/arm32/entry.S ++++ b/xen/arch/arm/arm32/entry.S +@@ -162,6 +162,26 @@ GLOBAL(hyp_traps_vector) + + #ifdef CONFIG_HARDEN_BRANCH_PREDICTOR + ++ .align 5 ++GLOBAL(hyp_traps_vector_ic_inv) ++ /* ++ * We encode the exception entry in the bottom 3 bits of ++ * SP, and we have to guarantee to be 8 bytes aligned. 
++ */ ++ add sp, sp, #1 /* Reset 7 */ ++ add sp, sp, #1 /* Undef 6 */ ++ add sp, sp, #1 /* Hypervisor call 5 */ ++ add sp, sp, #1 /* Prefetch abort 4 */ ++ add sp, sp, #1 /* Data abort 3 */ ++ add sp, sp, #1 /* Hypervisor 2 */ ++ add sp, sp, #1 /* IRQ 1 */ ++ nop /* FIQ 0 */ ++ ++ mcr p15, 0, r0, c7, c5, 0 /* ICIALLU */ ++ isb ++ ++ b decode_vectors ++ + .align 5 + GLOBAL(hyp_traps_vector_bp_inv) + /* +@@ -180,6 +200,7 @@ GLOBAL(hyp_traps_vector_bp_inv) + mcr p15, 0, r0, c7, c5, 6 /* BPIALL */ + isb + ++decode_vectors: + .macro vect_br val, targ + eor sp, sp, #\val + tst sp, #7 +diff --git a/xen/arch/arm/cpuerrata.c b/xen/arch/arm/cpuerrata.c +index c79e6d65d3..9c7458ef06 100644 +--- a/xen/arch/arm/cpuerrata.c ++++ b/xen/arch/arm/cpuerrata.c +@@ -180,6 +180,7 @@ static int enable_psci_bp_hardening(void *data) + DEFINE_PER_CPU_READ_MOSTLY(const char *, bp_harden_vecs); + + extern char hyp_traps_vector_bp_inv[]; ++extern char hyp_traps_vector_ic_inv[]; + + static void __maybe_unused + install_bp_hardening_vecs(const struct arm_cpu_capabilities *entry, +@@ -205,6 +206,13 @@ static int enable_bp_inv_hardening(void *data) + return 0; + } + ++static int enable_ic_inv_hardening(void *data) ++{ ++ install_bp_hardening_vecs(data, hyp_traps_vector_ic_inv, ++ "execute ICIALLU"); ++ return 0; ++} ++ + #endif + + #define MIDR_RANGE(model, min, max) \ +@@ -302,6 +310,11 @@ static const struct arm_cpu_capabilities arm_errata[] = { + MIDR_ALL_VERSIONS(MIDR_CORTEX_A17), + .enable = enable_bp_inv_hardening, + }, ++ { ++ .capability = ARM_HARDEN_BRANCH_PREDICTOR, ++ MIDR_ALL_VERSIONS(MIDR_CORTEX_A15), ++ .enable = enable_ic_inv_hardening, ++ }, + #endif + {}, + }; +-- +2.14.3 + + +From bbd093c5033d87c0043cf90aa782efdc141dc0e7 Mon Sep 17 00:00:00 2001 +From: Julien Grall +Date: Fri, 2 Feb 2018 14:19:25 +0000 +Subject: [PATCH 19/49] xen/arm32: entry: Document the purpose of r11 in the + traps handler + +It took me a bit of time to understand why __DEFINE_TRAP_ENTRY is +storing the original stack pointer in r11. It is working in pair with +return_traps_entry where sp will be restored from r11. + +This is fine because per the AAPCS r11 must be preserved by the +subroutine. So in return_from_trap, r11 will still contain the original +stack pointer. + +Add some documentation in the code to point the 2 sides to each other. + +Signed-off-by: Julien Grall +Reviewed-by: Stefano Stabellini +(cherry picked from commit dd855aa430f2da9b677c145f0c625a82aaa97110) +--- + xen/arch/arm/arm32/entry.S | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/xen/arch/arm/arm32/entry.S b/xen/arch/arm/arm32/entry.S +index 2f8b7cb7b8..f6908e3f16 100644 +--- a/xen/arch/arm/arm32/entry.S ++++ b/xen/arch/arm/arm32/entry.S +@@ -136,6 +136,10 @@ trap_##trap: \ + cpsie iflags; \ + adr lr, return_from_trap; \ + mov r0, sp; \ ++ /* \ ++ * Save the stack pointer in r11. It will be restored after the \ ++ * trap has been handled (see return_from_trap). \ ++ */ \ + mov r11, sp; \ + bic sp, #7; /* Align the stack pointer (noop on guest trap) */ \ + b do_trap_##trap +@@ -229,6 +233,10 @@ DEFINE_TRAP_ENTRY_NOIRQ(fiq) + DEFINE_TRAP_ENTRY_NOABORT(data_abort) + + return_from_trap: ++ /* ++ * Restore the stack pointer from r11. It was saved on exception ++ * entry (see __DEFINE_TRAP_ENTRY). 
++ */ + mov sp, r11 + ENTRY(return_to_new_vcpu32) + ldr r11, [sp, #UREGS_cpsr] +-- +2.14.3 + + +From 79012ead937f0533ec591c4ece925e4d23568874 Mon Sep 17 00:00:00 2001 +From: Andrew Cooper +Date: Thu, 8 Feb 2018 10:54:11 +0100 +Subject: [PATCH 20/49] x86/alt: Break out alternative-asm into a separate + header file + +Signed-off-by: Andrew Cooper +Reviewed-by: Wei Liu +Acked-by: Jan Beulich +master commit: 9d7b4351d3bb5c744db311cffa57ba3ebb583327 +master date: 2018-01-05 19:57:07 +0000 +--- + xen/include/asm-x86/alternative-asm.h | 31 +++++++++++++++++++++++++++++++ + xen/include/asm-x86/alternative.h | 13 +++---------- + 2 files changed, 34 insertions(+), 10 deletions(-) + create mode 100644 xen/include/asm-x86/alternative-asm.h + +diff --git a/xen/include/asm-x86/alternative-asm.h b/xen/include/asm-x86/alternative-asm.h +new file mode 100644 +index 0000000000..bf0332ef28 +--- /dev/null ++++ b/xen/include/asm-x86/alternative-asm.h +@@ -0,0 +1,31 @@ ++#ifndef _ASM_X86_ALTERNATIVE_ASM_H_ ++#define _ASM_X86_ALTERNATIVE_ASM_H_ ++ ++#ifdef __ASSEMBLY__ ++ ++/* ++ * Issue one struct alt_instr descriptor entry (need to put it into ++ * the section .altinstructions, see below). This entry contains ++ * enough information for the alternatives patching code to patch an ++ * instruction. See apply_alternatives(). ++ */ ++.macro altinstruction_entry orig alt feature orig_len alt_len ++ .long \orig - . ++ .long \alt - . ++ .word \feature ++ .byte \orig_len ++ .byte \alt_len ++.endm ++ ++#endif /* __ASSEMBLY__ */ ++#endif /* _ASM_X86_ALTERNATIVE_ASM_H_ */ ++ ++/* ++ * Local variables: ++ * mode: C ++ * c-file-style: "BSD" ++ * c-basic-offset: 4 ++ * tab-width: 4 ++ * indent-tabs-mode: nil ++ * End: ++ */ +diff --git a/xen/include/asm-x86/alternative.h b/xen/include/asm-x86/alternative.h +index db4f08e0e7..ba537d6b7e 100644 +--- a/xen/include/asm-x86/alternative.h ++++ b/xen/include/asm-x86/alternative.h +@@ -1,17 +1,10 @@ + #ifndef __X86_ALTERNATIVE_H__ + #define __X86_ALTERNATIVE_H__ + ++#include + #include + +-#ifdef __ASSEMBLY__ +-.macro altinstruction_entry orig alt feature orig_len alt_len +- .long \orig - . +- .long \alt - . +- .word \feature +- .byte \orig_len +- .byte \alt_len +-.endm +-#else ++#ifndef __ASSEMBLY__ + #include + #include + +@@ -145,6 +138,6 @@ extern void alternative_instructions(void); + /* Use this macro(s) if you need more than one output parameter. */ + #define ASM_OUTPUT2(a...) a + +-#endif /* __ASSEMBLY__ */ ++#endif /* !__ASSEMBLY__ */ + + #endif /* __X86_ALTERNATIVE_H__ */ +-- +2.14.3 + + +From be3138b6f65955196d67c1d54aea3d6a3bf33934 Mon Sep 17 00:00:00 2001 +From: Andrew Cooper +Date: Thu, 8 Feb 2018 10:55:11 +0100 +Subject: [PATCH 21/49] x86/alt: Introduce ALTERNATIVE{,_2} macros + +To help creating alternative frames in assembly. 
+ +Signed-off-by: Andrew Cooper +Reviewed-by: Jan Beulich +master commit: 4711428f5e2a9bfff9f8d75b6a696072118c19a4 +master date: 2018-01-05 19:57:07 +0000 +--- + xen/include/asm-x86/alternative-asm.h | 46 +++++++++++++++++++++++++++++++++++ + 1 file changed, 46 insertions(+) + +diff --git a/xen/include/asm-x86/alternative-asm.h b/xen/include/asm-x86/alternative-asm.h +index bf0332ef28..6640e85581 100644 +--- a/xen/include/asm-x86/alternative-asm.h ++++ b/xen/include/asm-x86/alternative-asm.h +@@ -17,6 +17,52 @@ + .byte \alt_len + .endm + ++.macro ALTERNATIVE oldinstr, newinstr, feature ++.Lold_start_\@: ++ \oldinstr ++.Lold_end_\@: ++ ++ .pushsection .altinstructions, "a", @progbits ++ altinstruction_entry .Lold_start_\@, .Lnew_start_\@, \feature, \ ++ (.Lold_end_\@ - .Lold_start_\@), (.Lnew_end_\@ - .Lnew_start_\@) ++ ++ .section .discard, "a", @progbits ++ /* Assembler-time check that \newinstr isn't longer than \oldinstr. */ ++ .byte 0xff + (.Lnew_end_\@ - .Lnew_start_\@) - (.Lold_end_\@ - .Lold_start_\@) ++ ++ .section .altinstr_replacement, "ax", @progbits ++.Lnew_start_\@: ++ \newinstr ++.Lnew_end_\@: ++ .popsection ++.endm ++ ++.macro ALTERNATIVE_2 oldinstr, newinstr1, feature1, newinstr2, feature2 ++.Lold_start_\@: ++ \oldinstr ++.Lold_end_\@: ++ ++ .pushsection .altinstructions, "a", @progbits ++ altinstruction_entry .Lold_start_\@, .Lnew1_start_\@, \feature1, \ ++ (.Lold_end_\@ - .Lold_start_\@), (.Lnew1_end_\@ - .Lnew1_start_\@) ++ altinstruction_entry .Lold_start_\@, .Lnew2_start_\@, \feature2, \ ++ (.Lold_end_\@ - .Lold_start_\@), (.Lnew2_end_\@ - .Lnew2_start_\@) ++ ++ .section .discard, "a", @progbits ++ /* Assembler-time check that \newinstr{1,2} aren't longer than \oldinstr. */ ++ .byte 0xff + (.Lnew1_end_\@ - .Lnew1_start_\@) - (.Lold_end_\@ - .Lold_start_\@) ++ .byte 0xff + (.Lnew2_end_\@ - .Lnew2_start_\@) - (.Lold_end_\@ - .Lold_start_\@) ++ ++ .section .altinstr_replacement, "ax", @progbits ++.Lnew1_start_\@: ++ \newinstr1 ++.Lnew1_end_\@: ++.Lnew2_start_\@: ++ \newinstr2 ++.Lnew2_end_\@: ++ .popsection ++.endm ++ + #endif /* __ASSEMBLY__ */ + #endif /* _ASM_X86_ALTERNATIVE_ASM_H_ */ + +-- +2.14.3 + + +From c534ab4e940ae3fbddf0b4840c3549c03654921f Mon Sep 17 00:00:00 2001 +From: Andrew Cooper +Date: Thu, 8 Feb 2018 10:56:01 +0100 +Subject: [PATCH 22/49] x86/hvm: Rename update_guest_vendor() callback to + cpuid_policy_changed() + +It will shortly be used for more than just changing the vendor. + +Signed-off-by: Andrew Cooper +Reviewed-by: Wei Liu +Reviewed-by: Jan Beulich +master commit: 3bea00966eb6680410c89df764d075a8fbacc3cc +master date: 2018-01-05 19:57:07 +0000 +--- + xen/arch/x86/domctl.c | 17 ++++++++++------- + xen/arch/x86/hvm/hvm.c | 2 +- + xen/arch/x86/hvm/svm/svm.c | 4 ++-- + xen/arch/x86/hvm/vmx/vmx.c | 5 ++--- + xen/include/asm-x86/hvm/hvm.h | 6 +++--- + 5 files changed, 18 insertions(+), 16 deletions(-) + +diff --git a/xen/arch/x86/domctl.c b/xen/arch/x86/domctl.c +index 075ee92cd7..fbb4c5e758 100644 +--- a/xen/arch/x86/domctl.c ++++ b/xen/arch/x86/domctl.c +@@ -53,6 +53,7 @@ static int update_domain_cpuid_info(struct domain *d, + struct cpuid_policy *p = d->arch.cpuid; + const struct cpuid_leaf leaf = { ctl->eax, ctl->ebx, ctl->ecx, ctl->edx }; + int old_vendor = p->x86_vendor; ++ bool call_policy_changed = false; /* Avoid for_each_vcpu() unnecessarily */ + + /* + * Skip update for leaves we don't care about. 
This avoids the overhead +@@ -128,13 +129,7 @@ static int update_domain_cpuid_info(struct domain *d, + switch ( ctl->input[0] ) + { + case 0: +- if ( is_hvm_domain(d) && (p->x86_vendor != old_vendor) ) +- { +- struct vcpu *v; +- +- for_each_vcpu( d, v ) +- hvm_update_guest_vendor(v); +- } ++ call_policy_changed = (p->x86_vendor != old_vendor); + break; + + case 1: +@@ -299,6 +294,14 @@ static int update_domain_cpuid_info(struct domain *d, + break; + } + ++ if ( is_hvm_domain(d) && call_policy_changed ) ++ { ++ struct vcpu *v; ++ ++ for_each_vcpu( d, v ) ++ hvm_cpuid_policy_changed(v); ++ } ++ + return 0; + } + +diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c +index 9f7b096072..f5de233b2e 100644 +--- a/xen/arch/x86/hvm/hvm.c ++++ b/xen/arch/x86/hvm/hvm.c +@@ -1555,7 +1555,7 @@ int hvm_vcpu_initialise(struct vcpu *v) + hvm_set_guest_tsc(v, 0); + } + +- hvm_update_guest_vendor(v); ++ hvm_cpuid_policy_changed(v); + + return 0; + +diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c +index b9cf423fd9..b5b927933f 100644 +--- a/xen/arch/x86/hvm/svm/svm.c ++++ b/xen/arch/x86/hvm/svm/svm.c +@@ -613,7 +613,7 @@ static void svm_update_guest_efer(struct vcpu *v) + vmcb_set_efer(vmcb, new_efer); + } + +-static void svm_update_guest_vendor(struct vcpu *v) ++static void svm_cpuid_policy_changed(struct vcpu *v) + { + struct arch_svm_struct *arch_svm = &v->arch.hvm_svm; + struct vmcb_struct *vmcb = arch_svm->vmcb; +@@ -2422,7 +2422,7 @@ static struct hvm_function_table __initdata svm_function_table = { + .get_shadow_gs_base = svm_get_shadow_gs_base, + .update_guest_cr = svm_update_guest_cr, + .update_guest_efer = svm_update_guest_efer, +- .update_guest_vendor = svm_update_guest_vendor, ++ .cpuid_policy_changed = svm_cpuid_policy_changed, + .fpu_leave = svm_fpu_leave, + .set_guest_pat = svm_set_guest_pat, + .get_guest_pat = svm_get_guest_pat, +diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c +index 73254bf5d4..4221fb8c56 100644 +--- a/xen/arch/x86/hvm/vmx/vmx.c ++++ b/xen/arch/x86/hvm/vmx/vmx.c +@@ -72,7 +72,6 @@ static void vmx_free_vlapic_mapping(struct domain *d); + static void vmx_install_vlapic_mapping(struct vcpu *v); + static void vmx_update_guest_cr(struct vcpu *v, unsigned int cr); + static void vmx_update_guest_efer(struct vcpu *v); +-static void vmx_update_guest_vendor(struct vcpu *v); + static void vmx_wbinvd_intercept(void); + static void vmx_fpu_dirty_intercept(void); + static int vmx_msr_read_intercept(unsigned int msr, uint64_t *msr_content); +@@ -655,7 +654,7 @@ void vmx_update_exception_bitmap(struct vcpu *v) + __vmwrite(EXCEPTION_BITMAP, bitmap); + } + +-static void vmx_update_guest_vendor(struct vcpu *v) ++static void vmx_cpuid_policy_changed(struct vcpu *v) + { + if ( opt_hvm_fep || + (v->domain->arch.cpuid->x86_vendor != boot_cpu_data.x86_vendor) ) +@@ -2318,7 +2317,7 @@ static struct hvm_function_table __initdata vmx_function_table = { + .update_host_cr3 = vmx_update_host_cr3, + .update_guest_cr = vmx_update_guest_cr, + .update_guest_efer = vmx_update_guest_efer, +- .update_guest_vendor = vmx_update_guest_vendor, ++ .cpuid_policy_changed = vmx_cpuid_policy_changed, + .fpu_leave = vmx_fpu_leave, + .set_guest_pat = vmx_set_guest_pat, + .get_guest_pat = vmx_get_guest_pat, +diff --git a/xen/include/asm-x86/hvm/hvm.h b/xen/include/asm-x86/hvm/hvm.h +index 6ecad33316..7275c65d07 100644 +--- a/xen/include/asm-x86/hvm/hvm.h ++++ b/xen/include/asm-x86/hvm/hvm.h +@@ -135,7 +135,7 @@ struct hvm_function_table { + void (*update_guest_cr)(struct 
vcpu *v, unsigned int cr); + void (*update_guest_efer)(struct vcpu *v); + +- void (*update_guest_vendor)(struct vcpu *v); ++ void (*cpuid_policy_changed)(struct vcpu *v); + + void (*fpu_leave)(struct vcpu *v); + +@@ -334,9 +334,9 @@ static inline void hvm_update_guest_efer(struct vcpu *v) + hvm_funcs.update_guest_efer(v); + } + +-static inline void hvm_update_guest_vendor(struct vcpu *v) ++static inline void hvm_cpuid_policy_changed(struct vcpu *v) + { +- hvm_funcs.update_guest_vendor(v); ++ hvm_funcs.cpuid_policy_changed(v); + } + + /* +-- +2.14.3 + + +From e32f814160c95094da83fbc813b45eca42d5397a Mon Sep 17 00:00:00 2001 +From: Andrew Cooper +Date: Thu, 8 Feb 2018 10:56:51 +0100 +Subject: [PATCH 23/49] x86: Introduce a common cpuid_policy_updated() + +No practical change at the moment, but future changes will need to react +irrespective of guest type. + +Signed-off-by: Andrew Cooper +Reviewed-by: Wei Liu +Acked-by: Jan Beulich +master commit: b357546b43ab87dfb10d740ae637a685134d5e32 +master date: 2018-01-05 19:57:07 +0000 +--- + xen/arch/x86/domain.c | 12 ++++++++++++ + xen/arch/x86/domctl.c | 4 ++-- + xen/arch/x86/hvm/hvm.c | 2 -- + xen/include/asm-x86/domain.h | 2 ++ + 4 files changed, 16 insertions(+), 4 deletions(-) + +diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c +index b357b60f73..aaa2b28413 100644 +--- a/xen/arch/x86/domain.c ++++ b/xen/arch/x86/domain.c +@@ -363,6 +363,8 @@ int vcpu_initialise(struct vcpu *v) + + if ( (rc = init_vcpu_msr_policy(v)) ) + goto fail; ++ ++ cpuid_policy_updated(v); + } + + return rc; +@@ -2026,6 +2028,16 @@ int domain_relinquish_resources(struct domain *d) + return 0; + } + ++/* ++ * Called during vcpu construction, and each time the toolstack changes the ++ * CPUID configuration for the domain. ++ */ ++void cpuid_policy_updated(struct vcpu *v) ++{ ++ if ( is_hvm_vcpu(v) ) ++ hvm_cpuid_policy_changed(v); ++} ++ + void arch_dump_domain_info(struct domain *d) + { + paging_dump_domain_info(d); +diff --git a/xen/arch/x86/domctl.c b/xen/arch/x86/domctl.c +index fbb4c5e758..e61201267b 100644 +--- a/xen/arch/x86/domctl.c ++++ b/xen/arch/x86/domctl.c +@@ -294,12 +294,12 @@ static int update_domain_cpuid_info(struct domain *d, + break; + } + +- if ( is_hvm_domain(d) && call_policy_changed ) ++ if ( call_policy_changed ) + { + struct vcpu *v; + + for_each_vcpu( d, v ) +- hvm_cpuid_policy_changed(v); ++ cpuid_policy_updated(v); + } + + return 0; +diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c +index f5de233b2e..2a3dd4ee91 100644 +--- a/xen/arch/x86/hvm/hvm.c ++++ b/xen/arch/x86/hvm/hvm.c +@@ -1555,8 +1555,6 @@ int hvm_vcpu_initialise(struct vcpu *v) + hvm_set_guest_tsc(v, 0); + } + +- hvm_cpuid_policy_changed(v); +- + return 0; + + fail6: +diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h +index f69911918e..4679d5477d 100644 +--- a/xen/include/asm-x86/domain.h ++++ b/xen/include/asm-x86/domain.h +@@ -79,6 +79,8 @@ void toggle_guest_mode(struct vcpu *); + /* x86/64: toggle guest page tables between kernel and user modes. */ + void toggle_guest_pt(struct vcpu *); + ++void cpuid_policy_updated(struct vcpu *v); ++ + /* + * Initialise a hypercall-transfer page. The given pointer must be mapped + * in Xen virtual address space (accesses are not validated or checked). 
+-- +2.14.3 + + +From d02ef3d27485e1429ac480cca78ab3636387df23 Mon Sep 17 00:00:00 2001 +From: Andrew Cooper +Date: Thu, 8 Feb 2018 10:57:44 +0100 +Subject: [PATCH 24/49] x86/entry: Rearrange RESTORE_ALL to restore register in + stack order + +Results in a more predictable (i.e. linear) memory access pattern. + +No functional change. + +This is part of XSA-254. + +Signed-off-by: Andrew Cooper +Reviewed-by: Jan Beulich +Reviewed-by: Wei Liu +master commit: f85d105e27735f0e20aa30d77f03774f3ed55ae5 +master date: 2018-01-05 19:57:08 +0000 +--- + xen/include/asm-x86/asm_defns.h | 26 +++++++++++++------------- + 1 file changed, 13 insertions(+), 13 deletions(-) + +diff --git a/xen/include/asm-x86/asm_defns.h b/xen/include/asm-x86/asm_defns.h +index fb0fee9286..c0523861d9 100644 +--- a/xen/include/asm-x86/asm_defns.h ++++ b/xen/include/asm-x86/asm_defns.h +@@ -288,6 +288,19 @@ static always_inline void stac(void) + * safety against information leaks. + */ + .macro RESTORE_ALL adj=0 compat=0 ++.if !\compat ++ movq UREGS_r15(%rsp), %r15 ++ movq UREGS_r14(%rsp), %r14 ++ movq UREGS_r13(%rsp), %r13 ++ movq UREGS_r12(%rsp), %r12 ++.else ++ xor %r15, %r15 ++ xor %r14, %r14 ++ xor %r13, %r13 ++ xor %r12, %r12 ++.endif ++ LOAD_ONE_REG(bp, \compat) ++ LOAD_ONE_REG(bx, \compat) + .if !\compat + movq UREGS_r11(%rsp),%r11 + movq UREGS_r10(%rsp),%r10 +@@ -304,19 +317,6 @@ static always_inline void stac(void) + LOAD_ONE_REG(dx, \compat) + LOAD_ONE_REG(si, \compat) + LOAD_ONE_REG(di, \compat) +-.if !\compat +- movq UREGS_r15(%rsp),%r15 +- movq UREGS_r14(%rsp),%r14 +- movq UREGS_r13(%rsp),%r13 +- movq UREGS_r12(%rsp),%r12 +-.else +- xor %r15, %r15 +- xor %r14, %r14 +- xor %r13, %r13 +- xor %r12, %r12 +-.endif +- LOAD_ONE_REG(bp, \compat) +- LOAD_ONE_REG(bx, \compat) + subq $-(UREGS_error_code-UREGS_r15+\adj), %rsp + .endm + +-- +2.14.3 + + +From ab95cb0d948fdc9fcda215fec0526ac902340b14 Mon Sep 17 00:00:00 2001 +From: Andrew Cooper +Date: Thu, 8 Feb 2018 11:00:45 +0100 +Subject: [PATCH 25/49] x86/hvm: Use SAVE_ALL to construct the cpu_user_regs + frame after VMExit + +No practical change. + +One side effect in debug builds is that %rbp is inverted in the manner +expected by the stack unwinder to indicate a interrupt frame. + +This is part of XSA-254. 
+ +Signed-off-by: Andrew Cooper +Reviewed-by: Jan Beulich +Reviewed-by: Wei Liu +master commit: 13682ca8c94bd5612a44f7f1edc1fd8ff675dacb +master date: 2018-01-05 19:57:08 +0000 +--- + xen/arch/x86/hvm/svm/entry.S | 22 ++++------------------ + xen/arch/x86/hvm/vmx/entry.S | 17 ++--------------- + 2 files changed, 6 insertions(+), 33 deletions(-) + +diff --git a/xen/arch/x86/hvm/svm/entry.S b/xen/arch/x86/hvm/svm/entry.S +index 4a72e38e8b..df86da0a81 100644 +--- a/xen/arch/x86/hvm/svm/entry.S ++++ b/xen/arch/x86/hvm/svm/entry.S +@@ -98,24 +98,10 @@ UNLIKELY_END(svm_trace) + + VMRUN + +- GET_CURRENT(ax) +- push %rdi +- push %rsi +- push %rdx +- push %rcx +- mov VCPU_svm_vmcb(%rax),%rcx +- push %rax +- push %r8 +- push %r9 +- push %r10 +- push %r11 +- push %rbx +- mov %rax,%rbx +- push %rbp +- push %r12 +- push %r13 +- push %r14 +- push %r15 ++ SAVE_ALL ++ ++ GET_CURRENT(bx) ++ mov VCPU_svm_vmcb(%rbx),%rcx + + movb $0,VCPU_svm_vmcb_in_sync(%rbx) + mov VMCB_rax(%rcx),%rax +diff --git a/xen/arch/x86/hvm/vmx/entry.S b/xen/arch/x86/hvm/vmx/entry.S +index 47cd674260..b2f98be7f5 100644 +--- a/xen/arch/x86/hvm/vmx/entry.S ++++ b/xen/arch/x86/hvm/vmx/entry.S +@@ -30,23 +30,10 @@ + #define VMLAUNCH .byte 0x0f,0x01,0xc2 + + ENTRY(vmx_asm_vmexit_handler) +- push %rdi +- push %rsi +- push %rdx +- push %rcx +- push %rax ++ SAVE_ALL ++ + mov %cr2,%rax +- push %r8 +- push %r9 +- push %r10 +- push %r11 +- push %rbx + GET_CURRENT(bx) +- push %rbp +- push %r12 +- push %r13 +- push %r14 +- push %r15 + + movb $1,VCPU_vmx_launched(%rbx) + mov %rax,VCPU_hvm_guest_cr2(%rbx) +-- +2.14.3 + + +From 1830b20b6b83be38738784ea162d62fcf85f3178 Mon Sep 17 00:00:00 2001 +From: Andrew Cooper +Date: Thu, 8 Feb 2018 11:01:29 +0100 +Subject: [PATCH 26/49] x86/entry: Erase guest GPR state on entry to Xen + +This reduces the number of code gadgets which can be attacked with arbitrary +guest-controlled GPR values. + +This is part of XSA-254. + +Signed-off-by: Andrew Cooper +Reviewed-by: Jan Beulich +Reviewed-by: Wei Liu +master commit: 03bd8c3a70d101fc2f8f36f1e171b7594462a4cd +master date: 2018-01-05 19:57:08 +0000 +--- + xen/include/asm-x86/asm_defns.h | 16 ++++++++++++++++ + 1 file changed, 16 insertions(+) + +diff --git a/xen/include/asm-x86/asm_defns.h b/xen/include/asm-x86/asm_defns.h +index c0523861d9..73d96227f4 100644 +--- a/xen/include/asm-x86/asm_defns.h ++++ b/xen/include/asm-x86/asm_defns.h +@@ -247,22 +247,34 @@ static always_inline void stac(void) + addq $-(UREGS_error_code-UREGS_r15), %rsp + cld + movq %rdi,UREGS_rdi(%rsp) ++ xor %edi, %edi + movq %rsi,UREGS_rsi(%rsp) ++ xor %esi, %esi + movq %rdx,UREGS_rdx(%rsp) ++ xor %edx, %edx + movq %rcx,UREGS_rcx(%rsp) ++ xor %ecx, %ecx + movq %rax,UREGS_rax(%rsp) ++ xor %eax, %eax + .if !\compat + movq %r8,UREGS_r8(%rsp) + movq %r9,UREGS_r9(%rsp) + movq %r10,UREGS_r10(%rsp) + movq %r11,UREGS_r11(%rsp) + .endif ++ xor %r8, %r8 ++ xor %r9, %r9 ++ xor %r10, %r10 ++ xor %r11, %r11 + movq %rbx,UREGS_rbx(%rsp) ++ xor %ebx, %ebx + movq %rbp,UREGS_rbp(%rsp) + #ifdef CONFIG_FRAME_POINTER + /* Indicate special exception stack frame by inverting the frame pointer. 
*/ + leaq UREGS_rbp(%rsp), %rbp + notq %rbp ++#else ++ xor %ebp, %ebp + #endif + .if !\compat + movq %r12,UREGS_r12(%rsp) +@@ -270,6 +282,10 @@ static always_inline void stac(void) + movq %r14,UREGS_r14(%rsp) + movq %r15,UREGS_r15(%rsp) + .endif ++ xor %r12, %r12 ++ xor %r13, %r13 ++ xor %r14, %r14 ++ xor %r15, %r15 + .endm + + #define LOAD_ONE_REG(reg, compat) \ +-- +2.14.3 + + +From 8743fc2ef7d107104c17b773eadee15fefa64e53 Mon Sep 17 00:00:00 2001 +From: Andrew Cooper +Date: Thu, 8 Feb 2018 11:02:14 +0100 +Subject: [PATCH 27/49] common/wait: Clarifications to wait infrastructure + +This logic is not as clear as it could be. Add some comments to help. + +Rearrange the asm block in __prepare_to_wait() to separate the GPR +saving/restoring from the internal logic. + +While tweaking, add an unreachable() following the jmp in +check_wakeup_from_wait(). + +No functional change. + +Signed-off-by: Andrew Cooper +Reviewed-by: Jan Beulich +master commit: 2d1c82261d966735e82e5971eddb63ba3c565a37 +master date: 2018-01-05 19:57:08 +0000 +--- + xen/common/wait.c | 31 ++++++++++++++++++++++++------- + 1 file changed, 24 insertions(+), 7 deletions(-) + +diff --git a/xen/common/wait.c b/xen/common/wait.c +index c5fc094e2c..3d3d9fe7a2 100644 +--- a/xen/common/wait.c ++++ b/xen/common/wait.c +@@ -138,14 +138,26 @@ static void __prepare_to_wait(struct waitqueue_vcpu *wqv) + domain_crash_synchronous(); + } + ++ /* Hand-rolled setjmp(). */ + asm volatile ( +- "push %%rax; push %%rbx; push %%rdx; " +- "push %%rbp; push %%r8; push %%r9; push %%r10; push %%r11; " +- "push %%r12; push %%r13; push %%r14; push %%r15; call 1f; " +- "1: addq $2f-1b,(%%rsp); sub %%esp,%%ecx; cmp %3,%%ecx; ja 3f; " +- "mov %%rsp,%%rsi; 2: rep movsb; mov %%rsp,%%rsi; 3: pop %%rax; " +- "pop %%r15; pop %%r14; pop %%r13; pop %%r12; " +- "pop %%r11; pop %%r10; pop %%r9; pop %%r8; " ++ "push %%rax; push %%rbx; push %%rdx; push %%rbp;" ++ "push %%r8; push %%r9; push %%r10; push %%r11;" ++ "push %%r12; push %%r13; push %%r14; push %%r15;" ++ ++ "call 1f;" ++ "1: addq $2f-1b,(%%rsp);" ++ "sub %%esp,%%ecx;" ++ "cmp %3,%%ecx;" ++ "ja 3f;" ++ "mov %%rsp,%%rsi;" ++ ++ /* check_wakeup_from_wait() longjmp()'s to this point. */ ++ "2: rep movsb;" ++ "mov %%rsp,%%rsi;" ++ "3: pop %%rax;" ++ ++ "pop %%r15; pop %%r14; pop %%r13; pop %%r12;" ++ "pop %%r11; pop %%r10; pop %%r9; pop %%r8;" + "pop %%rbp; pop %%rdx; pop %%rbx; pop %%rax" + : "=&S" (wqv->esp), "=&c" (dummy), "=&D" (dummy) + : "i" (PAGE_SIZE), "0" (0), "1" (cpu_info), "2" (wqv->stack) +@@ -189,11 +201,16 @@ void check_wakeup_from_wait(void) + wait(); /* takes us back into the scheduler */ + } + ++ /* ++ * Hand-rolled longjmp(). Returns to the pointer on the top of ++ * wqv->stack, and lands on a `rep movs` instruction. ++ */ + asm volatile ( + "mov %1,%%"__OP"sp; jmp *(%0)" + : : "S" (wqv->stack), "D" (wqv->esp), + "c" ((char *)get_cpu_info() - (char *)wqv->esp) + : "memory" ); ++ unreachable(); + } + + #else /* !CONFIG_X86 */ +-- +2.14.3 + + +From 47bbcb2dd1291d61062fe58da807010631fe1b3a Mon Sep 17 00:00:00 2001 +From: Andrew Cooper +Date: Thu, 8 Feb 2018 11:03:45 +0100 +Subject: [PATCH 28/49] x86: Support compiling with indirect branch thunks + +Use -mindirect-branch=thunk-extern/-mindirect-branch-register when available. +To begin with, use the retpoline thunk. Later work will add alternative +thunks which can be selected at boot time. + +This is part of XSA-254. 
+ +Signed-off-by: Andrew Cooper +Acked-by: Jan Beulich +master commit: 3659f0f4bcc6ca08103d1a7ae4e97535ecc978be +master date: 2018-01-16 17:45:50 +0000 +--- + xen/arch/x86/Makefile | 1 + + xen/arch/x86/Rules.mk | 7 +++++++ + xen/arch/x86/indirect-thunk.S | 38 ++++++++++++++++++++++++++++++++++++++ + xen/arch/x86/xen.lds.S | 1 + + 4 files changed, 47 insertions(+) + create mode 100644 xen/arch/x86/indirect-thunk.S + +diff --git a/xen/arch/x86/Makefile b/xen/arch/x86/Makefile +index d5d58a205e..b334366db8 100644 +--- a/xen/arch/x86/Makefile ++++ b/xen/arch/x86/Makefile +@@ -36,6 +36,7 @@ obj-y += io_apic.o + obj-$(CONFIG_LIVEPATCH) += alternative.o livepatch.o + obj-y += msi.o + obj-y += msr.o ++obj-$(CONFIG_INDIRECT_THUNK) += indirect-thunk.o + obj-y += ioport_emulate.o + obj-y += irq.o + obj-$(CONFIG_KEXEC) += machine_kexec.o +diff --git a/xen/arch/x86/Rules.mk b/xen/arch/x86/Rules.mk +index 568657ee52..abcc4d4f70 100644 +--- a/xen/arch/x86/Rules.mk ++++ b/xen/arch/x86/Rules.mk +@@ -30,3 +30,10 @@ CFLAGS += -fno-asynchronous-unwind-tables + ifneq ($(call cc-option,$(CC),-fvisibility=hidden,n),n) + CFLAGS += -DGCC_HAS_VISIBILITY_ATTRIBUTE + endif ++ ++# Compile with thunk-extern, indirect-branch-register if avaiable. ++ifneq ($(call cc-option,$(CC),-mindirect-branch-register,n),n) ++CFLAGS += -mindirect-branch=thunk-extern -mindirect-branch-register ++CFLAGS += -DCONFIG_INDIRECT_THUNK ++export CONFIG_INDIRECT_THUNK=y ++endif +diff --git a/xen/arch/x86/indirect-thunk.S b/xen/arch/x86/indirect-thunk.S +new file mode 100644 +index 0000000000..3eaf505d0e +--- /dev/null ++++ b/xen/arch/x86/indirect-thunk.S +@@ -0,0 +1,38 @@ ++/* ++ * Implement __x86_indirect_thunk_* symbols for use with compatbile compilers ++ * and the -mindirect-branch=thunk-extern -mindirect-branch-register options. ++ * ++ * Copyright (c) 2017-2018 Citrix Systems Ltd. ++ * ++ * This source code is licensed under the GNU General Public License, ++ * Version 2. See the file COPYING for more details. ++ */ ++ .file __FILE__ ++ ++#include ++ ++.macro IND_THUNK_RETPOLINE reg:req ++ call 2f ++1: ++ lfence ++ jmp 1b ++2: ++ mov %\reg, (%rsp) ++ ret ++.endm ++ ++/* ++ * Build the __x86_indirect_thunk_* symbols. Currently implement the ++ * retpoline thunk only. ++ */ ++.macro GEN_INDIRECT_THUNK reg:req ++ .section .text.__x86_indirect_thunk_\reg, "ax", @progbits ++ ++ENTRY(__x86_indirect_thunk_\reg) ++ IND_THUNK_RETPOLINE \reg ++.endm ++ ++/* Instantiate GEN_INDIRECT_THUNK for each register except %rsp. */ ++.irp reg, ax, cx, dx, bx, bp, si, di, 8, 9, 10, 11, 12, 13, 14, 15 ++ GEN_INDIRECT_THUNK reg=r\reg ++.endr +diff --git a/xen/arch/x86/xen.lds.S b/xen/arch/x86/xen.lds.S +index d5e8821d41..d3c984a463 100644 +--- a/xen/arch/x86/xen.lds.S ++++ b/xen/arch/x86/xen.lds.S +@@ -59,6 +59,7 @@ SECTIONS + _stext = .; /* Text and read-only data */ + *(.text) + *(.text.page_aligned) ++ *(.text.__x86_indirect_thunk_*) + *(.text.cold) + *(.text.unlikely) + *(.fixup) +-- +2.14.3 + + +From 32babfc19ad3a3123f8ed4466df3c79492a2212b Mon Sep 17 00:00:00 2001 +From: Andrew Cooper +Date: Thu, 8 Feb 2018 11:04:53 +0100 +Subject: [PATCH 29/49] x86: Support indirect thunks from assembly code + +Introduce INDIRECT_CALL and INDIRECT_JMP which either degrade to a normal +indirect branch, or dispatch to the __x86_indirect_thunk_* symbols. + +Update all the manual indirect branches in to use the new thunks. The +indirect branches in the early boot and kexec path are left intact as we can't +use the compiled-in thunks at those points. 
+ +This is part of XSA-254. + +Signed-off-by: Andrew Cooper +Reviewed-by: Jan Beulich +master commit: 7c508612f7a5096b4819d4ef2ce566e01bd66c0c +master date: 2018-01-16 17:45:50 +0000 +--- + xen/Rules.mk | 4 ++-- + xen/arch/x86/Rules.mk | 6 +++++ + xen/arch/x86/boot/trampoline.S | 24 +++++++++++++++++-- + xen/arch/x86/extable.c | 4 ++-- + xen/arch/x86/pv/emul-priv-op.c | 39 +++++++++++++++++++++--------- + xen/arch/x86/x86_64/entry.S | 6 +++-- + xen/arch/x86/x86_emulate/x86_emulate.c | 4 ++-- + xen/common/wait.c | 8 ++++--- + xen/include/asm-x86/asm_defns.h | 8 +++++++ + xen/include/asm-x86/indirect_thunk_asm.h | 41 ++++++++++++++++++++++++++++++++ + 10 files changed, 120 insertions(+), 24 deletions(-) + create mode 100644 xen/include/asm-x86/indirect_thunk_asm.h + +diff --git a/xen/Rules.mk b/xen/Rules.mk +index 2659f8a4d1..3cf40754a6 100644 +--- a/xen/Rules.mk ++++ b/xen/Rules.mk +@@ -66,8 +66,8 @@ endif + + AFLAGS-y += -D__ASSEMBLY__ + +-# Clang's built-in assembler can't handle .code16/.code32/.code64 yet +-AFLAGS-$(clang) += -no-integrated-as ++# Clang's built-in assembler can't handle embedded .include's ++CFLAGS-$(clang) += -no-integrated-as + + ALL_OBJS := $(ALL_OBJS-y) + +diff --git a/xen/arch/x86/Rules.mk b/xen/arch/x86/Rules.mk +index abcc4d4f70..70e9d8f5cf 100644 +--- a/xen/arch/x86/Rules.mk ++++ b/xen/arch/x86/Rules.mk +@@ -37,3 +37,9 @@ CFLAGS += -mindirect-branch=thunk-extern -mindirect-branch-register + CFLAGS += -DCONFIG_INDIRECT_THUNK + export CONFIG_INDIRECT_THUNK=y + endif ++ ++# Set up the assembler include path properly for older GCC toolchains. Clang ++# objects to the agument being passed however. ++ifneq ($(clang),y) ++CFLAGS += -Wa,-I$(BASEDIR)/include ++endif +diff --git a/xen/arch/x86/boot/trampoline.S b/xen/arch/x86/boot/trampoline.S +index 4d640f3fcd..f70d913544 100644 +--- a/xen/arch/x86/boot/trampoline.S ++++ b/xen/arch/x86/boot/trampoline.S +@@ -153,8 +153,28 @@ trampoline_protmode_entry: + .code64 + start64: + /* Jump to high mappings. */ +- movabs $__high_start,%rax +- jmpq *%rax ++ movabs $__high_start, %rdi ++ ++#ifdef CONFIG_INDIRECT_THUNK ++ /* ++ * If booting virtualised, or hot-onlining a CPU, sibling threads can ++ * attempt Branch Target Injection against this jmp. ++ * ++ * We've got no usable stack so can't use a RETPOLINE thunk, and are ++ * further than disp32 from the high mappings so couldn't use ++ * JUMP_THUNK even if it was a non-RETPOLINE thunk. Furthermore, an ++ * LFENCE isn't necessarily safe to use at this point. ++ * ++ * As this isn't a hotpath, use a fully serialising event to reduce ++ * the speculation window as much as possible. %ebx needs preserving ++ * for __high_start. 
++ */ ++ mov %ebx, %esi ++ cpuid ++ mov %esi, %ebx ++#endif ++ ++ jmpq *%rdi + + #include "wakeup.S" + +diff --git a/xen/arch/x86/extable.c b/xen/arch/x86/extable.c +index 6fffe057c6..72f30d9060 100644 +--- a/xen/arch/x86/extable.c ++++ b/xen/arch/x86/extable.c +@@ -158,7 +158,7 @@ static int __init stub_selftest(void) + memcpy(ptr, tests[i].opc, ARRAY_SIZE(tests[i].opc)); + unmap_domain_page(ptr); + +- asm volatile ( "call *%[stb]\n" ++ asm volatile ( "INDIRECT_CALL %[stb]\n" + ".Lret%=:\n\t" + ".pushsection .fixup,\"ax\"\n" + ".Lfix%=:\n\t" +@@ -167,7 +167,7 @@ static int __init stub_selftest(void) + ".popsection\n\t" + _ASM_EXTABLE(.Lret%=, .Lfix%=) + : [exn] "+m" (res) +- : [stb] "rm" (addr), "a" (tests[i].rax)); ++ : [stb] "r" (addr), "a" (tests[i].rax)); + ASSERT(res == tests[i].res.raw); + } + +diff --git a/xen/arch/x86/pv/emul-priv-op.c b/xen/arch/x86/pv/emul-priv-op.c +index 5f23c2cfbf..b965b3ece7 100644 +--- a/xen/arch/x86/pv/emul-priv-op.c ++++ b/xen/arch/x86/pv/emul-priv-op.c +@@ -73,37 +73,54 @@ void (*pv_post_outb_hook)(unsigned int port, u8 value); + + typedef void io_emul_stub_t(struct cpu_user_regs *); + ++void __x86_indirect_thunk_rcx(void); ++ + static io_emul_stub_t *io_emul_stub_setup(struct priv_op_ctxt *ctxt, u8 opcode, + unsigned int port, unsigned int bytes) + { ++ struct stubs *this_stubs = &this_cpu(stubs); ++ unsigned long stub_va = this_stubs->addr + STUB_BUF_SIZE / 2; ++ + if ( !ctxt->io_emul_stub ) +- ctxt->io_emul_stub = map_domain_page(_mfn(this_cpu(stubs.mfn))) + +- (this_cpu(stubs.addr) & +- ~PAGE_MASK) + +- STUB_BUF_SIZE / 2; ++ ctxt->io_emul_stub = ++ map_domain_page(_mfn(this_stubs->mfn)) + (stub_va & ~PAGE_MASK); + + /* movq $host_to_guest_gpr_switch,%rcx */ + ctxt->io_emul_stub[0] = 0x48; + ctxt->io_emul_stub[1] = 0xb9; + *(void **)&ctxt->io_emul_stub[2] = (void *)host_to_guest_gpr_switch; ++ ++#ifdef CONFIG_INDIRECT_THUNK ++ /* callq __x86_indirect_thunk_rcx */ ++ ctxt->io_emul_stub[10] = 0xe8; ++ *(int32_t *)&ctxt->io_emul_stub[11] = ++ (long)__x86_indirect_thunk_rcx - (stub_va + 11 + 4); ++#else + /* callq *%rcx */ + ctxt->io_emul_stub[10] = 0xff; + ctxt->io_emul_stub[11] = 0xd1; ++ /* TODO: untangle ideal_nops from init/livepatch Kconfig options. */ ++ memcpy(&ctxt->io_emul_stub[12], "\x0f\x1f\x00", 3); /* P6_NOP3 */ ++#endif ++ + /* data16 or nop */ +- ctxt->io_emul_stub[12] = (bytes != 2) ? 0x90 : 0x66; ++ ctxt->io_emul_stub[15] = (bytes != 2) ? 0x90 : 0x66; + /* */ +- ctxt->io_emul_stub[13] = opcode; ++ ctxt->io_emul_stub[16] = opcode; + /* imm8 or nop */ +- ctxt->io_emul_stub[14] = !(opcode & 8) ? port : 0x90; ++ ctxt->io_emul_stub[17] = !(opcode & 8) ? port : 0x90; + /* ret (jumps to guest_to_host_gpr_switch) */ +- ctxt->io_emul_stub[15] = 0xc3; +- BUILD_BUG_ON(STUB_BUF_SIZE / 2 < 16); ++ ctxt->io_emul_stub[18] = 0xc3; ++ BUILD_BUG_ON(STUB_BUF_SIZE / 2 < 19); + + if ( ioemul_handle_quirk ) +- ioemul_handle_quirk(opcode, &ctxt->io_emul_stub[12], ctxt->ctxt.regs); ++ { ++ BUILD_BUG_ON(STUB_BUF_SIZE / 2 < 15 + 10); ++ ioemul_handle_quirk(opcode, &ctxt->io_emul_stub[15], ctxt->ctxt.regs); ++ } + + /* Handy function-typed pointer to the stub. 
*/ +- return (void *)(this_cpu(stubs.addr) + STUB_BUF_SIZE / 2); ++ return (void *)stub_va; + } + + +diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S +index a8825c89df..710c0616ba 100644 +--- a/xen/arch/x86/x86_64/entry.S ++++ b/xen/arch/x86/x86_64/entry.S +@@ -585,7 +585,8 @@ handle_exception_saved: + movzbl UREGS_entry_vector(%rsp),%eax + leaq exception_table(%rip),%rdx + PERFC_INCR(exceptions, %rax, %rbx) +- callq *(%rdx,%rax,8) ++ mov (%rdx, %rax, 8), %rdx ++ INDIRECT_CALL %rdx + mov %r15, STACK_CPUINFO_FIELD(xen_cr3)(%r14) + testb $3,UREGS_cs(%rsp) + jz restore_all_xen +@@ -757,7 +758,8 @@ handle_ist_exception: + 1: movq %rsp,%rdi + movzbl UREGS_entry_vector(%rsp),%eax + leaq exception_table(%rip),%rdx +- callq *(%rdx,%rax,8) ++ mov (%rdx, %rax, 8), %rdx ++ INDIRECT_CALL %rdx + mov %r15, STACK_CPUINFO_FIELD(xen_cr3)(%r14) + cmpb $TRAP_nmi,UREGS_entry_vector(%rsp) + jne ret_from_intr +diff --git a/xen/arch/x86/x86_emulate/x86_emulate.c b/xen/arch/x86/x86_emulate/x86_emulate.c +index 820495fb9c..ff0a003902 100644 +--- a/xen/arch/x86/x86_emulate/x86_emulate.c ++++ b/xen/arch/x86/x86_emulate/x86_emulate.c +@@ -867,7 +867,7 @@ static inline int mkec(uint8_t e, int32_t ec, ...) + #ifdef __XEN__ + # define invoke_stub(pre, post, constraints...) do { \ + union stub_exception_token res_ = { .raw = ~0 }; \ +- asm volatile ( pre "\n\tcall *%[stub]\n\t" post "\n" \ ++ asm volatile ( pre "\n\tINDIRECT_CALL %[stub]\n\t" post "\n" \ + ".Lret%=:\n\t" \ + ".pushsection .fixup,\"ax\"\n" \ + ".Lfix%=:\n\t" \ +@@ -876,7 +876,7 @@ static inline int mkec(uint8_t e, int32_t ec, ...) + ".popsection\n\t" \ + _ASM_EXTABLE(.Lret%=, .Lfix%=) \ + : [exn] "+g" (res_), constraints, \ +- [stub] "rm" (stub.func), \ ++ [stub] "r" (stub.func), \ + "m" (*(uint8_t(*)[MAX_INST_LEN + 1])stub.ptr) ); \ + if ( unlikely(~res_.raw) ) \ + { \ +diff --git a/xen/common/wait.c b/xen/common/wait.c +index 3d3d9fe7a2..a57bc10d61 100644 +--- a/xen/common/wait.c ++++ b/xen/common/wait.c +@@ -203,12 +203,14 @@ void check_wakeup_from_wait(void) + + /* + * Hand-rolled longjmp(). Returns to the pointer on the top of +- * wqv->stack, and lands on a `rep movs` instruction. ++ * wqv->stack, and lands on a `rep movs` instruction. All other GPRs are ++ * restored from the stack, so are available for use here. + */ + asm volatile ( +- "mov %1,%%"__OP"sp; jmp *(%0)" ++ "mov %1,%%"__OP"sp; INDIRECT_JMP %[ip]" + : : "S" (wqv->stack), "D" (wqv->esp), +- "c" ((char *)get_cpu_info() - (char *)wqv->esp) ++ "c" ((char *)get_cpu_info() - (char *)wqv->esp), ++ [ip] "r" (*(unsigned long *)wqv->stack) + : "memory" ); + unreachable(); + } +diff --git a/xen/include/asm-x86/asm_defns.h b/xen/include/asm-x86/asm_defns.h +index 73d96227f4..9cfd79f0c6 100644 +--- a/xen/include/asm-x86/asm_defns.h ++++ b/xen/include/asm-x86/asm_defns.h +@@ -13,6 +13,14 @@ + #include + #include + ++#ifdef __ASSEMBLY__ ++# include ++#else ++asm ( "\t.equ CONFIG_INDIRECT_THUNK, " ++ __stringify(IS_ENABLED(CONFIG_INDIRECT_THUNK)) ); ++asm ( "\t.include \"asm/indirect_thunk_asm.h\"" ); ++#endif ++ + #ifndef __ASSEMBLY__ + void ret_from_intr(void); + #endif +diff --git a/xen/include/asm-x86/indirect_thunk_asm.h b/xen/include/asm-x86/indirect_thunk_asm.h +new file mode 100644 +index 0000000000..96bcc25497 +--- /dev/null ++++ b/xen/include/asm-x86/indirect_thunk_asm.h +@@ -0,0 +1,41 @@ ++/* ++ * Warning! This file is included at an assembler level for .c files, causing ++ * usual #ifdef'ary to turn into comments. 
++ */ ++ ++.macro INDIRECT_BRANCH insn:req arg:req ++/* ++ * Create an indirect branch. insn is one of call/jmp, arg is a single ++ * register. ++ * ++ * With no compiler support, this degrades into a plain indirect call/jmp. ++ * With compiler support, dispatch to the correct __x86_indirect_thunk_* ++ */ ++ .if CONFIG_INDIRECT_THUNK == 1 ++ ++ $done = 0 ++ .irp reg, ax, cx, dx, bx, bp, si, di, 8, 9, 10, 11, 12, 13, 14, 15 ++ .ifeqs "\arg", "%r\reg" ++ \insn __x86_indirect_thunk_r\reg ++ $done = 1 ++ .exitm ++ .endif ++ .endr ++ ++ .if $done != 1 ++ .error "Bad register arg \arg" ++ .endif ++ ++ .else ++ \insn *\arg ++ .endif ++.endm ++ ++/* Convenience wrappers. */ ++.macro INDIRECT_CALL arg:req ++ INDIRECT_BRANCH call \arg ++.endm ++ ++.macro INDIRECT_JMP arg:req ++ INDIRECT_BRANCH jmp \arg ++.endm +-- +2.14.3 + + +From 6aaf353f2ecbe8ae57e16812a6d74a4f089def3a Mon Sep 17 00:00:00 2001 +From: Andrew Cooper +Date: Thu, 8 Feb 2018 11:06:07 +0100 +Subject: [PATCH 30/49] x86/boot: Report details of speculative mitigations + +Nothing very interesting at the moment, but the logic will grow as new +mitigations are added. + +This is part of XSA-254. + +Signed-off-by: Andrew Cooper +Acked-by: Jan Beulich +master commit: 31d6c53adf6417bf449ca50e8416e41b64d46803 +master date: 2018-01-16 17:45:50 +0000 +--- + xen/arch/x86/Makefile | 1 + + xen/arch/x86/setup.c | 3 ++ + xen/arch/x86/spec_ctrl.c | 75 +++++++++++++++++++++++++++++++++++++++++ + xen/include/asm-x86/spec_ctrl.h | 35 +++++++++++++++++++ + 4 files changed, 114 insertions(+) + create mode 100644 xen/arch/x86/spec_ctrl.c + create mode 100644 xen/include/asm-x86/spec_ctrl.h + +diff --git a/xen/arch/x86/Makefile b/xen/arch/x86/Makefile +index b334366db8..e8c49639d8 100644 +--- a/xen/arch/x86/Makefile ++++ b/xen/arch/x86/Makefile +@@ -57,6 +57,7 @@ obj-y += setup.o + obj-y += shutdown.o + obj-y += smp.o + obj-y += smpboot.o ++obj-y += spec_ctrl.o + obj-y += srat.o + obj-y += string.o + obj-y += sysctl.o +diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c +index 2e10c6bdf4..470427bc64 100644 +--- a/xen/arch/x86/setup.c ++++ b/xen/arch/x86/setup.c +@@ -51,6 +51,7 @@ + #include + #include + #include ++#include + + /* opt_nosmp: If true, secondary processors are ignored. */ + static bool __initdata opt_nosmp; +@@ -1502,6 +1503,8 @@ void __init noreturn __start_xen(unsigned long mbi_p) + if ( cpu_has_fsgsbase ) + set_in_cr4(X86_CR4_FSGSBASE); + ++ init_speculation_mitigations(); ++ + init_idle_domain(); + + this_cpu(stubs.addr) = alloc_stub_page(smp_processor_id(), +diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c +new file mode 100644 +index 0000000000..256701a43c +--- /dev/null ++++ b/xen/arch/x86/spec_ctrl.c +@@ -0,0 +1,75 @@ ++/****************************************************************************** ++ * arch/x86/spec_ctrl.c ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; If not, see . ++ * ++ * Copyright (c) 2017-2018 Citrix Systems Ltd. 
++ */ ++#include ++#include ++ ++#include ++#include ++ ++enum ind_thunk { ++ THUNK_DEFAULT, /* Decide which thunk to use at boot time. */ ++ THUNK_NONE, /* Missing compiler support for thunks. */ ++ ++ THUNK_RETPOLINE, ++}; ++ ++static void __init print_details(enum ind_thunk thunk) ++{ ++ printk(XENLOG_DEBUG "Speculative mitigation facilities:\n"); ++ ++ /* Compiled-in support which pertains to BTI mitigations. */ ++ if ( IS_ENABLED(CONFIG_INDIRECT_THUNK) ) ++ printk(XENLOG_DEBUG " Compiled-in support: INDIRECT_THUNK\n"); ++ ++ printk(XENLOG_INFO ++ "BTI mitigations: Thunk %s\n", ++ thunk == THUNK_NONE ? "N/A" : ++ thunk == THUNK_RETPOLINE ? "RETPOLINE" : "?"); ++} ++ ++void __init init_speculation_mitigations(void) ++{ ++ enum ind_thunk thunk = THUNK_DEFAULT; ++ ++ /* ++ * Supplimentary minor adjustments. Without compiler support, there are ++ * no thunks. ++ */ ++ if ( !IS_ENABLED(CONFIG_INDIRECT_THUNK) ) ++ thunk = THUNK_NONE; ++ ++ /* ++ * If there are still no thunk preferences, the compiled default is ++ * actually retpoline, and it is better than nothing. ++ */ ++ if ( thunk == THUNK_DEFAULT ) ++ thunk = THUNK_RETPOLINE; ++ ++ print_details(thunk); ++} ++ ++/* ++ * Local variables: ++ * mode: C ++ * c-file-style: "BSD" ++ * c-basic-offset: 4 ++ * tab-width: 4 ++ * indent-tabs-mode: nil ++ * End: ++ */ +diff --git a/xen/include/asm-x86/spec_ctrl.h b/xen/include/asm-x86/spec_ctrl.h +new file mode 100644 +index 0000000000..e088a551da +--- /dev/null ++++ b/xen/include/asm-x86/spec_ctrl.h +@@ -0,0 +1,35 @@ ++/****************************************************************************** ++ * include/asm-x86/spec_ctrl.h ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; If not, see . ++ * ++ * Copyright (c) 2017-2018 Citrix Systems Ltd. ++ */ ++ ++#ifndef __X86_SPEC_CTRL_H__ ++#define __X86_SPEC_CTRL_H__ ++ ++void init_speculation_mitigations(void); ++ ++#endif /* !__X86_SPEC_CTRL_H__ */ ++ ++/* ++ * Local variables: ++ * mode: C ++ * c-file-style: "BSD" ++ * c-basic-offset: 4 ++ * tab-width: 4 ++ * indent-tabs-mode: nil ++ * End: ++ */ +-- +2.14.3 + + +From 0e12c2c881aa12016bb659ab1eb4c7289244b3e7 Mon Sep 17 00:00:00 2001 +From: Andrew Cooper +Date: Thu, 8 Feb 2018 11:07:02 +0100 +Subject: [PATCH 31/49] x86/amd: Try to set lfence as being Dispatch + Serialising + +This property is required for the AMD's recommended mitigation for Branch +Target Injection, but Xen needs to cope with being unable to detect or modify +the MSR. + +This is part of XSA-254. 
+ +Signed-off-by: Andrew Cooper +Reviewed-by: Jan Beulich +master commit: fe3ee5530a8d0d0b6a478167125d00c40f294a86 +master date: 2018-01-16 17:45:50 +0000 +--- + xen/arch/x86/cpu/amd.c | 35 ++++++++++++++++++++++++++++++++++- + xen/include/asm-x86/cpufeature.h | 1 + + xen/include/asm-x86/cpufeatures.h | 1 + + xen/include/asm-x86/msr-index.h | 1 + + 4 files changed, 37 insertions(+), 1 deletion(-) + +diff --git a/xen/arch/x86/cpu/amd.c b/xen/arch/x86/cpu/amd.c +index 5f36ac75a7..40c0bac80b 100644 +--- a/xen/arch/x86/cpu/amd.c ++++ b/xen/arch/x86/cpu/amd.c +@@ -558,8 +558,41 @@ static void init_amd(struct cpuinfo_x86 *c) + wrmsr_amd_safe(0xc001100d, l, h & ~1); + } + ++ /* ++ * Attempt to set lfence to be Dispatch Serialising. This MSR almost ++ * certainly isn't virtualised (and Xen at least will leak the real ++ * value in but silently discard writes), as well as being per-core ++ * rather than per-thread, so do a full safe read/write/readback cycle ++ * in the worst case. ++ */ ++ if (c->x86 == 0x0f || c->x86 == 0x11) ++ /* Always dispatch serialising on this hardare. */ ++ __set_bit(X86_FEATURE_LFENCE_DISPATCH, c->x86_capability); ++ else /* Implicily "== 0x10 || >= 0x12" by being 64bit. */ { ++ if (rdmsr_safe(MSR_AMD64_DE_CFG, value)) ++ /* Unable to read. Assume the safer default. */ ++ __clear_bit(X86_FEATURE_LFENCE_DISPATCH, ++ c->x86_capability); ++ else if (value & AMD64_DE_CFG_LFENCE_SERIALISE) ++ /* Already dispatch serialising. */ ++ __set_bit(X86_FEATURE_LFENCE_DISPATCH, ++ c->x86_capability); ++ else if (wrmsr_safe(MSR_AMD64_DE_CFG, ++ value | AMD64_DE_CFG_LFENCE_SERIALISE) || ++ rdmsr_safe(MSR_AMD64_DE_CFG, value) || ++ !(value & AMD64_DE_CFG_LFENCE_SERIALISE)) ++ /* Attempt to set failed. Assume the safer default. */ ++ __clear_bit(X86_FEATURE_LFENCE_DISPATCH, ++ c->x86_capability); ++ else ++ /* Successfully enabled! 
*/ ++ __set_bit(X86_FEATURE_LFENCE_DISPATCH, ++ c->x86_capability); ++ } ++ + /* MFENCE stops RDTSC speculation */ +- __set_bit(X86_FEATURE_MFENCE_RDTSC, c->x86_capability); ++ if (!cpu_has_lfence_dispatch) ++ __set_bit(X86_FEATURE_MFENCE_RDTSC, c->x86_capability); + + switch(c->x86) + { +diff --git a/xen/include/asm-x86/cpufeature.h b/xen/include/asm-x86/cpufeature.h +index 84cc51d2bd..adc333f20e 100644 +--- a/xen/include/asm-x86/cpufeature.h ++++ b/xen/include/asm-x86/cpufeature.h +@@ -104,6 +104,7 @@ + #define cpu_has_arch_perfmon boot_cpu_has(X86_FEATURE_ARCH_PERFMON) + #define cpu_has_cpuid_faulting boot_cpu_has(X86_FEATURE_CPUID_FAULTING) + #define cpu_has_aperfmperf boot_cpu_has(X86_FEATURE_APERFMPERF) ++#define cpu_has_lfence_dispatch boot_cpu_has(X86_FEATURE_LFENCE_DISPATCH) + + enum _cache_type { + CACHE_TYPE_NULL = 0, +diff --git a/xen/include/asm-x86/cpufeatures.h b/xen/include/asm-x86/cpufeatures.h +index bc98227763..58b37d6a6d 100644 +--- a/xen/include/asm-x86/cpufeatures.h ++++ b/xen/include/asm-x86/cpufeatures.h +@@ -22,3 +22,4 @@ XEN_CPUFEATURE(APERFMPERF, (FSCAPINTS+0)*32+ 8) /* APERFMPERF */ + XEN_CPUFEATURE(MFENCE_RDTSC, (FSCAPINTS+0)*32+ 9) /* MFENCE synchronizes RDTSC */ + XEN_CPUFEATURE(XEN_SMEP, (FSCAPINTS+0)*32+10) /* SMEP gets used by Xen itself */ + XEN_CPUFEATURE(XEN_SMAP, (FSCAPINTS+0)*32+11) /* SMAP gets used by Xen itself */ ++XEN_CPUFEATURE(LFENCE_DISPATCH, (FSCAPINTS+0)*32+12) /* lfence set as Dispatch Serialising */ +diff --git a/xen/include/asm-x86/msr-index.h b/xen/include/asm-x86/msr-index.h +index b99c623367..9c8bae6c35 100644 +--- a/xen/include/asm-x86/msr-index.h ++++ b/xen/include/asm-x86/msr-index.h +@@ -207,6 +207,7 @@ + #define MSR_AMD64_IC_CFG 0xc0011021 + #define MSR_AMD64_DC_CFG 0xc0011022 + #define MSR_AMD64_DE_CFG 0xc0011029 ++#define AMD64_DE_CFG_LFENCE_SERIALISE (_AC(1, ULL) << 1) + + #define MSR_AMD64_DR0_ADDRESS_MASK 0xc0011027 + #define MSR_AMD64_DR1_ADDRESS_MASK 0xc0011019 +-- +2.14.3 + + +From c513244d8e5b8aa0326c6f2d5fb2382811c97d6d Mon Sep 17 00:00:00 2001 +From: Andrew Cooper +Date: Thu, 8 Feb 2018 11:07:50 +0100 +Subject: [PATCH 32/49] x86: Introduce alternative indirect thunks + +Depending on hardware and microcode availability, we will want to replace +IND_THUNK_REPOLINE with other implementations. + +For AMD hardware, choose IND_THUNK_LFENCE in preference to retpoline if lfence +is known to be (or was successfully made) dispatch serialising. + +This is part of XSA-254. + +Signed-off-by: Andrew Cooper +Reviewed-by: Jan Beulich +master commit: 858cba0d4c6b6b45180afcb41561fd6585ad51a3 +master date: 2018-01-16 17:45:50 +0000 +--- + docs/misc/xen-command-line.markdown | 16 ++++++++ + xen/arch/x86/indirect-thunk.S | 17 +++++++-- + xen/arch/x86/spec_ctrl.c | 75 +++++++++++++++++++++++++++++++++++-- + xen/include/asm-x86/cpufeatures.h | 2 + + 4 files changed, 104 insertions(+), 6 deletions(-) + +diff --git a/docs/misc/xen-command-line.markdown b/docs/misc/xen-command-line.markdown +index 49539b4d1c..214012bd9e 100644 +--- a/docs/misc/xen-command-line.markdown ++++ b/docs/misc/xen-command-line.markdown +@@ -245,6 +245,22 @@ and not running softirqs. Reduce this if softirqs are not being run frequently + enough. Setting this to a high value may cause boot failure, particularly if + the NMI watchdog is also enabled. + ++### bti (x86) ++> `= List of [ thunk=retpoline|lfence|jmp ]` ++ ++Branch Target Injection controls. 
By default, Xen will pick the most ++appropriate BTI mitigations based on compiled in support, loaded microcode, ++and hardware details. ++ ++**WARNING: Any use of this option may interfere with heuristics. Use with ++extreme care.** ++ ++If Xen was compiled with INDIRECT_THUNK support, `thunk=` can be used to ++select which of the thunks gets patched into the `__x86_indirect_thunk_%reg` ++locations. The default thunk is `retpoline` (generally preferred for Intel ++hardware), with the alternatives being `jmp` (a `jmp *%reg` gadget, minimal ++overhead), and `lfence` (an `lfence; jmp *%reg` gadget, preferred for AMD). ++ + ### xenheap\_megabytes (arm32) + > `= ` + +diff --git a/xen/arch/x86/indirect-thunk.S b/xen/arch/x86/indirect-thunk.S +index 3eaf505d0e..7d34707218 100644 +--- a/xen/arch/x86/indirect-thunk.S ++++ b/xen/arch/x86/indirect-thunk.S +@@ -21,15 +21,26 @@ + ret + .endm + ++.macro IND_THUNK_LFENCE reg:req ++ lfence ++ jmp *%\reg ++.endm ++ ++.macro IND_THUNK_JMP reg:req ++ jmp *%\reg ++.endm ++ + /* +- * Build the __x86_indirect_thunk_* symbols. Currently implement the +- * retpoline thunk only. ++ * Build the __x86.indirect_thunk.* symbols. Execution lands on an ++ * alternative patch point which implements one of the above THUNK_*'s + */ + .macro GEN_INDIRECT_THUNK reg:req + .section .text.__x86_indirect_thunk_\reg, "ax", @progbits + + ENTRY(__x86_indirect_thunk_\reg) +- IND_THUNK_RETPOLINE \reg ++ ALTERNATIVE_2 __stringify(IND_THUNK_RETPOLINE \reg), \ ++ __stringify(IND_THUNK_LFENCE \reg), X86_FEATURE_IND_THUNK_LFENCE, \ ++ __stringify(IND_THUNK_JMP \reg), X86_FEATURE_IND_THUNK_JMP + .endm + + /* Instantiate GEN_INDIRECT_THUNK for each register except %rsp. */ +diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c +index 256701a43c..d601c028d8 100644 +--- a/xen/arch/x86/spec_ctrl.c ++++ b/xen/arch/x86/spec_ctrl.c +@@ -16,18 +16,54 @@ + * + * Copyright (c) 2017-2018 Citrix Systems Ltd. + */ ++#include + #include + #include + + #include + #include + +-enum ind_thunk { ++static enum ind_thunk { + THUNK_DEFAULT, /* Decide which thunk to use at boot time. */ + THUNK_NONE, /* Missing compiler support for thunks. */ + + THUNK_RETPOLINE, +-}; ++ THUNK_LFENCE, ++ THUNK_JMP, ++} opt_thunk __initdata = THUNK_DEFAULT; ++ ++static int __init parse_bti(const char *s) ++{ ++ const char *ss; ++ int rc = 0; ++ ++ do { ++ ss = strchr(s, ','); ++ if ( !ss ) ++ ss = strchr(s, '\0'); ++ ++ if ( !strncmp(s, "thunk=", 6) ) ++ { ++ s += 6; ++ ++ if ( !strncmp(s, "retpoline", ss - s) ) ++ opt_thunk = THUNK_RETPOLINE; ++ else if ( !strncmp(s, "lfence", ss - s) ) ++ opt_thunk = THUNK_LFENCE; ++ else if ( !strncmp(s, "jmp", ss - s) ) ++ opt_thunk = THUNK_JMP; ++ else ++ rc = -EINVAL; ++ } ++ else ++ rc = -EINVAL; ++ ++ s = ss + 1; ++ } while ( *ss ); ++ ++ return rc; ++} ++custom_param("bti", parse_bti); + + static void __init print_details(enum ind_thunk thunk) + { +@@ -40,13 +76,40 @@ static void __init print_details(enum ind_thunk thunk) + printk(XENLOG_INFO + "BTI mitigations: Thunk %s\n", + thunk == THUNK_NONE ? "N/A" : +- thunk == THUNK_RETPOLINE ? "RETPOLINE" : "?"); ++ thunk == THUNK_RETPOLINE ? "RETPOLINE" : ++ thunk == THUNK_LFENCE ? "LFENCE" : ++ thunk == THUNK_JMP ? "JMP" : "?"); + } + + void __init init_speculation_mitigations(void) + { + enum ind_thunk thunk = THUNK_DEFAULT; + ++ /* ++ * Has the user specified any custom BTI mitigations? If so, follow their ++ * instructions exactly and disable all heuristics. 
++ */ ++ if ( opt_thunk != THUNK_DEFAULT ) ++ { ++ thunk = opt_thunk; ++ } ++ else ++ { ++ /* ++ * Evaluate the safest Branch Target Injection mitigations to use. ++ * First, begin with compiler-aided mitigations. ++ */ ++ if ( IS_ENABLED(CONFIG_INDIRECT_THUNK) ) ++ { ++ /* ++ * AMD's recommended mitigation is to set lfence as being dispatch ++ * serialising, and to use IND_THUNK_LFENCE. ++ */ ++ if ( cpu_has_lfence_dispatch ) ++ thunk = THUNK_LFENCE; ++ } ++ } ++ + /* + * Supplimentary minor adjustments. Without compiler support, there are + * no thunks. +@@ -61,6 +124,12 @@ void __init init_speculation_mitigations(void) + if ( thunk == THUNK_DEFAULT ) + thunk = THUNK_RETPOLINE; + ++ /* Apply the chosen settings. */ ++ if ( thunk == THUNK_LFENCE ) ++ setup_force_cpu_cap(X86_FEATURE_IND_THUNK_LFENCE); ++ else if ( thunk == THUNK_JMP ) ++ setup_force_cpu_cap(X86_FEATURE_IND_THUNK_JMP); ++ + print_details(thunk); + } + +diff --git a/xen/include/asm-x86/cpufeatures.h b/xen/include/asm-x86/cpufeatures.h +index 58b37d6a6d..ba1771b3d3 100644 +--- a/xen/include/asm-x86/cpufeatures.h ++++ b/xen/include/asm-x86/cpufeatures.h +@@ -23,3 +23,5 @@ XEN_CPUFEATURE(MFENCE_RDTSC, (FSCAPINTS+0)*32+ 9) /* MFENCE synchronizes RDTS + XEN_CPUFEATURE(XEN_SMEP, (FSCAPINTS+0)*32+10) /* SMEP gets used by Xen itself */ + XEN_CPUFEATURE(XEN_SMAP, (FSCAPINTS+0)*32+11) /* SMAP gets used by Xen itself */ + XEN_CPUFEATURE(LFENCE_DISPATCH, (FSCAPINTS+0)*32+12) /* lfence set as Dispatch Serialising */ ++XEN_CPUFEATURE(IND_THUNK_LFENCE,(FSCAPINTS+0)*32+13) /* Use IND_THUNK_LFENCE */ ++XEN_CPUFEATURE(IND_THUNK_JMP, (FSCAPINTS+0)*32+14) /* Use IND_THUNK_JMP */ +-- +2.14.3 + + +From 129880dd8f28bc728f93e3aad4675622c1ee2aad Mon Sep 17 00:00:00 2001 +From: Andrew Cooper +Date: Thu, 8 Feb 2018 11:41:14 +0100 +Subject: [PATCH 33/49] x86/feature: Definitions for Indirect Branch Controls + +Contemporary processors are gaining Indirect Branch Controls via microcode +updates. Intel are introducing one bit to indicate IBRS and IBPB support, and +a second bit for STIBP. AMD are introducing IBPB only, so enumerate it with a +separate bit. + +Furthermore, depending on compiler and microcode availability, we may want to +run Xen with IBRS set, or clear. + +To use these facilities, we synthesise separate IBRS and IBPB bits for +internal use. A lot of infrastructure is required before these features are +safe to offer to guests. + +This is part of XSA-254. 
+ +Signed-off-by: Andrew Cooper +Reviewed-by: Jan Beulich +Acked-by: Wei Liu +master commit: 0d703a701cc4bc47773986b2796eebd28b1439b5 +master date: 2018-01-16 17:45:50 +0000 +--- + tools/libxl/libxl_cpuid.c | 3 +++ + tools/misc/xen-cpuid.c | 12 ++++++++++-- + xen/arch/x86/spec_ctrl.c | 17 +++++++++++++++++ + xen/include/asm-x86/cpufeatures.h | 3 +++ + xen/include/asm-x86/msr-index.h | 8 ++++++++ + xen/include/public/arch-x86/cpufeatureset.h | 3 +++ + xen/tools/gen-cpuid.py | 5 +++++ + 7 files changed, 49 insertions(+), 2 deletions(-) + +diff --git a/tools/libxl/libxl_cpuid.c b/tools/libxl/libxl_cpuid.c +index e692b61569..81ba9616bc 100644 +--- a/tools/libxl/libxl_cpuid.c ++++ b/tools/libxl/libxl_cpuid.c +@@ -202,6 +202,8 @@ int libxl_cpuid_parse_config(libxl_cpuid_policy_list *cpuid, const char* str) + + {"avx512-4vnniw",0x00000007, 0, CPUID_REG_EDX, 2, 1}, + {"avx512-4fmaps",0x00000007, 0, CPUID_REG_EDX, 3, 1}, ++ {"ibrsb", 0x00000007, 0, CPUID_REG_EDX, 26, 1}, ++ {"stibp", 0x00000007, 0, CPUID_REG_EDX, 27, 1}, + + {"lahfsahf", 0x80000001, NA, CPUID_REG_ECX, 0, 1}, + {"cmplegacy", 0x80000001, NA, CPUID_REG_ECX, 1, 1}, +@@ -239,6 +241,7 @@ int libxl_cpuid_parse_config(libxl_cpuid_policy_list *cpuid, const char* str) + + {"invtsc", 0x80000007, NA, CPUID_REG_EDX, 8, 1}, + ++ {"ibpb", 0x80000008, NA, CPUID_REG_EBX, 12, 1}, + {"nc", 0x80000008, NA, CPUID_REG_ECX, 0, 8}, + {"apicidsize", 0x80000008, NA, CPUID_REG_ECX, 12, 4}, + +diff --git a/tools/misc/xen-cpuid.c b/tools/misc/xen-cpuid.c +index 0831f7551d..8c3dac0d50 100644 +--- a/tools/misc/xen-cpuid.c ++++ b/tools/misc/xen-cpuid.c +@@ -149,7 +149,11 @@ static const char *str_e8b[32] = + { + [ 0] = "clzero", + +- [1 ... 31] = "REZ", ++ [1 ... 11] = "REZ", ++ ++ [12] = "ibpb", ++ ++ [13 ... 31] = "REZ", + }; + + static const char *str_7d0[32] = +@@ -158,7 +162,11 @@ static const char *str_7d0[32] = + + [ 2] = "avx512_4vnniw", [ 3] = "avx512_4fmaps", + +- [4 ... 31] = "REZ", ++ [4 ... 25] = "REZ", ++ ++ [26] = "ibrsb", [27] = "stibp", ++ ++ [28 ... 31] = "REZ", + }; + + static struct { +diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c +index d601c028d8..89e7287e43 100644 +--- a/xen/arch/x86/spec_ctrl.c ++++ b/xen/arch/x86/spec_ctrl.c +@@ -67,8 +67,25 @@ custom_param("bti", parse_bti); + + static void __init print_details(enum ind_thunk thunk) + { ++ unsigned int _7d0 = 0, e8b = 0, tmp; ++ ++ /* Collect diagnostics about available mitigations. */ ++ if ( boot_cpu_data.cpuid_level >= 7 ) ++ cpuid_count(7, 0, &tmp, &tmp, &tmp, &_7d0); ++ if ( boot_cpu_data.extended_cpuid_level >= 0x80000008 ) ++ cpuid(0x80000008, &tmp, &e8b, &tmp, &tmp); ++ + printk(XENLOG_DEBUG "Speculative mitigation facilities:\n"); + ++ /* Hardware features which pertain to speculative mitigations. */ ++ if ( (_7d0 & (cpufeat_mask(X86_FEATURE_IBRSB) | ++ cpufeat_mask(X86_FEATURE_STIBP))) || ++ (e8b & cpufeat_mask(X86_FEATURE_IBPB)) ) ++ printk(XENLOG_DEBUG " Hardware features:%s%s%s\n", ++ (_7d0 & cpufeat_mask(X86_FEATURE_IBRSB)) ? " IBRS/IBPB" : "", ++ (_7d0 & cpufeat_mask(X86_FEATURE_STIBP)) ? " STIBP" : "", ++ (e8b & cpufeat_mask(X86_FEATURE_IBPB)) ? " IBPB" : ""); ++ + /* Compiled-in support which pertains to BTI mitigations. 
*/ + if ( IS_ENABLED(CONFIG_INDIRECT_THUNK) ) + printk(XENLOG_DEBUG " Compiled-in support: INDIRECT_THUNK\n"); +diff --git a/xen/include/asm-x86/cpufeatures.h b/xen/include/asm-x86/cpufeatures.h +index ba1771b3d3..dd2388f393 100644 +--- a/xen/include/asm-x86/cpufeatures.h ++++ b/xen/include/asm-x86/cpufeatures.h +@@ -25,3 +25,6 @@ XEN_CPUFEATURE(XEN_SMAP, (FSCAPINTS+0)*32+11) /* SMAP gets used by Xen it + XEN_CPUFEATURE(LFENCE_DISPATCH, (FSCAPINTS+0)*32+12) /* lfence set as Dispatch Serialising */ + XEN_CPUFEATURE(IND_THUNK_LFENCE,(FSCAPINTS+0)*32+13) /* Use IND_THUNK_LFENCE */ + XEN_CPUFEATURE(IND_THUNK_JMP, (FSCAPINTS+0)*32+14) /* Use IND_THUNK_JMP */ ++XEN_CPUFEATURE(XEN_IBPB, (FSCAPINTS+0)*32+15) /* IBRSB || IBPB */ ++XEN_CPUFEATURE(XEN_IBRS_SET, (FSCAPINTS+0)*32+16) /* IBRSB && IRBS set in Xen */ ++XEN_CPUFEATURE(XEN_IBRS_CLEAR, (FSCAPINTS+0)*32+17) /* IBRSB && IBRS clear in Xen */ +diff --git a/xen/include/asm-x86/msr-index.h b/xen/include/asm-x86/msr-index.h +index 9c8bae6c35..11c43fa83e 100644 +--- a/xen/include/asm-x86/msr-index.h ++++ b/xen/include/asm-x86/msr-index.h +@@ -31,6 +31,14 @@ + #define EFER_LMSLE (1<<_EFER_LMSLE) + #define EFER_FFXSE (1<<_EFER_FFXSE) + ++/* Speculation Controls. */ ++#define MSR_SPEC_CTRL 0x00000048 ++#define SPEC_CTRL_IBRS (_AC(1, ULL) << 0) ++#define SPEC_CTRL_STIBP (_AC(1, ULL) << 1) ++ ++#define MSR_PRED_CMD 0x00000049 ++#define PRED_CMD_IBPB (_AC(1, ULL) << 0) ++ + /* Intel MSRs. Some also available on other CPUs */ + #define MSR_IA32_PERFCTR0 0x000000c1 + #define MSR_IA32_A_PERFCTR0 0x000004c1 +diff --git a/xen/include/public/arch-x86/cpufeatureset.h b/xen/include/public/arch-x86/cpufeatureset.h +index be6da8eaf1..e148755a66 100644 +--- a/xen/include/public/arch-x86/cpufeatureset.h ++++ b/xen/include/public/arch-x86/cpufeatureset.h +@@ -237,10 +237,13 @@ XEN_CPUFEATURE(EFRO, 7*32+10) /* APERF/MPERF Read Only interface */ + + /* AMD-defined CPU features, CPUID level 0x80000008.ebx, word 8 */ + XEN_CPUFEATURE(CLZERO, 8*32+ 0) /*A CLZERO instruction */ ++XEN_CPUFEATURE(IBPB, 8*32+12) /* IBPB support only (no IBRS, used by AMD) */ + + /* Intel-defined CPU features, CPUID level 0x00000007:0.edx, word 9 */ + XEN_CPUFEATURE(AVX512_4VNNIW, 9*32+ 2) /*A AVX512 Neural Network Instructions */ + XEN_CPUFEATURE(AVX512_4FMAPS, 9*32+ 3) /*A AVX512 Multiply Accumulation Single Precision */ ++XEN_CPUFEATURE(IBRSB, 9*32+26) /* IBRS and IBPB support (used by Intel) */ ++XEN_CPUFEATURE(STIBP, 9*32+27) /* STIBP */ + + #endif /* XEN_CPUFEATURE */ + +diff --git a/xen/tools/gen-cpuid.py b/xen/tools/gen-cpuid.py +index 9ec4486f2b..613b909c3d 100755 +--- a/xen/tools/gen-cpuid.py ++++ b/xen/tools/gen-cpuid.py +@@ -256,6 +256,11 @@ def crunch_numbers(state): + AVX512F: [AVX512DQ, AVX512IFMA, AVX512PF, AVX512ER, AVX512CD, + AVX512BW, AVX512VL, AVX512VBMI, AVX512_4VNNIW, + AVX512_4FMAPS, AVX512_VPOPCNTDQ], ++ ++ # Single Thread Indirect Branch Predictors enumerates a new bit in the ++ # MSR enumerated by Indirect Branch Restricted Speculation/Indirect ++ # Branch Prediction Barrier enumeration. ++ IBRSB: [STIBP], + } + + deep_features = tuple(sorted(deps.keys())) +-- +2.14.3 + + +From 65ee6e043a6dc61bece75a9dfe24c7ee70c6597c Mon Sep 17 00:00:00 2001 +From: Andrew Cooper +Date: Thu, 8 Feb 2018 11:42:36 +0100 +Subject: [PATCH 34/49] x86/cmdline: Introduce a command line option to disable + IBRS/IBPB, STIBP and IBPB + +Instead of gaining yet another top level boolean, introduce a more generic +cpuid= option. Also introduce a helper function to parse a generic boolean +value. 
+ +This is part of XSA-254. + +Signed-off-by: Andrew Cooper +Reviewed-by: Jan Beulich + +xen/cmdline: Fix parse_boolean() for unadorned values + +A command line such as "cpuid=no-ibrsb,no-stibp" tickles a bug in +parse_boolean() because the separating comma fails the NUL case. + +Instead, check for slen == nlen which accounts for the boundary (if any) +passed via the 'e' parameter. + +Signed-off-by: Andrew Cooper +Reviewed-by: Jan Beulich +master commit: 7850b1c00749df834ea2ad0c1f5d9364c4838795 +master date: 2018-01-16 17:45:50 +0000 +master commit: ac37ec1ddef234eeba6f438c29ff687c64962ebd +master date: 2018-01-31 10:47:12 +0000 +--- + docs/misc/xen-command-line.markdown | 12 ++++++++++++ + xen/arch/x86/cpuid.c | 35 +++++++++++++++++++++++++++++++++++ + xen/common/kernel.c | 27 +++++++++++++++++++++++++++ + xen/include/xen/lib.h | 7 +++++++ + 4 files changed, 81 insertions(+) + +diff --git a/docs/misc/xen-command-line.markdown b/docs/misc/xen-command-line.markdown +index 214012bd9e..2d95759568 100644 +--- a/docs/misc/xen-command-line.markdown ++++ b/docs/misc/xen-command-line.markdown +@@ -471,6 +471,18 @@ choice of `dom0-kernel` is deprecated and not supported by all Dom0 kernels. + respectively. + * `verbose` option can be included as a string or also as `verbose=` + ++### cpuid (x86) ++> `= List of comma separated booleans` ++ ++This option allows for fine tuning of the facilities Xen will use, after ++accounting for hardware capabilities as enumerated via CPUID. ++ ++Currently accepted: ++ ++The Speculation Control hardware features `ibrsb`, `stibp`, `ibpb` are used by ++default if avaiable. They can be ignored, e.g. `no-ibrsb`, at which point Xen ++won't use them itself, and won't offer them to guests. ++ + ### cpuid\_mask\_cpu (AMD only) + > `= fam_0f_rev_c | fam_0f_rev_d | fam_0f_rev_e | fam_0f_rev_f | fam_0f_rev_g | fam_10_rev_b | fam_10_rev_c | fam_11_rev_b` + +diff --git a/xen/arch/x86/cpuid.c b/xen/arch/x86/cpuid.c +index 5ee82d39d7..2ef71d218e 100644 +--- a/xen/arch/x86/cpuid.c ++++ b/xen/arch/x86/cpuid.c +@@ -18,6 +18,41 @@ static const uint32_t hvm_shadow_featuremask[] = INIT_HVM_SHADOW_FEATURES; + static const uint32_t hvm_hap_featuremask[] = INIT_HVM_HAP_FEATURES; + static const uint32_t deep_features[] = INIT_DEEP_FEATURES; + ++static int __init parse_xen_cpuid(const char *s) ++{ ++ const char *ss; ++ int val, rc = 0; ++ ++ do { ++ ss = strchr(s, ','); ++ if ( !ss ) ++ ss = strchr(s, '\0'); ++ ++ if ( (val = parse_boolean("ibpb", s, ss)) >= 0 ) ++ { ++ if ( !val ) ++ setup_clear_cpu_cap(X86_FEATURE_IBPB); ++ } ++ else if ( (val = parse_boolean("ibrsb", s, ss)) >= 0 ) ++ { ++ if ( !val ) ++ setup_clear_cpu_cap(X86_FEATURE_IBRSB); ++ } ++ else if ( (val = parse_boolean("stibp", s, ss)) >= 0 ) ++ { ++ if ( !val ) ++ setup_clear_cpu_cap(X86_FEATURE_STIBP); ++ } ++ else ++ rc = -EINVAL; ++ ++ s = ss + 1; ++ } while ( *ss ); ++ ++ return rc; ++} ++custom_param("cpuid", parse_xen_cpuid); ++ + #define EMPTY_LEAF ((struct cpuid_leaf){}) + static void zero_leaves(struct cpuid_leaf *l, + unsigned int first, unsigned int last) +diff --git a/xen/common/kernel.c b/xen/common/kernel.c +index 8d137c58fb..5766a0f784 100644 +--- a/xen/common/kernel.c ++++ b/xen/common/kernel.c +@@ -244,6 +244,33 @@ int parse_bool(const char *s, const char *e) + return -1; + } + ++int parse_boolean(const char *name, const char *s, const char *e) ++{ ++ size_t slen, nlen; ++ int val = !!strncmp(s, "no-", 3); ++ ++ if ( !val ) ++ s += 3; ++ ++ slen = e ? 
({ ASSERT(e >= s); e - s; }) : strlen(s); ++ nlen = strlen(name); ++ ++ /* Does s now start with name? */ ++ if ( slen < nlen || strncmp(s, name, nlen) ) ++ return -1; ++ ++ /* Exact, unadorned name? Result depends on the 'no-' prefix. */ ++ if ( slen == nlen ) ++ return val; ++ ++ /* =$SOMETHING? Defer to the regular boolean parsing. */ ++ if ( s[nlen] == '=' ) ++ return parse_bool(&s[nlen + 1], e); ++ ++ /* Unrecognised. Give up. */ ++ return -1; ++} ++ + unsigned int tainted; + + /** +diff --git a/xen/include/xen/lib.h b/xen/include/xen/lib.h +index ed00ae1379..1d9771340c 100644 +--- a/xen/include/xen/lib.h ++++ b/xen/include/xen/lib.h +@@ -74,6 +74,13 @@ void cmdline_parse(const char *cmdline); + int runtime_parse(const char *line); + int parse_bool(const char *s, const char *e); + ++/** ++ * Given a specific name, parses a string of the form: ++ * [no-]$NAME[=...] ++ * returning 0 or 1 for a recognised boolean, or -1 for an error. ++ */ ++int parse_boolean(const char *name, const char *s, const char *e); ++ + /*#define DEBUG_TRACE_DUMP*/ + #ifdef DEBUG_TRACE_DUMP + extern void debugtrace_dump(void); +-- +2.14.3 + + +From 641c11ef293c7f3a58c1856138835c06e09d6b07 Mon Sep 17 00:00:00 2001 +From: Andrew Cooper +Date: Thu, 8 Feb 2018 11:47:41 +0100 +Subject: [PATCH 39/49] x86/cpuid: Handling of IBRS/IBPB, STIBP and IBRS for + guests + +Intel specifies IBRS/IBPB (combined, in a single bit) and STIBP as a separate +bit. AMD specifies IBPB alone in a 3rd bit. + +AMD's IBPB is a subset of Intel's combined IBRS/IBPB. For performance +reasons, administrators might wish to express "IBPB only" even on Intel +hardware, so we allow the AMD bit to be used for this purpose. + +The behaviour of STIBP is more complicated. + +It is our current understanding that STIBP will be advertised on HT-capable +hardware irrespective of whether HT is enabled, but not advertised on +HT-incapable hardware. However, for ease of virtualisation, STIBP's +functionality is ignored rather than reserved by microcode/hardware on +HT-incapable hardware. + +For guest safety, we treat STIBP as special, always override the toolstack +choice, and always advertise STIBP if IBRS is available. This removes the +corner case where STIBP is not advertised, but the guest is running on +HT-capable hardware where it does matter. + +Finally as a bugfix, update the libxc CPUID logic to understand the e8b +feature leaf, which has the side effect of also offering CLZERO to guests on +applicable hardware. + +Signed-off-by: Andrew Cooper +Acked-by: Wei Liu +Reviewed-by: Jan Beulich +master commit: d297b56682e730d598e2529cc6998151d3b6f6f8 +master date: 2018-01-26 14:10:21 +0000 +--- + tools/libxc/xc_cpuid_x86.c | 4 +++- + xen/arch/x86/cpuid.c | 28 ++++++++++++++++++++++++++++ + xen/include/public/arch-x86/cpufeatureset.h | 2 +- + 3 files changed, 32 insertions(+), 2 deletions(-) + +diff --git a/tools/libxc/xc_cpuid_x86.c b/tools/libxc/xc_cpuid_x86.c +index 25b922ea21..9fa2f7c360 100644 +--- a/tools/libxc/xc_cpuid_x86.c ++++ b/tools/libxc/xc_cpuid_x86.c +@@ -465,7 +465,9 @@ static void xc_cpuid_hvm_policy(xc_interface *xch, + + case 0x80000008: + regs[0] &= 0x0000ffffu; +- regs[1] = regs[3] = 0; ++ regs[1] = info->featureset[featureword_of(X86_FEATURE_CLZERO)]; ++ /* regs[2] handled in the per-vendor logic. 
*/ ++ regs[3] = 0; + break; + + case 0x00000002: /* Intel cache info (dumped by AMD policy) */ +diff --git a/xen/arch/x86/cpuid.c b/xen/arch/x86/cpuid.c +index 2ef71d218e..b3c9ac6c48 100644 +--- a/xen/arch/x86/cpuid.c ++++ b/xen/arch/x86/cpuid.c +@@ -383,6 +383,16 @@ static void __init calculate_pv_max_policy(void) + /* Unconditionally claim to be able to set the hypervisor bit. */ + __set_bit(X86_FEATURE_HYPERVISOR, pv_featureset); + ++ /* On hardware with IBRS/IBPB support, there are further adjustments. */ ++ if ( test_bit(X86_FEATURE_IBRSB, pv_featureset) ) ++ { ++ /* Offer STIBP unconditionally. It is a nop on non-HT hardware. */ ++ __set_bit(X86_FEATURE_STIBP, pv_featureset); ++ ++ /* AMD's IBPB is a subset of IBRS/IBPB. */ ++ __set_bit(X86_FEATURE_IBPB, pv_featureset); ++ } ++ + sanitise_featureset(pv_featureset); + cpuid_featureset_to_policy(pv_featureset, p); + recalculate_xstate(p); +@@ -440,6 +450,16 @@ static void __init calculate_hvm_max_policy(void) + __clear_bit(X86_FEATURE_XSAVES, hvm_featureset); + } + ++ /* On hardware with IBRS/IBPB support, there are further adjustments. */ ++ if ( test_bit(X86_FEATURE_IBRSB, hvm_featureset) ) ++ { ++ /* Offer STIBP unconditionally. It is a nop on non-HT hardware. */ ++ __set_bit(X86_FEATURE_STIBP, hvm_featureset); ++ ++ /* AMD's IBPB is a subset of IBRS/IBPB. */ ++ __set_bit(X86_FEATURE_IBPB, hvm_featureset); ++ } ++ + sanitise_featureset(hvm_featureset); + cpuid_featureset_to_policy(hvm_featureset, p); + recalculate_xstate(p); +@@ -581,6 +601,14 @@ void recalculate_cpuid_policy(struct domain *d) + recalculate_xstate(p); + recalculate_misc(p); + ++ /* ++ * Override STIBP to match IBRS. Guests can safely use STIBP ++ * functionality on non-HT hardware, but can't necesserily protect ++ * themselves from SP2/Spectre/Branch Target Injection if STIBP is hidden ++ * on HT-capable hardware. ++ */ ++ p->feat.stibp = p->feat.ibrsb; ++ + for ( i = 0; i < ARRAY_SIZE(p->cache.raw); ++i ) + { + if ( p->cache.subleaf[i].type >= 1 && +diff --git a/xen/include/public/arch-x86/cpufeatureset.h b/xen/include/public/arch-x86/cpufeatureset.h +index e148755a66..0f21fed161 100644 +--- a/xen/include/public/arch-x86/cpufeatureset.h ++++ b/xen/include/public/arch-x86/cpufeatureset.h +@@ -243,7 +243,7 @@ XEN_CPUFEATURE(IBPB, 8*32+12) /* IBPB support only (no IBRS, used by + XEN_CPUFEATURE(AVX512_4VNNIW, 9*32+ 2) /*A AVX512 Neural Network Instructions */ + XEN_CPUFEATURE(AVX512_4FMAPS, 9*32+ 3) /*A AVX512 Multiply Accumulation Single Precision */ + XEN_CPUFEATURE(IBRSB, 9*32+26) /* IBRS and IBPB support (used by Intel) */ +-XEN_CPUFEATURE(STIBP, 9*32+27) /* STIBP */ ++XEN_CPUFEATURE(STIBP, 9*32+27) /*! STIBP */ + + #endif /* XEN_CPUFEATURE */ + +-- +2.14.3 + + +From 79891ef9442acb998f354b969e7302d81245ab0b Mon Sep 17 00:00:00 2001 +From: Andrew Cooper +Date: Thu, 8 Feb 2018 11:48:22 +0100 +Subject: [PATCH 40/49] x86/msr: Emulation of MSR_{SPEC_CTRL,PRED_CMD} for + guests + +As per the spec currently available here: + +https://software.intel.com/sites/default/files/managed/c5/63/336996-Speculative-Execution-Side-Channel-Mitigations.pdf + +MSR_ARCH_CAPABILITIES will only come into existence on new hardware, but is +implemented as a straight #GP for now to avoid being leaky when new hardware +arrives. 
+ +Signed-off-by: Andrew Cooper +Reviewed-by: Jan Beulich +master commit: ea58a679a6190e714a592f1369b660769a48a80c +master date: 2018-01-26 14:10:21 +0000 +--- + xen/arch/x86/msr.c | 45 +++++++++++++++++++++++++++++++++++++++++ + xen/include/asm-x86/msr-index.h | 2 ++ + xen/include/asm-x86/msr.h | 10 +++++++++ + 3 files changed, 57 insertions(+) + +diff --git a/xen/arch/x86/msr.c b/xen/arch/x86/msr.c +index 187f8623a5..7875d9c1e0 100644 +--- a/xen/arch/x86/msr.c ++++ b/xen/arch/x86/msr.c +@@ -120,11 +120,22 @@ int init_vcpu_msr_policy(struct vcpu *v) + + int guest_rdmsr(const struct vcpu *v, uint32_t msr, uint64_t *val) + { ++ const struct cpuid_policy *cp = v->domain->arch.cpuid; + const struct msr_domain_policy *dp = v->domain->arch.msr; + const struct msr_vcpu_policy *vp = v->arch.msr; + + switch ( msr ) + { ++ case MSR_PRED_CMD: ++ /* Write-only */ ++ goto gp_fault; ++ ++ case MSR_SPEC_CTRL: ++ if ( !cp->feat.ibrsb ) ++ goto gp_fault; ++ *val = vp->spec_ctrl.raw; ++ break; ++ + case MSR_INTEL_PLATFORM_INFO: + if ( !dp->plaform_info.available ) + goto gp_fault; +@@ -132,6 +143,10 @@ int guest_rdmsr(const struct vcpu *v, uint32_t msr, uint64_t *val) + _MSR_PLATFORM_INFO_CPUID_FAULTING; + break; + ++ case MSR_ARCH_CAPABILITIES: ++ /* Not implemented yet. */ ++ goto gp_fault; ++ + case MSR_INTEL_MISC_FEATURES_ENABLES: + if ( !vp->misc_features_enables.available ) + goto gp_fault; +@@ -153,14 +168,44 @@ int guest_wrmsr(struct vcpu *v, uint32_t msr, uint64_t val) + { + const struct vcpu *curr = current; + struct domain *d = v->domain; ++ const struct cpuid_policy *cp = d->arch.cpuid; + struct msr_domain_policy *dp = d->arch.msr; + struct msr_vcpu_policy *vp = v->arch.msr; + + switch ( msr ) + { + case MSR_INTEL_PLATFORM_INFO: ++ case MSR_ARCH_CAPABILITIES: ++ /* Read-only */ + goto gp_fault; + ++ case MSR_SPEC_CTRL: ++ if ( !cp->feat.ibrsb ) ++ goto gp_fault; /* MSR available? */ ++ ++ /* ++ * Note: SPEC_CTRL_STIBP is specified as safe to use (i.e. ignored) ++ * when STIBP isn't enumerated in hardware. ++ */ ++ ++ if ( val & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP) ) ++ goto gp_fault; /* Rsvd bit set? */ ++ ++ vp->spec_ctrl.raw = val; ++ break; ++ ++ case MSR_PRED_CMD: ++ if ( !cp->feat.ibrsb && !cp->extd.ibpb ) ++ goto gp_fault; /* MSR available? */ ++ ++ /* ++ * The only defined behaviour is when writing PRED_CMD_IBPB. In ++ * practice, real hardware accepts any value without faulting. ++ */ ++ if ( v == curr && (val & PRED_CMD_IBPB) ) ++ wrmsrl(MSR_PRED_CMD, PRED_CMD_IBPB); ++ break; ++ + case MSR_INTEL_MISC_FEATURES_ENABLES: + { + uint64_t rsvd = ~0ull; +diff --git a/xen/include/asm-x86/msr-index.h b/xen/include/asm-x86/msr-index.h +index 11c43fa83e..9a5bdcbd8b 100644 +--- a/xen/include/asm-x86/msr-index.h ++++ b/xen/include/asm-x86/msr-index.h +@@ -39,6 +39,8 @@ + #define MSR_PRED_CMD 0x00000049 + #define PRED_CMD_IBPB (_AC(1, ULL) << 0) + ++#define MSR_ARCH_CAPABILITIES 0x0000010a ++ + /* Intel MSRs. 
Some also available on other CPUs */ + #define MSR_IA32_PERFCTR0 0x000000c1 + #define MSR_IA32_A_PERFCTR0 0x000004c1 +diff --git a/xen/include/asm-x86/msr.h b/xen/include/asm-x86/msr.h +index 41732a4c93..20ba47e905 100644 +--- a/xen/include/asm-x86/msr.h ++++ b/xen/include/asm-x86/msr.h +@@ -215,6 +215,16 @@ struct msr_domain_policy + /* MSR policy object for per-vCPU MSRs */ + struct msr_vcpu_policy + { ++ /* 0x00000048 - MSR_SPEC_CTRL */ ++ struct { ++ /* ++ * Only the bottom two bits are defined, so no need to waste space ++ * with uint64_t at the moment, but use uint32_t for the convenience ++ * of the assembly code. ++ */ ++ uint32_t raw; ++ } spec_ctrl; ++ + /* 0x00000140 MSR_INTEL_MISC_FEATURES_ENABLES */ + struct { + bool available; /* This MSR is non-architectural */ +-- +2.14.3 + + +From 8baba874d6c76c1d6dd69b1d9aa06abdc344a1f5 Mon Sep 17 00:00:00 2001 +From: Andrew Cooper +Date: Thu, 8 Feb 2018 11:49:02 +0100 +Subject: [PATCH 41/49] x86/migrate: Move MSR_SPEC_CTRL on migrate + +Signed-off-by: Andrew Cooper +Reviewed-by: Wei Liu +Reviewed-by: Jan Beulich +master commit: 0cf2a4eb769302b7d7d7835540e7b2f15006df30 +master date: 2018-01-26 14:10:21 +0000 +--- + xen/arch/x86/domctl.c | 2 ++ + xen/arch/x86/hvm/hvm.c | 2 ++ + 2 files changed, 4 insertions(+) + +diff --git a/xen/arch/x86/domctl.c b/xen/arch/x86/domctl.c +index e61201267b..c8446ac0d3 100644 +--- a/xen/arch/x86/domctl.c ++++ b/xen/arch/x86/domctl.c +@@ -1290,6 +1290,7 @@ long arch_do_domctl( + struct xen_domctl_vcpu_msr msr; + struct vcpu *v; + static const uint32_t msrs_to_send[] = { ++ MSR_SPEC_CTRL, + MSR_INTEL_MISC_FEATURES_ENABLES, + }; + uint32_t nr_msrs = ARRAY_SIZE(msrs_to_send); +@@ -1416,6 +1417,7 @@ long arch_do_domctl( + + switch ( msr.index ) + { ++ case MSR_SPEC_CTRL: + case MSR_INTEL_MISC_FEATURES_ENABLES: + if ( guest_wrmsr(v, msr.index, msr.value) != X86EMUL_OKAY ) + break; +diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c +index 2a3dd4ee91..2e212f6f80 100644 +--- a/xen/arch/x86/hvm/hvm.c ++++ b/xen/arch/x86/hvm/hvm.c +@@ -1323,6 +1323,7 @@ static int hvm_load_cpu_xsave_states(struct domain *d, hvm_domain_context_t *h) + + #define HVM_CPU_MSR_SIZE(cnt) offsetof(struct hvm_msr, msr[cnt]) + static const uint32_t msrs_to_send[] = { ++ MSR_SPEC_CTRL, + MSR_INTEL_MISC_FEATURES_ENABLES, + }; + static unsigned int __read_mostly msr_count_max = ARRAY_SIZE(msrs_to_send); +@@ -1458,6 +1459,7 @@ static int hvm_load_cpu_msrs(struct domain *d, hvm_domain_context_t *h) + { + int rc; + ++ case MSR_SPEC_CTRL: + case MSR_INTEL_MISC_FEATURES_ENABLES: + rc = guest_wrmsr(v, ctxt->msr[i].index, ctxt->msr[i].val); + +-- +2.14.3 + + +From 92efbe865813d84873a0e7262b1fa414842306b6 Mon Sep 17 00:00:00 2001 +From: Andrew Cooper +Date: Thu, 8 Feb 2018 11:49:32 +0100 +Subject: [PATCH 42/49] x86/hvm: Permit guests direct access to + MSR_{SPEC_CTRL,PRED_CMD} + +For performance reasons, HVM guests should have direct access to these MSRs +when possible. 
+ +Signed-off-by: Andrew Cooper +Reviewed-by: Boris Ostrovsky +Reviewed-by: Jan Beulich +Reviewed-by: Kevin Tian +master commit: 5a2fe171144ebcc908ea1fca45058d6010f6a286 +master date: 2018-01-26 14:10:21 +0000 +--- + xen/arch/x86/domctl.c | 19 +++++++++++++++++++ + xen/arch/x86/hvm/svm/svm.c | 5 +++++ + xen/arch/x86/hvm/vmx/vmx.c | 17 +++++++++++++++++ + 3 files changed, 41 insertions(+) + +diff --git a/xen/arch/x86/domctl.c b/xen/arch/x86/domctl.c +index c8446ac0d3..2e3b6275e0 100644 +--- a/xen/arch/x86/domctl.c ++++ b/xen/arch/x86/domctl.c +@@ -53,6 +53,7 @@ static int update_domain_cpuid_info(struct domain *d, + struct cpuid_policy *p = d->arch.cpuid; + const struct cpuid_leaf leaf = { ctl->eax, ctl->ebx, ctl->ecx, ctl->edx }; + int old_vendor = p->x86_vendor; ++ unsigned int old_7d0 = p->feat.raw[0].d, old_e8b = p->extd.raw[8].b; + bool call_policy_changed = false; /* Avoid for_each_vcpu() unnecessarily */ + + /* +@@ -218,6 +219,14 @@ static int update_domain_cpuid_info(struct domain *d, + + d->arch.pv_domain.cpuidmasks->_7ab0 = mask; + } ++ ++ /* ++ * If the IBRS/IBPB policy has changed, we need to recalculate the MSR ++ * interception bitmaps. ++ */ ++ call_policy_changed = (is_hvm_domain(d) && ++ ((old_7d0 ^ p->feat.raw[0].d) & ++ cpufeat_mask(X86_FEATURE_IBRSB))); + break; + + case 0xa: +@@ -292,6 +301,16 @@ static int update_domain_cpuid_info(struct domain *d, + d->arch.pv_domain.cpuidmasks->e1cd = mask; + } + break; ++ ++ case 0x80000008: ++ /* ++ * If the IBPB policy has changed, we need to recalculate the MSR ++ * interception bitmaps. ++ */ ++ call_policy_changed = (is_hvm_domain(d) && ++ ((old_e8b ^ p->extd.raw[8].b) & ++ cpufeat_mask(X86_FEATURE_IBPB))); ++ break; + } + + if ( call_policy_changed ) +diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c +index 975a2d80cb..e978268fb5 100644 +--- a/xen/arch/x86/hvm/svm/svm.c ++++ b/xen/arch/x86/hvm/svm/svm.c +@@ -617,6 +617,7 @@ static void svm_cpuid_policy_changed(struct vcpu *v) + { + struct arch_svm_struct *arch_svm = &v->arch.hvm_svm; + struct vmcb_struct *vmcb = arch_svm->vmcb; ++ const struct cpuid_policy *cp = v->domain->arch.cpuid; + u32 bitmap = vmcb_get_exception_intercepts(vmcb); + + if ( opt_hvm_fep || +@@ -626,6 +627,10 @@ static void svm_cpuid_policy_changed(struct vcpu *v) + bitmap &= ~(1U << TRAP_invalid_op); + + vmcb_set_exception_intercepts(vmcb, bitmap); ++ ++ /* Give access to MSR_PRED_CMD if the guest has been told about it. */ ++ svm_intercept_msr(v, MSR_PRED_CMD, ++ cp->extd.ibpb ? MSR_INTERCEPT_NONE : MSR_INTERCEPT_RW); + } + + static void svm_sync_vmcb(struct vcpu *v) +diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c +index 4221fb8c56..9416ad5df2 100644 +--- a/xen/arch/x86/hvm/vmx/vmx.c ++++ b/xen/arch/x86/hvm/vmx/vmx.c +@@ -656,6 +656,8 @@ void vmx_update_exception_bitmap(struct vcpu *v) + + static void vmx_cpuid_policy_changed(struct vcpu *v) + { ++ const struct cpuid_policy *cp = v->domain->arch.cpuid; ++ + if ( opt_hvm_fep || + (v->domain->arch.cpuid->x86_vendor != boot_cpu_data.x86_vendor) ) + v->arch.hvm_vmx.exception_bitmap |= (1U << TRAP_invalid_op); +@@ -665,6 +667,21 @@ static void vmx_cpuid_policy_changed(struct vcpu *v) + vmx_vmcs_enter(v); + vmx_update_exception_bitmap(v); + vmx_vmcs_exit(v); ++ ++ /* ++ * We can safely pass MSR_SPEC_CTRL through to the guest, even if STIBP ++ * isn't enumerated in hardware, as SPEC_CTRL_STIBP is ignored. 
++ */ ++ if ( cp->feat.ibrsb ) ++ vmx_clear_msr_intercept(v, MSR_SPEC_CTRL, VMX_MSR_RW); ++ else ++ vmx_set_msr_intercept(v, MSR_SPEC_CTRL, VMX_MSR_RW); ++ ++ /* MSR_PRED_CMD is safe to pass through if the guest knows about it. */ ++ if ( cp->feat.ibrsb || cp->extd.ibpb ) ++ vmx_clear_msr_intercept(v, MSR_PRED_CMD, VMX_MSR_RW); ++ else ++ vmx_set_msr_intercept(v, MSR_PRED_CMD, VMX_MSR_RW); + } + + int vmx_guest_x86_mode(struct vcpu *v) +-- +2.14.3 + + +From a695f8dce7c3f137f61c8c8a880b24b1b4cf319c Mon Sep 17 00:00:00 2001 +From: Andrew Cooper +Date: Thu, 8 Feb 2018 11:50:40 +0100 +Subject: [PATCH 43/49] x86/entry: Organise the use of MSR_SPEC_CTRL at each + entry/exit point + +We need to be able to either set or clear IBRS in Xen context, as well as +restore appropriate guest values in guest context. See the documentation in +asm-x86/spec_ctrl_asm.h for details. + +With the contemporary microcode, writes to %cr3 are slower when SPEC_CTRL.IBRS +is set. Therefore, the positioning of SPEC_CTRL_{ENTRY/EXIT}* is important. + +Ideally, the IBRS_SET/IBRS_CLEAR hunks might be positioned either side of the +%cr3 change, but that is rather more complicated to arrange, and could still +result in a guest controlled value in SPEC_CTRL during the %cr3 change, +negating the saving if the guest chose to have IBRS set. + +Therefore, we optimise for the pre-Skylake case (being far more common in the +field than Skylake and later, at the moment), where we have a Xen-preferred +value of IBRS clear when switching %cr3. + +There is a semi-unrelated bugfix, where various asm_defn.h macros have a +hidden dependency on PAGE_SIZE, which results in an assembler error if used in +a .macro definition. + +Signed-off-by: Andrew Cooper +Reviewed-by: Jan Beulich +master commit: 5e7962901131186d3514528ed57c7a9901a15a3e +master date: 2018-01-26 14:10:21 +0000 +--- + xen/arch/x86/hvm/svm/entry.S | 11 +- + xen/arch/x86/hvm/vmx/entry.S | 19 +++ + xen/arch/x86/setup.c | 1 + + xen/arch/x86/smpboot.c | 2 + + xen/arch/x86/x86_64/asm-offsets.c | 6 + + xen/arch/x86/x86_64/compat/entry.S | 14 +++ + xen/arch/x86/x86_64/entry.S | 48 +++++++- + xen/include/asm-x86/asm_defns.h | 3 + + xen/include/asm-x86/current.h | 6 + + xen/include/asm-x86/nops.h | 6 + + xen/include/asm-x86/spec_ctrl.h | 9 ++ + xen/include/asm-x86/spec_ctrl_asm.h | 225 ++++++++++++++++++++++++++++++++++++ + 12 files changed, 344 insertions(+), 6 deletions(-) + create mode 100644 xen/include/asm-x86/spec_ctrl_asm.h + +diff --git a/xen/arch/x86/hvm/svm/entry.S b/xen/arch/x86/hvm/svm/entry.S +index df86da0a81..bf092fe071 100644 +--- a/xen/arch/x86/hvm/svm/entry.S ++++ b/xen/arch/x86/hvm/svm/entry.S +@@ -79,6 +79,12 @@ UNLIKELY_END(svm_trace) + or $X86_EFLAGS_MBS,%rax + mov %rax,VMCB_rflags(%rcx) + ++ mov VCPU_arch_msr(%rbx), %rax ++ mov VCPUMSR_spec_ctrl_raw(%rax), %eax ++ ++ /* WARNING! `ret`, `call *`, `jmp *` not safe beyond this point. */ ++ SPEC_CTRL_EXIT_TO_GUEST /* Req: a=spec_ctrl %rsp=regs/cpuinfo, Clob: cd */ ++ + pop %r15 + pop %r14 + pop %r13 +@@ -101,8 +107,11 @@ UNLIKELY_END(svm_trace) + SAVE_ALL + + GET_CURRENT(bx) +- mov VCPU_svm_vmcb(%rbx),%rcx + ++ SPEC_CTRL_ENTRY_FROM_VMEXIT /* Req: b=curr %rsp=regs/cpuinfo, Clob: acd */ ++ /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. 
*/ ++ ++ mov VCPU_svm_vmcb(%rbx),%rcx + movb $0,VCPU_svm_vmcb_in_sync(%rbx) + mov VMCB_rax(%rcx),%rax + mov %rax,UREGS_rax(%rsp) +diff --git a/xen/arch/x86/hvm/vmx/entry.S b/xen/arch/x86/hvm/vmx/entry.S +index b2f98be7f5..e750544b4b 100644 +--- a/xen/arch/x86/hvm/vmx/entry.S ++++ b/xen/arch/x86/hvm/vmx/entry.S +@@ -38,6 +38,9 @@ ENTRY(vmx_asm_vmexit_handler) + movb $1,VCPU_vmx_launched(%rbx) + mov %rax,VCPU_hvm_guest_cr2(%rbx) + ++ SPEC_CTRL_ENTRY_FROM_VMEXIT /* Req: b=curr %rsp=regs/cpuinfo, Clob: acd */ ++ /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */ ++ + mov %rsp,%rdi + call vmx_vmexit_handler + +@@ -68,6 +71,13 @@ UNLIKELY_END(realmode) + call vmx_vmenter_helper + test %al, %al + jz .Lvmx_vmentry_restart ++ ++ mov VCPU_arch_msr(%rbx), %rax ++ mov VCPUMSR_spec_ctrl_raw(%rax), %eax ++ ++ /* WARNING! `ret`, `call *`, `jmp *` not safe beyond this point. */ ++ SPEC_CTRL_EXIT_TO_GUEST /* Req: a=spec_ctrl %rsp=regs/cpuinfo, Clob: cd */ ++ + mov VCPU_hvm_guest_cr2(%rbx),%rax + + pop %r15 +@@ -99,6 +109,15 @@ UNLIKELY_END(realmode) + .Lvmx_vmentry_fail: + sti + SAVE_ALL ++ ++ /* ++ * PV variant needed here as no guest code has executed (so ++ * MSR_SPEC_CTRL can't have changed value), and NMIs/MCEs are liable ++ * to hit (in which case the HVM variant might corrupt things). ++ */ ++ SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo Clob: acd */ ++ /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */ ++ + call vmx_vmentry_failure + BUG /* vmx_vmentry_failure() shouldn't return. */ + +diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c +index 470427bc64..b2aa281d72 100644 +--- a/xen/arch/x86/setup.c ++++ b/xen/arch/x86/setup.c +@@ -668,6 +668,7 @@ void __init noreturn __start_xen(unsigned long mbi_p) + set_processor_id(0); + set_current(INVALID_VCPU); /* debug sanity. 
*/ + idle_vcpu[0] = current; ++ init_shadow_spec_ctrl_state(); + + percpu_init_areas(); + +diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c +index edf607f5a2..005287c65c 100644 +--- a/xen/arch/x86/smpboot.c ++++ b/xen/arch/x86/smpboot.c +@@ -40,6 +40,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -308,6 +309,7 @@ void start_secondary(void *unused) + set_current(idle_vcpu[cpu]); + this_cpu(curr_vcpu) = idle_vcpu[cpu]; + rdmsrl(MSR_EFER, this_cpu(efer)); ++ init_shadow_spec_ctrl_state(); + + /* + * Just as during early bootstrap, it is convenient here to disable +diff --git a/xen/arch/x86/x86_64/asm-offsets.c b/xen/arch/x86/x86_64/asm-offsets.c +index b1a4310974..17f1d77320 100644 +--- a/xen/arch/x86/x86_64/asm-offsets.c ++++ b/xen/arch/x86/x86_64/asm-offsets.c +@@ -88,6 +88,7 @@ void __dummy__(void) + OFFSET(VCPU_kernel_ss, struct vcpu, arch.pv_vcpu.kernel_ss); + OFFSET(VCPU_iopl, struct vcpu, arch.pv_vcpu.iopl); + OFFSET(VCPU_guest_context_flags, struct vcpu, arch.vgc_flags); ++ OFFSET(VCPU_arch_msr, struct vcpu, arch.msr); + OFFSET(VCPU_nmi_pending, struct vcpu, nmi_pending); + OFFSET(VCPU_mce_pending, struct vcpu, mce_pending); + OFFSET(VCPU_nmi_old_mask, struct vcpu, nmi_state.old_mask); +@@ -139,6 +140,8 @@ void __dummy__(void) + OFFSET(CPUINFO_cr4, struct cpu_info, cr4); + OFFSET(CPUINFO_xen_cr3, struct cpu_info, xen_cr3); + OFFSET(CPUINFO_pv_cr3, struct cpu_info, pv_cr3); ++ OFFSET(CPUINFO_shadow_spec_ctrl, struct cpu_info, shadow_spec_ctrl); ++ OFFSET(CPUINFO_use_shadow_spec_ctrl, struct cpu_info, use_shadow_spec_ctrl); + DEFINE(CPUINFO_sizeof, sizeof(struct cpu_info)); + BLANK(); + +@@ -154,6 +157,9 @@ void __dummy__(void) + OFFSET(TRAPBOUNCE_eip, struct trap_bounce, eip); + BLANK(); + ++ OFFSET(VCPUMSR_spec_ctrl_raw, struct msr_vcpu_policy, spec_ctrl.raw); ++ BLANK(); ++ + #ifdef CONFIG_PERF_COUNTERS + DEFINE(ASM_PERFC_exceptions, PERFC_exceptions); + BLANK(); +diff --git a/xen/arch/x86/x86_64/compat/entry.S b/xen/arch/x86/x86_64/compat/entry.S +index e668f00c36..4190c733a3 100644 +--- a/xen/arch/x86/x86_64/compat/entry.S ++++ b/xen/arch/x86/x86_64/compat/entry.S +@@ -18,6 +18,10 @@ ENTRY(entry_int82) + pushq $0 + movl $HYPERCALL_VECTOR, 4(%rsp) + SAVE_ALL compat=1 /* DPL1 gate, restricted to 32bit PV guests only. */ ++ ++ SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, Clob: acd */ ++ /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */ ++ + CR4_PV32_RESTORE + + GET_CURRENT(bx) +@@ -142,6 +146,13 @@ ENTRY(compat_restore_all_guest) + .popsection + or $X86_EFLAGS_IF,%r11 + mov %r11d,UREGS_eflags(%rsp) ++ ++ mov VCPU_arch_msr(%rbx), %rax ++ mov VCPUMSR_spec_ctrl_raw(%rax), %eax ++ ++ /* WARNING! `ret`, `call *`, `jmp *` not safe beyond this point. */ ++ SPEC_CTRL_EXIT_TO_GUEST /* Req: a=spec_ctrl %rsp=regs/cpuinfo, Clob: cd */ ++ + RESTORE_ALL adj=8 compat=1 + .Lft0: iretq + _ASM_PRE_EXTABLE(.Lft0, handle_exception) +@@ -200,6 +211,9 @@ ENTRY(cstar_enter) + movl $TRAP_syscall, 4(%rsp) + SAVE_ALL + ++ SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, Clob: acd */ ++ /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. 
*/ ++ + GET_STACK_END(bx) + mov STACK_CPUINFO_FIELD(xen_cr3)(%rbx), %rcx + neg %rcx +diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S +index 710c0616ba..73bd7ca2ad 100644 +--- a/xen/arch/x86/x86_64/entry.S ++++ b/xen/arch/x86/x86_64/entry.S +@@ -38,6 +38,10 @@ ENTRY(switch_to_kernel) + restore_all_guest: + ASSERT_INTERRUPTS_DISABLED + ++ /* Stash guest SPEC_CTRL value while we can read struct vcpu. */ ++ mov VCPU_arch_msr(%rbx), %rdx ++ mov VCPUMSR_spec_ctrl_raw(%rdx), %r15d ++ + /* Copy guest mappings and switch to per-CPU root page table. */ + mov %cr3, %r9 + GET_STACK_END(dx) +@@ -65,6 +69,12 @@ restore_all_guest: + write_cr3 rax, rdi, rsi + .Lrag_keep_cr3: + ++ /* Restore stashed SPEC_CTRL value. */ ++ mov %r15d, %eax ++ ++ /* WARNING! `ret`, `call *`, `jmp *` not safe beyond this point. */ ++ SPEC_CTRL_EXIT_TO_GUEST /* Req: a=spec_ctrl %rsp=regs/cpuinfo, Clob: cd */ ++ + RESTORE_ALL + testw $TRAP_syscall,4(%rsp) + jz iret_exit_to_guest +@@ -103,9 +113,9 @@ restore_all_xen: + * Check whether we need to switch to the per-CPU page tables, in + * case we return to late PV exit code (from an NMI or #MC). + */ +- GET_STACK_END(ax) +- mov STACK_CPUINFO_FIELD(xen_cr3)(%rax), %rdx +- mov STACK_CPUINFO_FIELD(pv_cr3)(%rax), %rax ++ GET_STACK_END(bx) ++ mov STACK_CPUINFO_FIELD(xen_cr3)(%rbx), %rdx ++ mov STACK_CPUINFO_FIELD(pv_cr3)(%rbx), %rax + test %rdx, %rdx + /* + * Ideally the condition would be "nsz", but such doesn't exist, +@@ -115,6 +125,9 @@ UNLIKELY_START(g, exit_cr3) + write_cr3 rax, rdi, rsi + UNLIKELY_END(exit_cr3) + ++ /* WARNING! `ret`, `call *`, `jmp *` not safe beyond this point. */ ++ SPEC_CTRL_EXIT_TO_XEN /* Req: %rbx=end, Clob: acd */ ++ + RESTORE_ALL adj=8 + iretq + +@@ -145,6 +158,9 @@ ENTRY(lstar_enter) + movl $TRAP_syscall, 4(%rsp) + SAVE_ALL + ++ SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, Clob: acd */ ++ /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */ ++ + GET_STACK_END(bx) + mov STACK_CPUINFO_FIELD(xen_cr3)(%rbx), %rcx + neg %rcx +@@ -248,6 +264,9 @@ GLOBAL(sysenter_eflags_saved) + movl $TRAP_syscall, 4(%rsp) + SAVE_ALL + ++ SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, Clob: acd */ ++ /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */ ++ + GET_STACK_END(bx) + mov STACK_CPUINFO_FIELD(xen_cr3)(%rbx), %rcx + neg %rcx +@@ -294,6 +313,9 @@ ENTRY(int80_direct_trap) + movl $0x80, 4(%rsp) + SAVE_ALL + ++ SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, Clob: acd */ ++ /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */ ++ + GET_STACK_END(bx) + mov STACK_CPUINFO_FIELD(xen_cr3)(%rbx), %rcx + neg %rcx +@@ -469,6 +491,10 @@ ENTRY(common_interrupt) + SAVE_ALL CLAC + + GET_STACK_END(14) ++ ++ SPEC_CTRL_ENTRY_FROM_INTR /* Req: %rsp=regs, %r14=end, Clob: acd */ ++ /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */ ++ + mov STACK_CPUINFO_FIELD(xen_cr3)(%r14), %rcx + mov %rcx, %r15 + neg %rcx +@@ -507,6 +533,10 @@ GLOBAL(handle_exception) + SAVE_ALL CLAC + + GET_STACK_END(14) ++ ++ SPEC_CTRL_ENTRY_FROM_INTR /* Req: %rsp=regs, %r14=end, Clob: acd */ ++ /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. 
*/ ++ + mov STACK_CPUINFO_FIELD(xen_cr3)(%r14), %rcx + mov %rcx, %r15 + neg %rcx +@@ -700,8 +730,12 @@ ENTRY(double_fault) + /* Set AC to reduce chance of further SMAP faults */ + SAVE_ALL STAC + +- GET_STACK_END(bx) +- mov STACK_CPUINFO_FIELD(xen_cr3)(%rbx), %rbx ++ GET_STACK_END(14) ++ ++ SPEC_CTRL_ENTRY_FROM_INTR /* Req: %rsp=regs, %r14=end, Clob: acd */ ++ /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */ ++ ++ mov STACK_CPUINFO_FIELD(xen_cr3)(%r14), %rbx + test %rbx, %rbx + jz .Ldblf_cr3_okay + jns .Ldblf_cr3_load +@@ -730,6 +764,10 @@ handle_ist_exception: + SAVE_ALL CLAC + + GET_STACK_END(14) ++ ++ SPEC_CTRL_ENTRY_FROM_INTR /* Req: %rsp=regs, %r14=end, Clob: acd */ ++ /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */ ++ + mov STACK_CPUINFO_FIELD(xen_cr3)(%r14), %rcx + mov %rcx, %r15 + neg %rcx +diff --git a/xen/include/asm-x86/asm_defns.h b/xen/include/asm-x86/asm_defns.h +index 61b6d35bff..ba96b0e823 100644 +--- a/xen/include/asm-x86/asm_defns.h ++++ b/xen/include/asm-x86/asm_defns.h +@@ -7,6 +7,7 @@ + #include + #endif + #include ++#include + #include + #include + #include +@@ -374,4 +375,6 @@ static always_inline void stac(void) + 4: .p2align 2 ; \ + .popsection + ++#include ++ + #endif /* __X86_ASM_DEFNS_H__ */ +diff --git a/xen/include/asm-x86/current.h b/xen/include/asm-x86/current.h +index b929c48c85..1009d05e24 100644 +--- a/xen/include/asm-x86/current.h ++++ b/xen/include/asm-x86/current.h +@@ -53,6 +53,12 @@ struct cpu_info { + */ + unsigned long xen_cr3; + unsigned long pv_cr3; ++ ++ /* See asm-x86/spec_ctrl_asm.h for usage. */ ++ unsigned int shadow_spec_ctrl; ++ bool use_shadow_spec_ctrl; ++ ++ unsigned long __pad; + /* get_stack_bottom() must be 16-byte aligned */ + }; + +diff --git a/xen/include/asm-x86/nops.h b/xen/include/asm-x86/nops.h +index 9a6a4e1164..18cb718bac 100644 +--- a/xen/include/asm-x86/nops.h ++++ b/xen/include/asm-x86/nops.h +@@ -61,6 +61,12 @@ + #define ASM_NOP7 _ASM_MK_NOP(K8_NOP7) + #define ASM_NOP8 _ASM_MK_NOP(K8_NOP8) + ++#define ASM_NOP17 ASM_NOP8; ASM_NOP7; ASM_NOP2 ++#define ASM_NOP21 ASM_NOP8; ASM_NOP8; ASM_NOP5 ++#define ASM_NOP24 ASM_NOP8; ASM_NOP8; ASM_NOP8 ++#define ASM_NOP29 ASM_NOP8; ASM_NOP8; ASM_NOP8; ASM_NOP5 ++#define ASM_NOP32 ASM_NOP8; ASM_NOP8; ASM_NOP8; ASM_NOP8 ++ + #define ASM_NOP_MAX 8 + + #endif /* __X86_ASM_NOPS_H__ */ +diff --git a/xen/include/asm-x86/spec_ctrl.h b/xen/include/asm-x86/spec_ctrl.h +index e088a551da..b451250282 100644 +--- a/xen/include/asm-x86/spec_ctrl.h ++++ b/xen/include/asm-x86/spec_ctrl.h +@@ -20,8 +20,17 @@ + #ifndef __X86_SPEC_CTRL_H__ + #define __X86_SPEC_CTRL_H__ + ++#include ++ + void init_speculation_mitigations(void); + ++static inline void init_shadow_spec_ctrl_state(void) ++{ ++ struct cpu_info *info = get_cpu_info(); ++ ++ info->shadow_spec_ctrl = info->use_shadow_spec_ctrl = 0; ++} ++ + #endif /* !__X86_SPEC_CTRL_H__ */ + + /* +diff --git a/xen/include/asm-x86/spec_ctrl_asm.h b/xen/include/asm-x86/spec_ctrl_asm.h +new file mode 100644 +index 0000000000..ba5557436d +--- /dev/null ++++ b/xen/include/asm-x86/spec_ctrl_asm.h +@@ -0,0 +1,225 @@ ++/****************************************************************************** ++ * include/asm-x86/spec_ctrl.h ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. 
++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; If not, see . ++ * ++ * Copyright (c) 2017-2018 Citrix Systems Ltd. ++ */ ++ ++#ifndef __X86_SPEC_CTRL_ASM_H__ ++#define __X86_SPEC_CTRL_ASM_H__ ++ ++#ifdef __ASSEMBLY__ ++#include ++ ++/* ++ * Saving and restoring MSR_SPEC_CTRL state is a little tricky. ++ * ++ * We want the guests choice of SPEC_CTRL while in guest context, and Xen's ++ * choice (set or clear, depending on the hardware) while running in Xen ++ * context. Therefore, a simplistic algorithm is: ++ * ++ * - Set/clear IBRS on entry to Xen ++ * - Set the guests' choice on exit to guest ++ * - Leave SPEC_CTRL unchanged on exit to xen ++ * ++ * There are two complicating factors: ++ * 1) HVM guests can have direct access to the MSR, so it can change ++ * behind Xen's back. ++ * 2) An NMI or MCE can interrupt at any point, including early in the entry ++ * path, or late in the exit path after restoring the guest value. This ++ * will corrupt the guest value. ++ * ++ * Factor 1 is dealt with by relying on NMIs/MCEs being blocked immediately ++ * after VMEXIT. The VMEXIT-specific code reads MSR_SPEC_CTRL and updates ++ * current before loading Xen's MSR_SPEC_CTRL setting. ++ * ++ * Factor 2 is harder. We maintain a shadow_spec_ctrl value, and ++ * use_shadow_spec_ctrl boolean per cpu. The synchronous use is: ++ * ++ * 1) Store guest value in shadow_spec_ctrl ++ * 2) Set use_shadow_spec_ctrl boolean ++ * 3) Load guest value into MSR_SPEC_CTRL ++ * 4) Exit to guest ++ * 5) Entry from guest ++ * 6) Clear use_shadow_spec_ctrl boolean ++ * 7) Load Xen's value into MSR_SPEC_CTRL ++ * ++ * The asynchronous use for interrupts/exceptions is: ++ * - Set/clear IBRS on entry to Xen ++ * - On exit to Xen, check use_shadow_spec_ctrl ++ * - If set, load shadow_spec_ctrl ++ * ++ * Therefore, an interrupt/exception which hits the synchronous path between ++ * steps 2 and 6 will restore the shadow value rather than leaving Xen's value ++ * loaded and corrupting the value used in guest context. ++ * ++ * The following ASM fragments implement this algorithm. See their local ++ * comments for further details. ++ * - SPEC_CTRL_ENTRY_FROM_VMEXIT ++ * - SPEC_CTRL_ENTRY_FROM_PV ++ * - SPEC_CTRL_ENTRY_FROM_INTR ++ * - SPEC_CTRL_EXIT_TO_XEN ++ * - SPEC_CTRL_EXIT_TO_GUEST ++ */ ++ ++.macro DO_SPEC_CTRL_ENTRY_FROM_VMEXIT ibrs_val:req ++/* ++ * Requires %rbx=current, %rsp=regs/cpuinfo ++ * Clobbers %rax, %rcx, %rdx ++ * ++ * The common case is that a guest has direct access to MSR_SPEC_CTRL, at ++ * which point we need to save the guest value before setting IBRS for Xen. ++ * Unilaterally saving the guest value is shorter and faster than checking. ++ */ ++ mov $MSR_SPEC_CTRL, %ecx ++ rdmsr ++ ++ /* Stash the value from hardware. */ ++ mov VCPU_arch_msr(%rbx), %rdx ++ mov %eax, VCPUMSR_spec_ctrl_raw(%rdx) ++ xor %edx, %edx ++ ++ /* Clear SPEC_CTRL shadowing *before* loading Xen's value. */ ++ movb %dl, CPUINFO_use_shadow_spec_ctrl(%rsp) ++ ++ /* Load Xen's intended value. 
*/ ++ mov $\ibrs_val, %eax ++ wrmsr ++.endm ++ ++.macro DO_SPEC_CTRL_ENTRY maybexen:req ibrs_val:req ++/* ++ * Requires %rsp=regs (also cpuinfo if !maybexen) ++ * Requires %r14=stack_end (if maybexen) ++ * Clobbers %rax, %rcx, %rdx ++ * ++ * PV guests can't update MSR_SPEC_CTRL behind Xen's back, so no need to read ++ * it back. Entries from guest context need to clear SPEC_CTRL shadowing, ++ * while entries from Xen must leave shadowing in its current state. ++ */ ++ mov $MSR_SPEC_CTRL, %ecx ++ xor %edx, %edx ++ ++ /* ++ * Clear SPEC_CTRL shadowing *before* loading Xen's value. If entering ++ * from a possibly-xen context, %rsp doesn't necessarily alias the cpuinfo ++ * block so calculate the position directly. ++ */ ++ .if \maybexen ++ /* Branchless `if ( !xen ) clear_shadowing` */ ++ testb $3, UREGS_cs(%rsp) ++ setz %al ++ and %al, STACK_CPUINFO_FIELD(use_shadow_spec_ctrl)(%r14) ++ .else ++ movb %dl, CPUINFO_use_shadow_spec_ctrl(%rsp) ++ .endif ++ ++ /* Load Xen's intended value. */ ++ mov $\ibrs_val, %eax ++ wrmsr ++.endm ++ ++.macro DO_SPEC_CTRL_EXIT_TO_XEN ++/* ++ * Requires %rbx=stack_end ++ * Clobbers %rax, %rcx, %rdx ++ * ++ * When returning to Xen context, look to see whether SPEC_CTRL shadowing is ++ * in effect, and reload the shadow value. This covers race conditions which ++ * exist with an NMI/MCE/etc hitting late in the return-to-guest path. ++ */ ++ xor %edx, %edx ++ ++ cmpb %dl, STACK_CPUINFO_FIELD(use_shadow_spec_ctrl)(%rbx) ++ je .L\@_skip ++ ++ mov STACK_CPUINFO_FIELD(shadow_spec_ctrl)(%rbx), %eax ++ mov $MSR_SPEC_CTRL, %ecx ++ wrmsr ++ ++.L\@_skip: ++.endm ++ ++.macro DO_SPEC_CTRL_EXIT_TO_GUEST ++/* ++ * Requires %eax=spec_ctrl, %rsp=regs/cpuinfo ++ * Clobbers %rcx, %rdx ++ * ++ * When returning to guest context, set up SPEC_CTRL shadowing and load the ++ * guest value. ++ */ ++ /* Set up shadow value *before* enabling shadowing. */ ++ mov %eax, CPUINFO_shadow_spec_ctrl(%rsp) ++ ++ /* Set SPEC_CTRL shadowing *before* loading the guest value. */ ++ movb $1, CPUINFO_use_shadow_spec_ctrl(%rsp) ++ ++ mov $MSR_SPEC_CTRL, %ecx ++ xor %edx, %edx ++ wrmsr ++.endm ++ ++/* Use after a VMEXIT from an HVM guest. */ ++#define SPEC_CTRL_ENTRY_FROM_VMEXIT \ ++ ALTERNATIVE_2 __stringify(ASM_NOP32), \ ++ __stringify(DO_SPEC_CTRL_ENTRY_FROM_VMEXIT \ ++ ibrs_val=SPEC_CTRL_IBRS), \ ++ X86_FEATURE_XEN_IBRS_SET, \ ++ __stringify(DO_SPEC_CTRL_ENTRY_FROM_VMEXIT \ ++ ibrs_val=0), \ ++ X86_FEATURE_XEN_IBRS_CLEAR ++ ++/* Use after an entry from PV context (syscall/sysenter/int80/int82/etc). */ ++#define SPEC_CTRL_ENTRY_FROM_PV \ ++ ALTERNATIVE_2 __stringify(ASM_NOP21), \ ++ __stringify(DO_SPEC_CTRL_ENTRY maybexen=0 \ ++ ibrs_val=SPEC_CTRL_IBRS), \ ++ X86_FEATURE_XEN_IBRS_SET, \ ++ __stringify(DO_SPEC_CTRL_ENTRY maybexen=0 ibrs_val=0), \ ++ X86_FEATURE_XEN_IBRS_CLEAR ++ ++/* Use in interrupt/exception context. May interrupt Xen or PV context. */ ++#define SPEC_CTRL_ENTRY_FROM_INTR \ ++ ALTERNATIVE_2 __stringify(ASM_NOP29), \ ++ __stringify(DO_SPEC_CTRL_ENTRY maybexen=1 \ ++ ibrs_val=SPEC_CTRL_IBRS), \ ++ X86_FEATURE_XEN_IBRS_SET, \ ++ __stringify(DO_SPEC_CTRL_ENTRY maybexen=1 ibrs_val=0), \ ++ X86_FEATURE_XEN_IBRS_CLEAR ++ ++/* Use when exiting to Xen context. */ ++#define SPEC_CTRL_EXIT_TO_XEN \ ++ ALTERNATIVE_2 __stringify(ASM_NOP17), \ ++ DO_SPEC_CTRL_EXIT_TO_XEN, X86_FEATURE_XEN_IBRS_SET, \ ++ DO_SPEC_CTRL_EXIT_TO_XEN, X86_FEATURE_XEN_IBRS_CLEAR ++ ++/* Use when exiting to guest context. 
*/ ++#define SPEC_CTRL_EXIT_TO_GUEST \ ++ ALTERNATIVE_2 __stringify(ASM_NOP24), \ ++ DO_SPEC_CTRL_EXIT_TO_GUEST, X86_FEATURE_XEN_IBRS_SET, \ ++ DO_SPEC_CTRL_EXIT_TO_GUEST, X86_FEATURE_XEN_IBRS_CLEAR ++ ++#endif /* __ASSEMBLY__ */ ++#endif /* !__X86_SPEC_CTRL_ASM_H__ */ ++ ++/* ++ * Local variables: ++ * mode: C ++ * c-file-style: "BSD" ++ * c-basic-offset: 4 ++ * tab-width: 4 ++ * indent-tabs-mode: nil ++ * End: ++ */ +-- +2.14.3 + + +From ce7d7c01685569d9ff1f971c0f0622573bfe8bf3 Mon Sep 17 00:00:00 2001 +From: Andrew Cooper +Date: Thu, 8 Feb 2018 11:51:38 +0100 +Subject: [PATCH 44/49] x86/entry: Organise the clobbering of the RSB/RAS on + entry to Xen + +ret instructions are speculated directly to values recorded in the Return +Stack Buffer/Return Address Stack, as there is no uncertainty in well-formed +code. Guests can take advantage of this in two ways: + + 1) If they can find a path in Xen which executes more ret instructions than + call instructions. (At least one in the waitqueue infrastructure, + probably others.) + + 2) Use the fact that the RSB/RAS in hardware is actually a circular stack + without a concept of empty. (When it logically empties, stale values + will start being used.) + +To mitigate, overwrite the RSB on entry to Xen with gadgets which will capture +and contain rogue speculation. + +Signed-off-by: Andrew Cooper +Reviewed-by: Jan Beulich +master commit: e6c0128e9ab25bf66df11377a33ee5584d7f99e3 +master date: 2018-01-26 14:10:21 +0000 +--- + xen/include/asm-x86/cpufeatures.h | 2 ++ + xen/include/asm-x86/nops.h | 1 + + xen/include/asm-x86/spec_ctrl_asm.h | 44 +++++++++++++++++++++++++++++++++++++ + 3 files changed, 47 insertions(+) + +diff --git a/xen/include/asm-x86/cpufeatures.h b/xen/include/asm-x86/cpufeatures.h +index dd2388f393..b5dae12bfa 100644 +--- a/xen/include/asm-x86/cpufeatures.h ++++ b/xen/include/asm-x86/cpufeatures.h +@@ -28,3 +28,5 @@ XEN_CPUFEATURE(IND_THUNK_JMP, (FSCAPINTS+0)*32+14) /* Use IND_THUNK_JMP */ + XEN_CPUFEATURE(XEN_IBPB, (FSCAPINTS+0)*32+15) /* IBRSB || IBPB */ + XEN_CPUFEATURE(XEN_IBRS_SET, (FSCAPINTS+0)*32+16) /* IBRSB && IRBS set in Xen */ + XEN_CPUFEATURE(XEN_IBRS_CLEAR, (FSCAPINTS+0)*32+17) /* IBRSB && IBRS clear in Xen */ ++XEN_CPUFEATURE(RSB_NATIVE, (FSCAPINTS+0)*32+18) /* RSB overwrite needed for native */ ++XEN_CPUFEATURE(RSB_VMEXIT, (FSCAPINTS+0)*32+19) /* RSB overwrite needed for vmexit */ +diff --git a/xen/include/asm-x86/nops.h b/xen/include/asm-x86/nops.h +index 18cb718bac..37f9819e82 100644 +--- a/xen/include/asm-x86/nops.h ++++ b/xen/include/asm-x86/nops.h +@@ -66,6 +66,7 @@ + #define ASM_NOP24 ASM_NOP8; ASM_NOP8; ASM_NOP8 + #define ASM_NOP29 ASM_NOP8; ASM_NOP8; ASM_NOP8; ASM_NOP5 + #define ASM_NOP32 ASM_NOP8; ASM_NOP8; ASM_NOP8; ASM_NOP8 ++#define ASM_NOP40 ASM_NOP8; ASM_NOP8; ASM_NOP8; ASM_NOP8; ASM_NOP8 + + #define ASM_NOP_MAX 8 + +diff --git a/xen/include/asm-x86/spec_ctrl_asm.h b/xen/include/asm-x86/spec_ctrl_asm.h +index ba5557436d..e27ea2b1e6 100644 +--- a/xen/include/asm-x86/spec_ctrl_asm.h ++++ b/xen/include/asm-x86/spec_ctrl_asm.h +@@ -74,6 +74,44 @@ + * - SPEC_CTRL_EXIT_TO_GUEST + */ + ++.macro DO_OVERWRITE_RSB ++/* ++ * Requires nothing ++ * Clobbers %rax, %rcx ++ * ++ * Requires 256 bytes of stack space, but %rsp has no net change. Based on ++ * Google's performance numbers, the loop is unrolled to 16 iterations and two ++ * calls per iteration. ++ * ++ * The call filling the RSB needs a nonzero displacement. 
A nop would do, but ++ * we use "1: pause; lfence; jmp 1b" to safely contains any ret-based ++ * speculation, even if the loop is speculatively executed prematurely. ++ * ++ * %rsp is preserved by using an extra GPR because a) we've got plenty spare, ++ * b) the two movs are shorter to encode than `add $32*8, %rsp`, and c) can be ++ * optimised with mov-elimination in modern cores. ++ */ ++ mov $16, %ecx /* 16 iterations, two calls per loop */ ++ mov %rsp, %rax /* Store the current %rsp */ ++ ++.L\@_fill_rsb_loop: ++ ++ .irp n, 1, 2 /* Unrolled twice. */ ++ call .L\@_insert_rsb_entry_\n /* Create an RSB entry. */ ++ ++.L\@_capture_speculation_\n: ++ pause ++ lfence ++ jmp .L\@_capture_speculation_\n /* Capture rogue speculation. */ ++ ++.L\@_insert_rsb_entry_\n: ++ .endr ++ ++ sub $1, %ecx ++ jnz .L\@_fill_rsb_loop ++ mov %rax, %rsp /* Restore old %rsp */ ++.endm ++ + .macro DO_SPEC_CTRL_ENTRY_FROM_VMEXIT ibrs_val:req + /* + * Requires %rbx=current, %rsp=regs/cpuinfo +@@ -173,6 +211,8 @@ + + /* Use after a VMEXIT from an HVM guest. */ + #define SPEC_CTRL_ENTRY_FROM_VMEXIT \ ++ ALTERNATIVE __stringify(ASM_NOP40), \ ++ DO_OVERWRITE_RSB, X86_FEATURE_RSB_VMEXIT; \ + ALTERNATIVE_2 __stringify(ASM_NOP32), \ + __stringify(DO_SPEC_CTRL_ENTRY_FROM_VMEXIT \ + ibrs_val=SPEC_CTRL_IBRS), \ +@@ -183,6 +223,8 @@ + + /* Use after an entry from PV context (syscall/sysenter/int80/int82/etc). */ + #define SPEC_CTRL_ENTRY_FROM_PV \ ++ ALTERNATIVE __stringify(ASM_NOP40), \ ++ DO_OVERWRITE_RSB, X86_FEATURE_RSB_NATIVE; \ + ALTERNATIVE_2 __stringify(ASM_NOP21), \ + __stringify(DO_SPEC_CTRL_ENTRY maybexen=0 \ + ibrs_val=SPEC_CTRL_IBRS), \ +@@ -192,6 +234,8 @@ + + /* Use in interrupt/exception context. May interrupt Xen or PV context. */ + #define SPEC_CTRL_ENTRY_FROM_INTR \ ++ ALTERNATIVE __stringify(ASM_NOP40), \ ++ DO_OVERWRITE_RSB, X86_FEATURE_RSB_NATIVE; \ + ALTERNATIVE_2 __stringify(ASM_NOP29), \ + __stringify(DO_SPEC_CTRL_ENTRY maybexen=1 \ + ibrs_val=SPEC_CTRL_IBRS), \ +-- +2.14.3 + + +From fc81946ceaae2c27fce2ba0f3f29fa9df3975951 Mon Sep 17 00:00:00 2001 +From: Andrew Cooper +Date: Thu, 8 Feb 2018 11:52:28 +0100 +Subject: [PATCH 45/49] x86/entry: Avoid using alternatives in NMI/#MC paths + +This patch is deliberately arranged to be easy to revert if/when alternatives +patching becomes NMI/#MC safe. + +For safety, there must be a dispatch serialising instruction in (what is +logically) DO_SPEC_CTRL_ENTRY so that, in the case that Xen needs IBRS set in +context, an attacker can't speculate around the WRMSR and reach an indirect +branch within the speculation window. + +Using conditionals opens this attack vector up, so the else clause gets an +LFENCE to force the pipeline to catch up before continuing. This also covers +the safety of RSB conditional, as execution it is guaranteed to either hit the +WRMSR or LFENCE. + +One downside of not using alternatives is that there unconditionally an LFENCE +in the IST path in cases where we are not using the features from IBRS-capable +microcode. 
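The shadow_spec_ctrl/use_shadow_spec_ctrl protocol that the spec_ctrl_asm.h comment above spells out in steps 1-7, and that the IST paths below also rely on, is easier to follow in plain C. A rough per-CPU model for illustration, where wrmsr() is a hypothetical stand-in for the privileged MSR write and none of this is part of the patch:

    #include <stdbool.h>
    #include <stdint.h>

    #define MSR_SPEC_CTRL 0x00000048

    /* Hypothetical stand-in for the privileged MSR write. */
    static void wrmsr(uint32_t msr, uint64_t val) { (void)msr; (void)val; }

    /* Per-CPU state, mirroring the new cpu_info fields. */
    struct cpu_shadow {
        uint32_t shadow_spec_ctrl;
        bool use_shadow_spec_ctrl;
    };

    static uint32_t xen_spec_ctrl;            /* Xen's preferred value (0 or IBRS) */

    /* Synchronous exit to guest: steps 1-4 of the comment above. */
    static void exit_to_guest(struct cpu_shadow *c, uint32_t guest_val)
    {
        c->shadow_spec_ctrl = guest_val;      /* 1) stash guest value */
        c->use_shadow_spec_ctrl = true;       /* 2) enable shadowing  */
        wrmsr(MSR_SPEC_CTRL, guest_val);      /* 3) load guest value  */
    }                                         /* 4) return to guest   */

    /* Synchronous entry from guest: steps 6-7. */
    static void entry_from_guest(struct cpu_shadow *c)
    {
        c->use_shadow_spec_ctrl = false;      /* 6) stop shadowing    */
        wrmsr(MSR_SPEC_CTRL, xen_spec_ctrl);  /* 7) load Xen's value  */
    }

    /* Asynchronous exit back to Xen (NMI/MCE return): if the interrupt
     * landed between steps 2 and 6, reload the shadow so the value the
     * guest is about to use is not clobbered by Xen's setting. */
    static void exit_to_xen(const struct cpu_shadow *c)
    {
        if ( c->use_shadow_spec_ctrl )
            wrmsr(MSR_SPEC_CTRL, c->shadow_spec_ctrl);
    }

    int main(void)
    {
        struct cpu_shadow cpu = { 0, false };

        exit_to_guest(&cpu, 1 /* guest wants IBRS */);
        exit_to_xen(&cpu);      /* e.g. an NMI fired and is returning */
        entry_from_guest(&cpu);
        return 0;
    }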
+ +Signed-off-by: Andrew Cooper +Reviewed-by: Jan Beulich +master commit: 3fffaf9c13e9502f09ad4ab1aac3f8b7b9398f6f +master date: 2018-01-26 14:10:21 +0000 +--- + xen/arch/x86/spec_ctrl.c | 8 +++++ + xen/arch/x86/x86_64/asm-offsets.c | 1 + + xen/arch/x86/x86_64/entry.S | 6 ++-- + xen/include/asm-x86/current.h | 1 + + xen/include/asm-x86/spec_ctrl.h | 1 + + xen/include/asm-x86/spec_ctrl_asm.h | 67 +++++++++++++++++++++++++++++++++++++ + 6 files changed, 81 insertions(+), 3 deletions(-) + +diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c +index 89e7287e43..cc1c972845 100644 +--- a/xen/arch/x86/spec_ctrl.c ++++ b/xen/arch/x86/spec_ctrl.c +@@ -20,8 +20,10 @@ + #include + #include + ++#include + #include + #include ++#include + + static enum ind_thunk { + THUNK_DEFAULT, /* Decide which thunk to use at boot time. */ +@@ -150,6 +152,12 @@ void __init init_speculation_mitigations(void) + print_details(thunk); + } + ++static void __init __maybe_unused build_assertions(void) ++{ ++ /* The optimised assembly relies on this alias. */ ++ BUILD_BUG_ON(BTI_IST_IBRS != SPEC_CTRL_IBRS); ++} ++ + /* + * Local variables: + * mode: C +diff --git a/xen/arch/x86/x86_64/asm-offsets.c b/xen/arch/x86/x86_64/asm-offsets.c +index 17f1d77320..51be528f89 100644 +--- a/xen/arch/x86/x86_64/asm-offsets.c ++++ b/xen/arch/x86/x86_64/asm-offsets.c +@@ -142,6 +142,7 @@ void __dummy__(void) + OFFSET(CPUINFO_pv_cr3, struct cpu_info, pv_cr3); + OFFSET(CPUINFO_shadow_spec_ctrl, struct cpu_info, shadow_spec_ctrl); + OFFSET(CPUINFO_use_shadow_spec_ctrl, struct cpu_info, use_shadow_spec_ctrl); ++ OFFSET(CPUINFO_bti_ist_info, struct cpu_info, bti_ist_info); + DEFINE(CPUINFO_sizeof, sizeof(struct cpu_info)); + BLANK(); + +diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S +index 73bd7ca2ad..a5a6702468 100644 +--- a/xen/arch/x86/x86_64/entry.S ++++ b/xen/arch/x86/x86_64/entry.S +@@ -126,7 +126,7 @@ UNLIKELY_START(g, exit_cr3) + UNLIKELY_END(exit_cr3) + + /* WARNING! `ret`, `call *`, `jmp *` not safe beyond this point. */ +- SPEC_CTRL_EXIT_TO_XEN /* Req: %rbx=end, Clob: acd */ ++ SPEC_CTRL_EXIT_TO_XEN_IST /* Req: %rbx=end, Clob: acd */ + + RESTORE_ALL adj=8 + iretq +@@ -732,7 +732,7 @@ ENTRY(double_fault) + + GET_STACK_END(14) + +- SPEC_CTRL_ENTRY_FROM_INTR /* Req: %rsp=regs, %r14=end, Clob: acd */ ++ SPEC_CTRL_ENTRY_FROM_INTR_IST /* Req: %rsp=regs, %r14=end, Clob: acd */ + /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */ + + mov STACK_CPUINFO_FIELD(xen_cr3)(%r14), %rbx +@@ -765,7 +765,7 @@ handle_ist_exception: + + GET_STACK_END(14) + +- SPEC_CTRL_ENTRY_FROM_INTR /* Req: %rsp=regs, %r14=end, Clob: acd */ ++ SPEC_CTRL_ENTRY_FROM_INTR_IST /* Req: %rsp=regs, %r14=end, Clob: acd */ + /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */ + + mov STACK_CPUINFO_FIELD(xen_cr3)(%r14), %rcx +diff --git a/xen/include/asm-x86/current.h b/xen/include/asm-x86/current.h +index 1009d05e24..4678a0fcf5 100644 +--- a/xen/include/asm-x86/current.h ++++ b/xen/include/asm-x86/current.h +@@ -57,6 +57,7 @@ struct cpu_info { + /* See asm-x86/spec_ctrl_asm.h for usage. 
*/ + unsigned int shadow_spec_ctrl; + bool use_shadow_spec_ctrl; ++ uint8_t bti_ist_info; + + unsigned long __pad; + /* get_stack_bottom() must be 16-byte aligned */ +diff --git a/xen/include/asm-x86/spec_ctrl.h b/xen/include/asm-x86/spec_ctrl.h +index b451250282..c454b02b66 100644 +--- a/xen/include/asm-x86/spec_ctrl.h ++++ b/xen/include/asm-x86/spec_ctrl.h +@@ -29,6 +29,7 @@ static inline void init_shadow_spec_ctrl_state(void) + struct cpu_info *info = get_cpu_info(); + + info->shadow_spec_ctrl = info->use_shadow_spec_ctrl = 0; ++ info->bti_ist_info = 0; + } + + #endif /* !__X86_SPEC_CTRL_H__ */ +diff --git a/xen/include/asm-x86/spec_ctrl_asm.h b/xen/include/asm-x86/spec_ctrl_asm.h +index e27ea2b1e6..814f53dffc 100644 +--- a/xen/include/asm-x86/spec_ctrl_asm.h ++++ b/xen/include/asm-x86/spec_ctrl_asm.h +@@ -20,6 +20,11 @@ + #ifndef __X86_SPEC_CTRL_ASM_H__ + #define __X86_SPEC_CTRL_ASM_H__ + ++/* Encoding of the bottom bits in cpuinfo.bti_ist_info */ ++#define BTI_IST_IBRS (1 << 0) ++#define BTI_IST_WRMSR (1 << 1) ++#define BTI_IST_RSB (1 << 2) ++ + #ifdef __ASSEMBLY__ + #include + +@@ -255,6 +260,68 @@ + DO_SPEC_CTRL_EXIT_TO_GUEST, X86_FEATURE_XEN_IBRS_SET, \ + DO_SPEC_CTRL_EXIT_TO_GUEST, X86_FEATURE_XEN_IBRS_CLEAR + ++/* TODO: Drop these when the alternatives infrastructure is NMI/#MC safe. */ ++.macro SPEC_CTRL_ENTRY_FROM_INTR_IST ++/* ++ * Requires %rsp=regs, %r14=stack_end ++ * Clobbers %rax, %rcx, %rdx ++ * ++ * This is logical merge of DO_OVERWRITE_RSB and DO_SPEC_CTRL_ENTRY ++ * maybexen=1, but with conditionals rather than alternatives. ++ */ ++ movzbl STACK_CPUINFO_FIELD(bti_ist_info)(%r14), %eax ++ ++ testb $BTI_IST_RSB, %al ++ jz .L\@_skip_rsb ++ ++ DO_OVERWRITE_RSB ++ ++.L\@_skip_rsb: ++ ++ testb $BTI_IST_WRMSR, %al ++ jz .L\@_skip_wrmsr ++ ++ xor %edx, %edx ++ testb $3, UREGS_cs(%rsp) ++ setz %dl ++ and %dl, STACK_CPUINFO_FIELD(use_shadow_spec_ctrl)(%r14) ++ ++.L\@_entry_from_xen: ++ /* ++ * Load Xen's intended value. SPEC_CTRL_IBRS vs 0 is encoded in the ++ * bottom bit of bti_ist_info, via a deliberate alias with BTI_IST_IBRS. ++ */ ++ mov $MSR_SPEC_CTRL, %ecx ++ and $BTI_IST_IBRS, %eax ++ wrmsr ++ ++ /* Opencoded UNLIKELY_START() with no condition. */ ++UNLIKELY_DISPATCH_LABEL(\@_serialise): ++ .subsection 1 ++ /* ++ * In the case that we might need to set SPEC_CTRL.IBRS for safety, we ++ * need to ensure that an attacker can't poison the `jz .L\@_skip_wrmsr` ++ * to speculate around the WRMSR. As a result, we need a dispatch ++ * serialising instruction in the else clause. ++ */ ++.L\@_skip_wrmsr: ++ lfence ++ UNLIKELY_END(\@_serialise) ++.endm ++ ++.macro SPEC_CTRL_EXIT_TO_XEN_IST ++/* ++ * Requires %rbx=stack_end ++ * Clobbers %rax, %rcx, %rdx ++ */ ++ testb $BTI_IST_WRMSR, STACK_CPUINFO_FIELD(bti_ist_info)(%rbx) ++ jz .L\@_skip ++ ++ DO_SPEC_CTRL_EXIT_TO_XEN ++ ++.L\@_skip: ++.endm ++ + #endif /* __ASSEMBLY__ */ + #endif /* !__X86_SPEC_CTRL_ASM_H__ */ + +-- +2.14.3 + + +From bc0e599a83d17f06ec7da1708721cede2df8274e Mon Sep 17 00:00:00 2001 +From: Andrew Cooper +Date: Thu, 8 Feb 2018 11:53:10 +0100 +Subject: [PATCH 46/49] x86/boot: Calculate the most appropriate BTI mitigation + to use + +See the logic and comments in init_speculation_mitigations() for further +details. 
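Before the spec_ctrl.c hunks themselves, the default selection that init_speculation_mitigations() ends up making can be condensed into a short C function. This is a sketch for illustration only, assuming thunk support is compiled in and no command-line overrides are given; the enum and names mirror the patch but the function itself is not part of it:

    #include <stdbool.h>

    enum ind_thunk { THUNK_DEFAULT, THUNK_NONE, THUNK_RETPOLINE,
                     THUNK_LFENCE, THUNK_JMP };

    struct host { bool lfence_dispatch; bool retpoline_safe; bool ibrsb; };

    /* Condensed default-selection logic. */
    static enum ind_thunk pick_mitigation(const struct host *h, bool *ibrs)
    {
        enum ind_thunk thunk = THUNK_DEFAULT;

        *ibrs = false;

        if ( h->lfence_dispatch )        /* AMD: lfence is dispatch-serialising */
            thunk = THUNK_LFENCE;
        else if ( h->retpoline_safe )    /* e.g. pre-Skylake Intel */
            thunk = THUNK_RETPOLINE;
        else if ( h->ibrsb )             /* retpoline unsafe: fall back to IBRS */
            *ibrs = true;

        /* With IBRS in use, the cheapest thunk is good enough. */
        if ( *ibrs && thunk == THUNK_DEFAULT )
            thunk = THUNK_JMP;

        /* Otherwise the compiled-in default, retpoline, is better than nothing. */
        if ( thunk == THUNK_DEFAULT )
            thunk = THUNK_RETPOLINE;

        return thunk;
    }

    int main(void)
    {
        bool ibrs;
        struct host skylake = { .lfence_dispatch = false,
                                .retpoline_safe = false, .ibrsb = true };

        return (pick_mitigation(&skylake, &ibrs) == THUNK_JMP && ibrs) ? 0 : 1;
    }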
+ +There are two controls for RSB overwriting, because in principle there are +cases where it might be safe to forego rsb_native (Off the top of my head, +SMEP active, no 32bit PV guests at all, no use of vmevent/paging subsystems +for HVM guests, but I make no guarantees that this list of restrictions is +exhaustive). + +Signed-off-by: Andrew Cooper +Reviewed-by: Jan Beulich + +x86/spec_ctrl: Fix determination of when to use IBRS + +The original version of this logic was: + + /* + * On Intel hardware, we'd like to use retpoline in preference to + * IBRS, but only if it is safe on this hardware. + */ + else if ( boot_cpu_has(X86_FEATURE_IBRSB) ) + { + if ( retpoline_safe() ) + thunk = THUNK_RETPOLINE; + else + ibrs = true; + } + +but it was changed by a request during review. Sadly, the result is buggy as +it breaks the later fallback logic by allowing IBRS to appear as available +when in fact it isn't. + +This in practice means that on repoline-unsafe hardware without IBRS, we +select THUNK_JUMP despite intending to select THUNK_RETPOLINE. + +Reported-by: Zhenzhong Duan +Signed-off-by: Andrew Cooper +Reviewed-by: Jan Beulich +master commit: 2713715305ca516f698d58cec5e0b322c3b2c4eb +master date: 2018-01-26 14:10:21 +0000 +master commit: 30cbd0c83ef3d0edac2d5bcc41a9a2b7a843ae58 +master date: 2018-02-06 18:32:58 +0000 +--- + docs/misc/xen-command-line.markdown | 10 ++- + xen/arch/x86/spec_ctrl.c | 141 +++++++++++++++++++++++++++++++++++- + xen/include/asm-x86/spec_ctrl.h | 4 +- + 3 files changed, 149 insertions(+), 6 deletions(-) + +diff --git a/docs/misc/xen-command-line.markdown b/docs/misc/xen-command-line.markdown +index 2d95759568..a751a392ac 100644 +--- a/docs/misc/xen-command-line.markdown ++++ b/docs/misc/xen-command-line.markdown +@@ -246,7 +246,7 @@ enough. Setting this to a high value may cause boot failure, particularly if + the NMI watchdog is also enabled. + + ### bti (x86) +-> `= List of [ thunk=retpoline|lfence|jmp ]` ++> `= List of [ thunk=retpoline|lfence|jmp, ibrs=, rsb_{vmexit,native}= ]` + + Branch Target Injection controls. By default, Xen will pick the most + appropriate BTI mitigations based on compiled in support, loaded microcode, +@@ -261,6 +261,14 @@ locations. The default thunk is `retpoline` (generally preferred for Intel + hardware), with the alternatives being `jmp` (a `jmp *%reg` gadget, minimal + overhead), and `lfence` (an `lfence; jmp *%reg` gadget, preferred for AMD). + ++On hardware supporting IBRS, the `ibrs=` option can be used to force or ++prevent Xen using the feature itself. If Xen is not using IBRS itself, ++functionality is still set up so IBRS can be virtualised for guests. ++ ++The `rsb_vmexit=` and `rsb_native=` options can be used to fine tune when the ++RSB gets overwritten. There are individual controls for an entry from HVM ++context, and an entry from a native (PV or Xen) context. 
++ + ### xenheap\_megabytes (arm32) + > `= ` + +diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c +index cc1c972845..8aefe29968 100644 +--- a/xen/arch/x86/spec_ctrl.c ++++ b/xen/arch/x86/spec_ctrl.c +@@ -20,6 +20,7 @@ + #include + #include + ++#include + #include + #include + #include +@@ -33,11 +34,15 @@ static enum ind_thunk { + THUNK_LFENCE, + THUNK_JMP, + } opt_thunk __initdata = THUNK_DEFAULT; ++static int8_t __initdata opt_ibrs = -1; ++static bool __initdata opt_rsb_native = true; ++static bool __initdata opt_rsb_vmexit = true; ++uint8_t __read_mostly default_bti_ist_info; + + static int __init parse_bti(const char *s) + { + const char *ss; +- int rc = 0; ++ int val, rc = 0; + + do { + ss = strchr(s, ','); +@@ -57,6 +62,12 @@ static int __init parse_bti(const char *s) + else + rc = -EINVAL; + } ++ else if ( (val = parse_boolean("ibrs", s, ss)) >= 0 ) ++ opt_ibrs = val; ++ else if ( (val = parse_boolean("rsb_native", s, ss)) >= 0 ) ++ opt_rsb_native = val; ++ else if ( (val = parse_boolean("rsb_vmexit", s, ss)) >= 0 ) ++ opt_rsb_vmexit = val; + else + rc = -EINVAL; + +@@ -93,24 +104,84 @@ static void __init print_details(enum ind_thunk thunk) + printk(XENLOG_DEBUG " Compiled-in support: INDIRECT_THUNK\n"); + + printk(XENLOG_INFO +- "BTI mitigations: Thunk %s\n", ++ "BTI mitigations: Thunk %s, Others:%s%s%s\n", + thunk == THUNK_NONE ? "N/A" : + thunk == THUNK_RETPOLINE ? "RETPOLINE" : + thunk == THUNK_LFENCE ? "LFENCE" : +- thunk == THUNK_JMP ? "JMP" : "?"); ++ thunk == THUNK_JMP ? "JMP" : "?", ++ boot_cpu_has(X86_FEATURE_XEN_IBRS_SET) ? " IBRS+" : ++ boot_cpu_has(X86_FEATURE_XEN_IBRS_CLEAR) ? " IBRS-" : "", ++ boot_cpu_has(X86_FEATURE_RSB_NATIVE) ? " RSB_NATIVE" : "", ++ boot_cpu_has(X86_FEATURE_RSB_VMEXIT) ? " RSB_VMEXIT" : ""); ++} ++ ++/* Calculate whether Retpoline is known-safe on this CPU. */ ++static bool __init retpoline_safe(void) ++{ ++ unsigned int ucode_rev = this_cpu(ucode_cpu_info).cpu_sig.rev; ++ ++ if ( boot_cpu_data.x86_vendor == X86_VENDOR_AMD ) ++ return true; ++ ++ if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL || ++ boot_cpu_data.x86 != 6 ) ++ return false; ++ ++ switch ( boot_cpu_data.x86_model ) ++ { ++ case 0x17: /* Penryn */ ++ case 0x1d: /* Dunnington */ ++ case 0x1e: /* Nehalem */ ++ case 0x1f: /* Auburndale / Havendale */ ++ case 0x1a: /* Nehalem EP */ ++ case 0x2e: /* Nehalem EX */ ++ case 0x25: /* Westmere */ ++ case 0x2c: /* Westmere EP */ ++ case 0x2f: /* Westmere EX */ ++ case 0x2a: /* SandyBridge */ ++ case 0x2d: /* SandyBridge EP/EX */ ++ case 0x3a: /* IvyBridge */ ++ case 0x3e: /* IvyBridge EP/EX */ ++ case 0x3c: /* Haswell */ ++ case 0x3f: /* Haswell EX/EP */ ++ case 0x45: /* Haswell D */ ++ case 0x46: /* Haswell H */ ++ return true; ++ ++ /* ++ * Broadwell processors are retpoline-safe after specific microcode ++ * versions. ++ */ ++ case 0x3d: /* Broadwell */ ++ return ucode_rev >= 0x28; ++ case 0x47: /* Broadwell H */ ++ return ucode_rev >= 0x1b; ++ case 0x4f: /* Broadwell EP/EX */ ++ return ucode_rev >= 0xb000025; ++ case 0x56: /* Broadwell D */ ++ return false; /* TBD. */ ++ ++ /* ++ * Skylake and later processors are not retpoline-safe. ++ */ ++ default: ++ return false; ++ } + } + + void __init init_speculation_mitigations(void) + { + enum ind_thunk thunk = THUNK_DEFAULT; ++ bool ibrs = false; + + /* + * Has the user specified any custom BTI mitigations? If so, follow their + * instructions exactly and disable all heuristics. 
+ */ +- if ( opt_thunk != THUNK_DEFAULT ) ++ if ( opt_thunk != THUNK_DEFAULT || opt_ibrs != -1 ) + { + thunk = opt_thunk; ++ ibrs = !!opt_ibrs; + } + else + { +@@ -126,7 +197,18 @@ void __init init_speculation_mitigations(void) + */ + if ( cpu_has_lfence_dispatch ) + thunk = THUNK_LFENCE; ++ /* ++ * On Intel hardware, we'd like to use retpoline in preference to ++ * IBRS, but only if it is safe on this hardware. ++ */ ++ else if ( retpoline_safe() ) ++ thunk = THUNK_RETPOLINE; ++ else if ( boot_cpu_has(X86_FEATURE_IBRSB) ) ++ ibrs = true; + } ++ /* Without compiler thunk support, use IBRS if available. */ ++ else if ( boot_cpu_has(X86_FEATURE_IBRSB) ) ++ ibrs = true; + } + + /* +@@ -136,6 +218,13 @@ void __init init_speculation_mitigations(void) + if ( !IS_ENABLED(CONFIG_INDIRECT_THUNK) ) + thunk = THUNK_NONE; + ++ /* ++ * If IBRS is in use and thunks are compiled in, there is no point ++ * suffering extra overhead. Switch to the least-overhead thunk. ++ */ ++ if ( ibrs && thunk == THUNK_DEFAULT ) ++ thunk = THUNK_JMP; ++ + /* + * If there are still no thunk preferences, the compiled default is + * actually retpoline, and it is better than nothing. +@@ -149,6 +238,50 @@ void __init init_speculation_mitigations(void) + else if ( thunk == THUNK_JMP ) + setup_force_cpu_cap(X86_FEATURE_IND_THUNK_JMP); + ++ if ( boot_cpu_has(X86_FEATURE_IBRSB) ) ++ { ++ /* ++ * Even if we've chosen to not have IBRS set in Xen context, we still ++ * need the IBRS entry/exit logic to virtualise IBRS support for ++ * guests. ++ */ ++ if ( ibrs ) ++ setup_force_cpu_cap(X86_FEATURE_XEN_IBRS_SET); ++ else ++ setup_force_cpu_cap(X86_FEATURE_XEN_IBRS_CLEAR); ++ ++ default_bti_ist_info |= BTI_IST_WRMSR | ibrs; ++ } ++ ++ /* ++ * PV guests can poison the RSB to any virtual address from which ++ * they can execute a call instruction. This is necessarily outside ++ * of the Xen supervisor mappings. ++ * ++ * With SMEP enabled, the processor won't speculate into user mappings. ++ * Therefore, in this case, we don't need to worry about poisoned entries ++ * from 64bit PV guests. ++ * ++ * 32bit PV guest kernels run in ring 1, so use supervisor mappings. ++ * If a processors speculates to 32bit PV guest kernel mappings, it is ++ * speculating in 64bit supervisor mode, and can leak data. ++ */ ++ if ( opt_rsb_native ) ++ { ++ setup_force_cpu_cap(X86_FEATURE_RSB_NATIVE); ++ default_bti_ist_info |= BTI_IST_RSB; ++ } ++ ++ /* ++ * HVM guests can always poison the RSB to point at Xen supervisor ++ * mappings. ++ */ ++ if ( opt_rsb_vmexit ) ++ setup_force_cpu_cap(X86_FEATURE_RSB_VMEXIT); ++ ++ /* (Re)init BSP state now that default_bti_ist_info has been calculated. 
*/ ++ init_shadow_spec_ctrl_state(); ++ + print_details(thunk); + } + +diff --git a/xen/include/asm-x86/spec_ctrl.h b/xen/include/asm-x86/spec_ctrl.h +index c454b02b66..6120e4f561 100644 +--- a/xen/include/asm-x86/spec_ctrl.h ++++ b/xen/include/asm-x86/spec_ctrl.h +@@ -24,12 +24,14 @@ + + void init_speculation_mitigations(void); + ++extern uint8_t default_bti_ist_info; ++ + static inline void init_shadow_spec_ctrl_state(void) + { + struct cpu_info *info = get_cpu_info(); + + info->shadow_spec_ctrl = info->use_shadow_spec_ctrl = 0; +- info->bti_ist_info = 0; ++ info->bti_ist_info = default_bti_ist_info; + } + + #endif /* !__X86_SPEC_CTRL_H__ */ +-- +2.14.3 + + +From db12743f2d24fc59d5b9cefc15eb3d56cdaf549d Mon Sep 17 00:00:00 2001 +From: Andrew Cooper +Date: Thu, 8 Feb 2018 11:53:40 +0100 +Subject: [PATCH 47/49] x86/ctxt: Issue a speculation barrier between vcpu + contexts + +Issuing an IBPB command flushes the Branch Target Buffer, so that any poison +left by one vcpu won't remain when beginning to execute the next. + +The cost of IBPB is substantial, and skipped on transition to idle, as Xen's +idle code is robust already. All transitions into vcpu context are fully +serialising in practice (and under consideration for being retroactively +declared architecturally serialising), so a cunning attacker cannot use SP1 to +try and skip the flush. + +Signed-off-by: Andrew Cooper +Reviewed-by: Jan Beulich +master commit: a2ed643ed783020f885035432e9c0919756921d1 +master date: 2018-01-26 14:10:21 +0000 +--- + docs/misc/xen-command-line.markdown | 5 ++++- + xen/arch/x86/domain.c | 29 +++++++++++++++++++++++++++++ + xen/arch/x86/spec_ctrl.c | 10 +++++++++- + xen/include/asm-x86/spec_ctrl.h | 1 + + 4 files changed, 43 insertions(+), 2 deletions(-) + +diff --git a/docs/misc/xen-command-line.markdown b/docs/misc/xen-command-line.markdown +index a751a392ac..e9b65ac555 100644 +--- a/docs/misc/xen-command-line.markdown ++++ b/docs/misc/xen-command-line.markdown +@@ -246,7 +246,7 @@ enough. Setting this to a high value may cause boot failure, particularly if + the NMI watchdog is also enabled. + + ### bti (x86) +-> `= List of [ thunk=retpoline|lfence|jmp, ibrs=, rsb_{vmexit,native}= ]` ++> `= List of [ thunk=retpoline|lfence|jmp, ibrs=, ibpb=, rsb_{vmexit,native}= ]` + + Branch Target Injection controls. By default, Xen will pick the most + appropriate BTI mitigations based on compiled in support, loaded microcode, +@@ -265,6 +265,9 @@ On hardware supporting IBRS, the `ibrs=` option can be used to force or + prevent Xen using the feature itself. If Xen is not using IBRS itself, + functionality is still set up so IBRS can be virtualised for guests. + ++On hardware supporting IBPB, the `ibpb=` option can be used to prevent Xen ++from issuing Branch Prediction Barriers on vcpu context switches. ++ + The `rsb_vmexit=` and `rsb_native=` options can be used to fine tune when the + RSB gets overwritten. There are individual controls for an entry from HVM + context, and an entry from a native (PV or Xen) context. 
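The barrier decision added to context_switch() in the domain.c hunk that follows is compact enough to show on its own. A stripped-down C sketch, with wrmsrl() as a hypothetical stand-in for the privileged write, a single slot standing in for the per-CPU "last" variable, and the idle check reduced to a flag:

    #include <stdbool.h>
    #include <stdint.h>

    #define MSR_PRED_CMD  0x00000049
    #define PRED_CMD_IBPB (1u << 0)

    /* Hypothetical stand-in for the privileged MSR write. */
    static void wrmsrl(uint32_t msr, uint64_t val) { (void)msr; (void)val; }

    /* One slot per CPU in reality; a single slot is enough for the sketch. */
    static uint32_t last_id;

    /*
     * Issue IBPB when switching to a different (non-idle) vcpu.  Squashing
     * domid and vcpu_id into one word keeps the comparison cheap, and the
     * vcpu -> idle -> same vcpu case skips the expensive barrier.
     */
    static void maybe_ibpb(bool opt_ibpb, bool next_is_idle,
                           uint16_t next_domid, uint16_t next_vcpu_id)
    {
        uint32_t next_id = ((uint32_t)next_domid << 16) | next_vcpu_id;

        if ( !opt_ibpb || next_is_idle )
            return;

        if ( last_id != next_id )
        {
            wrmsrl(MSR_PRED_CMD, PRED_CMD_IBPB);
            last_id = next_id;
        }
    }

    int main(void)
    {
        maybe_ibpb(true, false, 1, 0);   /* d1v0: barrier issued */
        maybe_ibpb(true, true,  0, 0);   /* idle: skipped        */
        maybe_ibpb(true, false, 1, 0);   /* d1v0 again: skipped  */
        return 0;
    }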
+diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c +index aaa2b28413..8e936c8547 100644 +--- a/xen/arch/x86/domain.c ++++ b/xen/arch/x86/domain.c +@@ -65,6 +65,7 @@ + #include + #include + #include ++#include + + DEFINE_PER_CPU(struct vcpu *, curr_vcpu); + +@@ -1739,6 +1740,34 @@ void context_switch(struct vcpu *prev, struct vcpu *next) + } + + ctxt_switch_levelling(next); ++ ++ if ( opt_ibpb && !is_idle_domain(nextd) ) ++ { ++ static DEFINE_PER_CPU(unsigned int, last); ++ unsigned int *last_id = &this_cpu(last); ++ ++ /* ++ * Squash the domid and vcpu id together for comparison ++ * efficiency. We could in principle stash and compare the struct ++ * vcpu pointer, but this risks a false alias if a domain has died ++ * and the same 4k page gets reused for a new vcpu. ++ */ ++ unsigned int next_id = (((unsigned int)nextd->domain_id << 16) | ++ (uint16_t)next->vcpu_id); ++ BUILD_BUG_ON(MAX_VIRT_CPUS > 0xffff); ++ ++ /* ++ * When scheduling from a vcpu, to idle, and back to the same vcpu ++ * (which might be common in a lightly loaded system, or when ++ * using vcpu pinning), there is no need to issue IBPB, as we are ++ * returning to the same security context. ++ */ ++ if ( *last_id != next_id ) ++ { ++ wrmsrl(MSR_PRED_CMD, PRED_CMD_IBPB); ++ *last_id = next_id; ++ } ++ } + } + + context_saved(prev); +diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c +index 8aefe29968..8ad992a700 100644 +--- a/xen/arch/x86/spec_ctrl.c ++++ b/xen/arch/x86/spec_ctrl.c +@@ -37,6 +37,7 @@ static enum ind_thunk { + static int8_t __initdata opt_ibrs = -1; + static bool __initdata opt_rsb_native = true; + static bool __initdata opt_rsb_vmexit = true; ++bool __read_mostly opt_ibpb = true; + uint8_t __read_mostly default_bti_ist_info; + + static int __init parse_bti(const char *s) +@@ -64,6 +65,8 @@ static int __init parse_bti(const char *s) + } + else if ( (val = parse_boolean("ibrs", s, ss)) >= 0 ) + opt_ibrs = val; ++ else if ( (val = parse_boolean("ibpb", s, ss)) >= 0 ) ++ opt_ibpb = val; + else if ( (val = parse_boolean("rsb_native", s, ss)) >= 0 ) + opt_rsb_native = val; + else if ( (val = parse_boolean("rsb_vmexit", s, ss)) >= 0 ) +@@ -104,13 +107,14 @@ static void __init print_details(enum ind_thunk thunk) + printk(XENLOG_DEBUG " Compiled-in support: INDIRECT_THUNK\n"); + + printk(XENLOG_INFO +- "BTI mitigations: Thunk %s, Others:%s%s%s\n", ++ "BTI mitigations: Thunk %s, Others:%s%s%s%s\n", + thunk == THUNK_NONE ? "N/A" : + thunk == THUNK_RETPOLINE ? "RETPOLINE" : + thunk == THUNK_LFENCE ? "LFENCE" : + thunk == THUNK_JMP ? "JMP" : "?", + boot_cpu_has(X86_FEATURE_XEN_IBRS_SET) ? " IBRS+" : + boot_cpu_has(X86_FEATURE_XEN_IBRS_CLEAR) ? " IBRS-" : "", ++ opt_ibpb ? " IBPB" : "", + boot_cpu_has(X86_FEATURE_RSB_NATIVE) ? " RSB_NATIVE" : "", + boot_cpu_has(X86_FEATURE_RSB_VMEXIT) ? " RSB_VMEXIT" : ""); + } +@@ -279,6 +283,10 @@ void __init init_speculation_mitigations(void) + if ( opt_rsb_vmexit ) + setup_force_cpu_cap(X86_FEATURE_RSB_VMEXIT); + ++ /* Check we have hardware IBPB support before using it... */ ++ if ( !boot_cpu_has(X86_FEATURE_IBRSB) && !boot_cpu_has(X86_FEATURE_IBPB) ) ++ opt_ibpb = false; ++ + /* (Re)init BSP state now that default_bti_ist_info has been calculated. 
*/ + init_shadow_spec_ctrl_state(); + +diff --git a/xen/include/asm-x86/spec_ctrl.h b/xen/include/asm-x86/spec_ctrl.h +index 6120e4f561..e328b0f509 100644 +--- a/xen/include/asm-x86/spec_ctrl.h ++++ b/xen/include/asm-x86/spec_ctrl.h +@@ -24,6 +24,7 @@ + + void init_speculation_mitigations(void); + ++extern bool opt_ibpb; + extern uint8_t default_bti_ist_info; + + static inline void init_shadow_spec_ctrl_state(void) +-- +2.14.3 + + +From 5644514050b9ae7d75cdd95fd07912b9930cae08 Mon Sep 17 00:00:00 2001 +From: Andrew Cooper +Date: Thu, 8 Feb 2018 11:54:12 +0100 +Subject: [PATCH 48/49] x86/cpuid: Offer Indirect Branch Controls to guests + +With all infrastructure in place, it is now safe to let guests see and use +these features. + +Signed-off-by: Andrew Cooper +Acked-by: Jan Beulich +Acked-by: Wei Liu +master commit: 67c6838ddacfa646f9d1ae802bd0f16a935665b8 +master date: 2018-01-26 14:10:21 +0000 +--- + xen/include/public/arch-x86/cpufeatureset.h | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/xen/include/public/arch-x86/cpufeatureset.h b/xen/include/public/arch-x86/cpufeatureset.h +index 0f21fed161..fa81af14b7 100644 +--- a/xen/include/public/arch-x86/cpufeatureset.h ++++ b/xen/include/public/arch-x86/cpufeatureset.h +@@ -237,13 +237,13 @@ XEN_CPUFEATURE(EFRO, 7*32+10) /* APERF/MPERF Read Only interface */ + + /* AMD-defined CPU features, CPUID level 0x80000008.ebx, word 8 */ + XEN_CPUFEATURE(CLZERO, 8*32+ 0) /*A CLZERO instruction */ +-XEN_CPUFEATURE(IBPB, 8*32+12) /* IBPB support only (no IBRS, used by AMD) */ ++XEN_CPUFEATURE(IBPB, 8*32+12) /*A IBPB support only (no IBRS, used by AMD) */ + + /* Intel-defined CPU features, CPUID level 0x00000007:0.edx, word 9 */ + XEN_CPUFEATURE(AVX512_4VNNIW, 9*32+ 2) /*A AVX512 Neural Network Instructions */ + XEN_CPUFEATURE(AVX512_4FMAPS, 9*32+ 3) /*A AVX512 Multiply Accumulation Single Precision */ +-XEN_CPUFEATURE(IBRSB, 9*32+26) /* IBRS and IBPB support (used by Intel) */ +-XEN_CPUFEATURE(STIBP, 9*32+27) /*! STIBP */ ++XEN_CPUFEATURE(IBRSB, 9*32+26) /*A IBRS and IBPB support (used by Intel) */ ++XEN_CPUFEATURE(STIBP, 9*32+27) /*A! STIBP */ + + #endif /* XEN_CPUFEATURE */ + +-- +2.14.3 + + +From 3181472a5ca45ae5e77abbcf024d025d9ba79ced Mon Sep 17 00:00:00 2001 +From: Andrew Cooper +Date: Thu, 8 Feb 2018 11:54:52 +0100 +Subject: [PATCH 49/49] x86/idle: Clear SPEC_CTRL while idle + +On contemporary hardware, setting IBRS/STIBP has a performance impact on +adjacent hyperthreads. It is therefore recommended to clear the setting +before becoming idle, to avoid an idle core preventing adjacent userspace +execution from running at full performance. + +Care must be taken to ensure there are no ret or indirect branch instructions +between spec_ctrl_{enter,exit}_idle() invocations, which are forced always +inline. Care must also be taken to avoid using spec_ctrl_enter_idle() between +flushing caches and becoming idle, in cases where that matters. 
+ +Signed-off-by: Andrew Cooper +Reviewed-by: Jan Beulich +master commit: 4c7e478d597b0346eef3a256cfd6794ac778b608 +master date: 2018-01-26 14:10:21 +0000 +--- + xen/arch/x86/acpi/cpu_idle.c | 21 +++++++++++++++++++++ + xen/arch/x86/cpu/mwait-idle.c | 7 +++++++ + xen/arch/x86/domain.c | 8 ++++++++ + xen/include/asm-x86/spec_ctrl.h | 34 ++++++++++++++++++++++++++++++++++ + 4 files changed, 70 insertions(+) + +diff --git a/xen/arch/x86/acpi/cpu_idle.c b/xen/arch/x86/acpi/cpu_idle.c +index b605a87083..5feda704c6 100644 +--- a/xen/arch/x86/acpi/cpu_idle.c ++++ b/xen/arch/x86/acpi/cpu_idle.c +@@ -55,6 +55,7 @@ + #include + #include + #include ++#include + + /*#define DEBUG_PM_CX*/ + +@@ -417,8 +418,14 @@ void mwait_idle_with_hints(unsigned int eax, unsigned int ecx) + */ + if ( (expires > NOW() || expires == 0) && !softirq_pending(cpu) ) + { ++ struct cpu_info *info = get_cpu_info(); ++ + cpumask_set_cpu(cpu, &cpuidle_mwait_flags); ++ ++ spec_ctrl_enter_idle(info); + __mwait(eax, ecx); ++ spec_ctrl_exit_idle(info); ++ + cpumask_clear_cpu(cpu, &cpuidle_mwait_flags); + } + +@@ -433,6 +440,8 @@ static void acpi_processor_ffh_cstate_enter(struct acpi_processor_cx *cx) + + static void acpi_idle_do_entry(struct acpi_processor_cx *cx) + { ++ struct cpu_info *info = get_cpu_info(); ++ + switch ( cx->entry_method ) + { + case ACPI_CSTATE_EM_FFH: +@@ -440,15 +449,19 @@ static void acpi_idle_do_entry(struct acpi_processor_cx *cx) + acpi_processor_ffh_cstate_enter(cx); + return; + case ACPI_CSTATE_EM_SYSIO: ++ spec_ctrl_enter_idle(info); + /* IO port based C-state */ + inb(cx->address); + /* Dummy wait op - must do something useless after P_LVL2 read + because chipsets cannot guarantee that STPCLK# signal + gets asserted in time to freeze execution properly. */ + inl(pmtmr_ioport); ++ spec_ctrl_exit_idle(info); + return; + case ACPI_CSTATE_EM_HALT: ++ spec_ctrl_enter_idle(info); + safe_halt(); ++ spec_ctrl_exit_idle(info); + local_irq_disable(); + return; + } +@@ -576,7 +589,13 @@ static void acpi_processor_idle(void) + if ( pm_idle_save ) + pm_idle_save(); + else ++ { ++ struct cpu_info *info = get_cpu_info(); ++ ++ spec_ctrl_enter_idle(info); + safe_halt(); ++ spec_ctrl_exit_idle(info); ++ } + return; + } + +@@ -755,6 +774,7 @@ void acpi_dead_idle(void) + * Otherwise, CPU may still hold dirty data, breaking cache coherency, + * leading to strange errors. 
+ */ ++ spec_ctrl_enter_idle(get_cpu_info()); + wbinvd(); + + while ( 1 ) +@@ -784,6 +804,7 @@ void acpi_dead_idle(void) + u32 address = cx->address; + u32 pmtmr_ioport_local = pmtmr_ioport; + ++ spec_ctrl_enter_idle(get_cpu_info()); + wbinvd(); + + while ( 1 ) +diff --git a/xen/arch/x86/cpu/mwait-idle.c b/xen/arch/x86/cpu/mwait-idle.c +index 762dff1cba..e357f29208 100644 +--- a/xen/arch/x86/cpu/mwait-idle.c ++++ b/xen/arch/x86/cpu/mwait-idle.c +@@ -58,6 +58,7 @@ + #include + #include + #include ++#include + #include + + #define MWAIT_IDLE_VERSION "0.4.1" +@@ -736,7 +737,13 @@ static void mwait_idle(void) + if (pm_idle_save) + pm_idle_save(); + else ++ { ++ struct cpu_info *info = get_cpu_info(); ++ ++ spec_ctrl_enter_idle(info); + safe_halt(); ++ spec_ctrl_exit_idle(info); ++ } + return; + } + +diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c +index 8e936c8547..7e10a2a07d 100644 +--- a/xen/arch/x86/domain.c ++++ b/xen/arch/x86/domain.c +@@ -55,6 +55,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -75,9 +76,15 @@ void (*dead_idle) (void) __read_mostly = default_dead_idle; + + static void default_idle(void) + { ++ struct cpu_info *info = get_cpu_info(); ++ + local_irq_disable(); + if ( cpu_is_haltable(smp_processor_id()) ) ++ { ++ spec_ctrl_enter_idle(info); + safe_halt(); ++ spec_ctrl_exit_idle(info); ++ } + else + local_irq_enable(); + } +@@ -89,6 +96,7 @@ void default_dead_idle(void) + * held by the CPUs spinning here indefinitely, and get discarded by + * a subsequent INIT. + */ ++ spec_ctrl_enter_idle(get_cpu_info()); + wbinvd(); + for ( ; ; ) + halt(); +diff --git a/xen/include/asm-x86/spec_ctrl.h b/xen/include/asm-x86/spec_ctrl.h +index e328b0f509..5ab4ff3f68 100644 +--- a/xen/include/asm-x86/spec_ctrl.h ++++ b/xen/include/asm-x86/spec_ctrl.h +@@ -20,7 +20,9 @@ + #ifndef __X86_SPEC_CTRL_H__ + #define __X86_SPEC_CTRL_H__ + ++#include + #include ++#include + + void init_speculation_mitigations(void); + +@@ -35,6 +37,38 @@ static inline void init_shadow_spec_ctrl_state(void) + info->bti_ist_info = default_bti_ist_info; + } + ++/* WARNING! `ret`, `call *`, `jmp *` not safe after this call. */ ++static always_inline void spec_ctrl_enter_idle(struct cpu_info *info) ++{ ++ uint32_t val = 0; ++ ++ /* ++ * Latch the new shadow value, then enable shadowing, then update the MSR. ++ * There are no SMP issues here; only local processor ordering concerns. ++ */ ++ info->shadow_spec_ctrl = val; ++ barrier(); ++ info->use_shadow_spec_ctrl = true; ++ barrier(); ++ asm volatile ( ALTERNATIVE(ASM_NOP3, "wrmsr", X86_FEATURE_XEN_IBRS_SET) ++ :: "a" (val), "c" (MSR_SPEC_CTRL), "d" (0) : "memory" ); ++} ++ ++/* WARNING! `ret`, `call *`, `jmp *` not safe before this call. */ ++static always_inline void spec_ctrl_exit_idle(struct cpu_info *info) ++{ ++ uint32_t val = SPEC_CTRL_IBRS; ++ ++ /* ++ * Disable shadowing before updating the MSR. There are no SMP issues ++ * here; only local processor ordering concerns. 
++ */ ++ info->use_shadow_spec_ctrl = false; ++ barrier(); ++ asm volatile ( ALTERNATIVE(ASM_NOP3, "wrmsr", X86_FEATURE_XEN_IBRS_SET) ++ :: "a" (val), "c" (MSR_SPEC_CTRL), "d" (0) : "memory" ); ++} ++ + #endif /* !__X86_SPEC_CTRL_H__ */ + + /* +-- +2.14.3 + diff --git a/xen.xsa254.pti.patch b/xen.xsa254.pti.patch index 5295737..611a881 100644 --- a/xen.xsa254.pti.patch +++ b/xen.xsa254.pti.patch @@ -1375,3 +1375,36 @@ index fc38874b1f..a8825c89df 100644 -- 2.14.3 +From 05eba93a0a344ec189e71722bd542cdc7949a8a5 Mon Sep 17 00:00:00 2001 +From: Wei Liu +Date: Thu, 8 Feb 2018 11:45:19 +0100 +Subject: [PATCH] x86: fix GET_STACK_END + +AIUI the purpose of having the .if directive is to make GET_STACK_END +work with any general purpose registers. The code as-is would produce +the wrong result for r8. Fix it. + +Signed-off-by: Wei Liu +Acked-by: Andrew Cooper +master commit: 8155476765a5bdecea1534b46562cf28e0113a9a +master date: 2018-01-25 11:34:17 +0000 +--- + xen/include/asm-x86/asm_defns.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/xen/include/asm-x86/asm_defns.h b/xen/include/asm-x86/asm_defns.h +index 9cfd79f..61b6d35 100644 +--- a/xen/include/asm-x86/asm_defns.h ++++ b/xen/include/asm-x86/asm_defns.h +@@ -120,7 +120,7 @@ void ret_from_intr(void); + + #define STACK_CPUINFO_FIELD(field) (1 - CPUINFO_sizeof + CPUINFO_##field) + #define GET_STACK_END(reg) \ +- .if .Lr##reg > 8; \ ++ .if .Lr##reg >= 8; \ + movq $STACK_SIZE-1, %r##reg; \ + .else; \ + movl $STACK_SIZE-1, %e##reg; \ +-- +2.1.4 + diff --git a/xsa252.patch b/xsa252.patch new file mode 100644 index 0000000..8615928 --- /dev/null +++ b/xsa252.patch @@ -0,0 +1,27 @@ +From: Jan Beulich +Subject: memory: don't implicitly unpin for decrease-reservation + +It very likely was a mistake (copy-and-paste from domain cleanup code) +to implicitly unpin here: The caller should really unpin itself before +(or after, if they so wish) requesting the page to be removed. + +This is XSA-252. + +Reported-by: Jann Horn +Signed-off-by: Jan Beulich +Reviewed-by: Andrew Cooper + +--- a/xen/common/memory.c ++++ b/xen/common/memory.c +@@ -357,11 +357,6 @@ int guest_remove_page(struct domain *d, + + rc = guest_physmap_remove_page(d, _gfn(gmfn), mfn, 0); + +-#ifdef _PGT_pinned +- if ( !rc && test_and_clear_bit(_PGT_pinned, &page->u.inuse.type_info) ) +- put_page_and_type(page); +-#endif +- + /* + * With the lack of an IOMMU on some platforms, domains with DMA-capable + * device must retrieve the same pfn when the hypercall populate_physmap diff --git a/xsa255-1.patch b/xsa255-1.patch new file mode 100644 index 0000000..f8bba9e --- /dev/null +++ b/xsa255-1.patch @@ -0,0 +1,133 @@ +From: Jan Beulich +Subject: gnttab/ARM: don't corrupt shared GFN array + +... by writing status GFNs to it. Introduce a second array instead. +Also implement gnttab_status_gmfn() properly now that the information is +suitably being tracked. + +While touching it anyway, remove a misguided (but luckily benign) upper +bound check from gnttab_shared_gmfn(): We should never access beyond the +bounds of that array. + +This is part of XSA-255. + +Signed-off-by: Jan Beulich +Reviewed-by: Stefano Stabellini +Reviewed-by: Andrew Cooper +--- +v3: Don't init the ARM GFN arrays to zero anymore, use INVALID_GFN. +v2: New. 
+ +--- a/xen/common/grant_table.c ++++ b/xen/common/grant_table.c +@@ -3775,6 +3775,7 @@ int gnttab_map_frame(struct domain *d, u + { + int rc = 0; + struct grant_table *gt = d->grant_table; ++ bool status = false; + + grant_write_lock(gt); + +@@ -3785,6 +3786,7 @@ int gnttab_map_frame(struct domain *d, u + (idx & XENMAPIDX_grant_table_status) ) + { + idx &= ~XENMAPIDX_grant_table_status; ++ status = true; + if ( idx < nr_status_frames(gt) ) + *mfn = _mfn(virt_to_mfn(gt->status[idx])); + else +@@ -3802,7 +3804,7 @@ int gnttab_map_frame(struct domain *d, u + } + + if ( !rc ) +- gnttab_set_frame_gfn(gt, idx, gfn); ++ gnttab_set_frame_gfn(gt, status, idx, gfn); + + grant_write_unlock(gt); + +--- a/xen/include/asm-arm/grant_table.h ++++ b/xen/include/asm-arm/grant_table.h +@@ -9,7 +9,8 @@ + #define INITIAL_NR_GRANT_FRAMES 1U + + struct grant_table_arch { +- gfn_t *gfn; ++ gfn_t *shared_gfn; ++ gfn_t *status_gfn; + }; + + void gnttab_clear_flag(unsigned long nr, uint16_t *addr); +@@ -21,7 +22,6 @@ int replace_grant_host_mapping(unsigned + unsigned long new_gpaddr, unsigned int flags); + void gnttab_mark_dirty(struct domain *d, unsigned long l); + #define gnttab_create_status_page(d, t, i) do {} while (0) +-#define gnttab_status_gmfn(d, t, i) (0) + #define gnttab_release_host_mappings(domain) 1 + static inline int replace_grant_supported(void) + { +@@ -42,19 +42,35 @@ static inline unsigned int gnttab_dom0_m + + #define gnttab_init_arch(gt) \ + ({ \ +- (gt)->arch.gfn = xzalloc_array(gfn_t, (gt)->max_grant_frames); \ +- ( (gt)->arch.gfn ? 0 : -ENOMEM ); \ ++ unsigned int ngf_ = (gt)->max_grant_frames; \ ++ unsigned int nsf_ = grant_to_status_frames(ngf_); \ ++ \ ++ (gt)->arch.shared_gfn = xmalloc_array(gfn_t, ngf_); \ ++ (gt)->arch.status_gfn = xmalloc_array(gfn_t, nsf_); \ ++ if ( (gt)->arch.shared_gfn && (gt)->arch.status_gfn ) \ ++ { \ ++ while ( ngf_-- ) \ ++ (gt)->arch.shared_gfn[ngf_] = INVALID_GFN; \ ++ while ( nsf_-- ) \ ++ (gt)->arch.status_gfn[nsf_] = INVALID_GFN; \ ++ } \ ++ else \ ++ gnttab_destroy_arch(gt); \ ++ (gt)->arch.shared_gfn ? 0 : -ENOMEM; \ + }) + + #define gnttab_destroy_arch(gt) \ + do { \ +- xfree((gt)->arch.gfn); \ +- (gt)->arch.gfn = NULL; \ ++ xfree((gt)->arch.shared_gfn); \ ++ (gt)->arch.shared_gfn = NULL; \ ++ xfree((gt)->arch.status_gfn); \ ++ (gt)->arch.status_gfn = NULL; \ + } while ( 0 ) + +-#define gnttab_set_frame_gfn(gt, idx, gfn) \ ++#define gnttab_set_frame_gfn(gt, st, idx, gfn) \ + do { \ +- (gt)->arch.gfn[idx] = gfn; \ ++ ((st) ? (gt)->arch.status_gfn : (gt)->arch.shared_gfn)[idx] = \ ++ (gfn); \ + } while ( 0 ) + + #define gnttab_create_shared_page(d, t, i) \ +@@ -65,8 +81,10 @@ static inline unsigned int gnttab_dom0_m + } while ( 0 ) + + #define gnttab_shared_gmfn(d, t, i) \ +- ( ((i >= nr_grant_frames(t)) && \ +- (i < (t)->max_grant_frames))? 0 : gfn_x((t)->arch.gfn[i])) ++ gfn_x(((i) >= nr_grant_frames(t)) ? INVALID_GFN : (t)->arch.shared_gfn[i]) ++ ++#define gnttab_status_gmfn(d, t, i) \ ++ gfn_x(((i) >= nr_status_frames(t)) ? 
INVALID_GFN : (t)->arch.status_gfn[i]) + + #define gnttab_need_iommu_mapping(d) \ + (is_domain_direct_mapped(d) && need_iommu(d)) +--- a/xen/include/asm-x86/grant_table.h ++++ b/xen/include/asm-x86/grant_table.h +@@ -46,7 +46,7 @@ static inline unsigned int gnttab_dom0_m + + #define gnttab_init_arch(gt) 0 + #define gnttab_destroy_arch(gt) do {} while ( 0 ) +-#define gnttab_set_frame_gfn(gt, idx, gfn) do {} while ( 0 ) ++#define gnttab_set_frame_gfn(gt, st, idx, gfn) do {} while ( 0 ) + + #define gnttab_create_shared_page(d, t, i) \ + do { \ diff --git a/xsa255-2.patch b/xsa255-2.patch new file mode 100644 index 0000000..402b6ef --- /dev/null +++ b/xsa255-2.patch @@ -0,0 +1,167 @@ +From: Jan Beulich +Subject: gnttab: don't blindly free status pages upon version change + +There may still be active mappings, which would trigger the respective +BUG_ON(). Split the loop into one dealing with the page attributes and +the second (when the first fully passed) freeing the pages. Return an +error if any pages still have pending references. + +This is part of XSA-255. + +Signed-off-by: Jan Beulich +Reviewed-by: Stefano Stabellini +Reviewed-by: Andrew Cooper +--- +v4: Add gprintk(XENLOG_ERR, ...) to domain_crash() invocations. +v3: Call guest_physmap_remove_page() from gnttab_map_frame(), making the + code unconditional at the same time. Re-base over changes to first + patch. +v2: Also deal with translated guests. + +--- a/xen/common/grant_table.c ++++ b/xen/common/grant_table.c +@@ -1636,23 +1636,74 @@ status_alloc_failed: + return -ENOMEM; + } + +-static void ++static int + gnttab_unpopulate_status_frames(struct domain *d, struct grant_table *gt) + { +- int i; ++ unsigned int i; + + for ( i = 0; i < nr_status_frames(gt); i++ ) + { + struct page_info *pg = virt_to_page(gt->status[i]); ++ gfn_t gfn = gnttab_get_frame_gfn(gt, true, i); ++ ++ /* ++ * For translated domains, recovering from failure after partial ++ * changes were made is more complicated than it seems worth ++ * implementing at this time. Hence respective error paths below ++ * crash the domain in such a case. ++ */ ++ if ( paging_mode_translate(d) ) ++ { ++ int rc = gfn_eq(gfn, INVALID_GFN) ++ ? 
0 ++ : guest_physmap_remove_page(d, gfn, ++ _mfn(page_to_mfn(pg)), 0); ++ ++ if ( rc ) ++ { ++ gprintk(XENLOG_ERR, ++ "Could not remove status frame %u (GFN %#lx) from P2M\n", ++ i, gfn_x(gfn)); ++ domain_crash(d); ++ return rc; ++ } ++ gnttab_set_frame_gfn(gt, true, i, INVALID_GFN); ++ } + + BUG_ON(page_get_owner(pg) != d); + if ( test_and_clear_bit(_PGC_allocated, &pg->count_info) ) + put_page(pg); +- BUG_ON(pg->count_info & ~PGC_xen_heap); ++ ++ if ( pg->count_info & ~PGC_xen_heap ) ++ { ++ if ( paging_mode_translate(d) ) ++ { ++ gprintk(XENLOG_ERR, ++ "Wrong page state %#lx of status frame %u (GFN %#lx)\n", ++ pg->count_info, i, gfn_x(gfn)); ++ domain_crash(d); ++ } ++ else ++ { ++ if ( get_page(pg, d) ) ++ set_bit(_PGC_allocated, &pg->count_info); ++ while ( i-- ) ++ gnttab_create_status_page(d, gt, i); ++ } ++ return -EBUSY; ++ } ++ ++ page_set_owner(pg, NULL); ++ } ++ ++ for ( i = 0; i < nr_status_frames(gt); i++ ) ++ { + free_xenheap_page(gt->status[i]); + gt->status[i] = NULL; + } + gt->nr_status_frames = 0; ++ ++ return 0; + } + + /* +@@ -2962,8 +3013,9 @@ gnttab_set_version(XEN_GUEST_HANDLE_PARA + break; + } + +- if ( op.version < 2 && gt->gt_version == 2 ) +- gnttab_unpopulate_status_frames(currd, gt); ++ if ( op.version < 2 && gt->gt_version == 2 && ++ (res = gnttab_unpopulate_status_frames(currd, gt)) != 0 ) ++ goto out_unlock; + + /* Make sure there's no crud left over from the old version. */ + for ( i = 0; i < nr_grant_frames(gt); i++ ) +@@ -3803,6 +3855,11 @@ int gnttab_map_frame(struct domain *d, u + rc = -EINVAL; + } + ++ if ( !rc && paging_mode_translate(d) && ++ !gfn_eq(gnttab_get_frame_gfn(gt, status, idx), INVALID_GFN) ) ++ rc = guest_physmap_remove_page(d, gnttab_get_frame_gfn(gt, status, idx), ++ *mfn, 0); ++ + if ( !rc ) + gnttab_set_frame_gfn(gt, status, idx, gfn); + +--- a/xen/include/asm-arm/grant_table.h ++++ b/xen/include/asm-arm/grant_table.h +@@ -73,6 +73,11 @@ static inline unsigned int gnttab_dom0_m + (gfn); \ + } while ( 0 ) + ++#define gnttab_get_frame_gfn(gt, st, idx) ({ \ ++ _gfn((st) ? gnttab_status_gmfn(NULL, gt, idx) \ ++ : gnttab_shared_gmfn(NULL, gt, idx)); \ ++}) ++ + #define gnttab_create_shared_page(d, t, i) \ + do { \ + share_xen_page_with_guest( \ +--- a/xen/include/asm-x86/grant_table.h ++++ b/xen/include/asm-x86/grant_table.h +@@ -47,6 +47,12 @@ static inline unsigned int gnttab_dom0_m + #define gnttab_init_arch(gt) 0 + #define gnttab_destroy_arch(gt) do {} while ( 0 ) + #define gnttab_set_frame_gfn(gt, st, idx, gfn) do {} while ( 0 ) ++#define gnttab_get_frame_gfn(gt, st, idx) ({ \ ++ unsigned long mfn_ = (st) ? gnttab_status_mfn(gt, idx) \ ++ : gnttab_shared_mfn(gt, idx); \ ++ unsigned long gpfn_ = get_gpfn_from_mfn(mfn_); \ ++ VALID_M2P(gpfn_) ? 
_gfn(gpfn_) : INVALID_GFN; \ ++}) + + #define gnttab_create_shared_page(d, t, i) \ + do { \ +@@ -63,11 +69,11 @@ static inline unsigned int gnttab_dom0_m + } while ( 0 ) + + +-#define gnttab_shared_mfn(d, t, i) \ ++#define gnttab_shared_mfn(t, i) \ + ((virt_to_maddr((t)->shared_raw[i]) >> PAGE_SHIFT)) + + #define gnttab_shared_gmfn(d, t, i) \ +- (mfn_to_gmfn(d, gnttab_shared_mfn(d, t, i))) ++ (mfn_to_gmfn(d, gnttab_shared_mfn(t, i))) + + + #define gnttab_status_mfn(t, i) \ diff --git a/xsa256.patch b/xsa256.patch new file mode 100644 index 0000000..50ff24e --- /dev/null +++ b/xsa256.patch @@ -0,0 +1,40 @@ +From: Andrew Cooper +Subject: x86/hvm: Disallow the creation of HVM domains without Local APIC emulation + +There are multiple problems, not necesserily limited to: + + * Guests which configure event channels via hvmop_set_evtchn_upcall_vector(), + or which hit %cr8 emulation will cause Xen to fall over a NULL vlapic->regs + pointer. + + * On Intel hardware, disabling the TPR_SHADOW execution control without + reenabling CR8_{LOAD,STORE} interception means that the guests %cr8 + accesses interact with the real TPR. Amongst other things, setting the + real TPR to 0xf blocks even IPIs from interrupting this CPU. + + * On hardware which sets up the use of Interrupt Posting, including + IOMMU-Posting, guests run without the appropriate non-root configuration, + which at a minimum will result in dropped interrupts. + +Whether no-LAPIC mode is of any use at all remains to be seen. + +This is XSA-256. + +Reported-by: Ian Jackson +Reviewed-by: Roger Pau Monné +Signed-off-by: Andrew Cooper +Reviewed-by: Jan Beulich + +diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c +index f93327b..f65fc12 100644 +--- a/xen/arch/x86/domain.c ++++ b/xen/arch/x86/domain.c +@@ -413,7 +413,7 @@ static bool emulation_flags_ok(const struct domain *d, uint32_t emflags) + if ( is_hardware_domain(d) && + emflags != (XEN_X86_EMU_LAPIC|XEN_X86_EMU_IOAPIC) ) + return false; +- if ( !is_hardware_domain(d) && emflags && ++ if ( !is_hardware_domain(d) && + emflags != XEN_X86_EMU_ALL && emflags != XEN_X86_EMU_LAPIC ) + return false; + }
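
The following is an illustrative, self-contained C sketch of the guard logic that the
"x86/ctxt: Issue a speculation barrier between vcpu contexts" patch above adds to
context_switch(): the domid/vcpu id pair is squashed into a single integer and compared
against the id of the vcpu that last ran on this CPU, so the IBPB is skipped on
transitions to idle and back to the same vcpu. The struct vcpu layout, the printf-backed
wrmsrl() stub and the single (non-per-CPU) last_id variable are simplifications for the
sketch, not Xen code.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Simplified stand-ins for the Xen definitions used by the patch above. */
#define MSR_PRED_CMD   0x49
#define PRED_CMD_IBPB  (1u << 0)

struct vcpu { uint16_t domain_id; uint16_t vcpu_id; bool is_idle; };

static bool opt_ibpb = true;   /* command line "bti=...,ibpb=<bool>" control    */
static unsigned int last_id;   /* per-CPU in Xen; a single CPU is modelled here */

static void wrmsrl(unsigned int msr, uint64_t val)
{
    printf("wrmsr 0x%x <- 0x%llx (IBPB issued)\n", msr, (unsigned long long)val);
}

/* Mirror of the guard added to context_switch(): skip the barrier when
 * switching to idle, or back to the vcpu that last ran on this CPU. */
static void maybe_issue_ibpb(const struct vcpu *next)
{
    if ( !opt_ibpb || next->is_idle )
        return;

    unsigned int next_id = ((unsigned int)next->domain_id << 16) | next->vcpu_id;

    if ( last_id != next_id )
    {
        wrmsrl(MSR_PRED_CMD, PRED_CMD_IBPB);
        last_id = next_id;
    }
}

int main(void)
{
    struct vcpu d1v0 = { 1, 0, false }, d2v0 = { 2, 0, false }, idle = { 0, 0, true };

    maybe_issue_ibpb(&d1v0);  /* barrier: first non-idle vcpu on this CPU        */
    maybe_issue_ibpb(&idle);  /* no barrier: idle context is already robust      */
    maybe_issue_ibpb(&d1v0);  /* no barrier: same security context as before     */
    maybe_issue_ibpb(&d2v0);  /* barrier: different domain                       */
    return 0;
}

With this shape, a ping-pong between a vcpu and the idle vcpu on a lightly loaded or
pinned system never pays the IBPB cost, which is the point the patch's comment makes.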
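
Similarly, a minimal sketch of the shadow-SPEC_CTRL ordering used by the
"x86/idle: Clear SPEC_CTRL while idle" patch: the shadow value is latched and shadowing
enabled before the MSR is cleared on entry to idle, and shadowing is disabled before
IBRS is re-asserted on exit. The write_spec_ctrl() stub and the plain compiler barrier()
stand in for the real wrmsr ALTERNATIVE() and Xen's barrier(); this is not the actual
Xen implementation.

#include <stdbool.h>
#include <stdint.h>

#define SPEC_CTRL_IBRS (1u << 0)
#define barrier() asm volatile ( "" ::: "memory" )   /* compiler barrier only */

struct cpu_info {
    uint32_t shadow_spec_ctrl;
    bool     use_shadow_spec_ctrl;
};

/* Stub standing in for the real MSR_SPEC_CTRL write. */
static void write_spec_ctrl(uint32_t val) { (void)val; }

/* Enter idle: publish the shadow value and enable shadowing *before*
 * dropping IBRS, so an interrupt taken in between restores 0 correctly. */
static inline void spec_ctrl_enter_idle(struct cpu_info *info)
{
    uint32_t val = 0;

    info->shadow_spec_ctrl = val;
    barrier();
    info->use_shadow_spec_ctrl = true;
    barrier();
    write_spec_ctrl(val);
}

/* Exit idle: stop shadowing first, then re-assert IBRS in the MSR. */
static inline void spec_ctrl_exit_idle(struct cpu_info *info)
{
    uint32_t val = SPEC_CTRL_IBRS;

    info->use_shadow_spec_ctrl = false;
    barrier();
    write_spec_ctrl(val);
}

int main(void)
{
    struct cpu_info info = { 0, false };

    spec_ctrl_enter_idle(&info);   /* MSR cleared, shadow value active    */
    spec_ctrl_exit_idle(&info);    /* IBRS re-asserted, shadow disabled   */
    return 0;
}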