diff --git a/.gitignore b/.gitignore index 8757e3a..d695cf4 100644 --- a/.gitignore +++ b/.gitignore @@ -6,4 +6,4 @@ lwip-1.3.0.tar.gz pciutils-2.2.9.tar.bz2 zlib-1.2.3.tar.gz polarssl-1.1.4-gpl.tgz -/xen-4.10.0.tar.gz +/xen-4.10.1.tar.gz diff --git a/4.10.0-shim-comet-3.patch b/4.10.0-shim-comet-3.patch deleted file mode 100644 index 61f2645..0000000 --- a/4.10.0-shim-comet-3.patch +++ /dev/null @@ -1,10861 +0,0 @@ -From ab7be6ce4ac8cc3f32952d8c9c260412e780e939 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Wed, 20 Dec 2017 15:40:58 +0100 -Subject: [PATCH 02/77] xen/pv: Construct d0v0's GDT properly - -c/s cf6d39f8199 "x86/PV: properly populate descriptor tables" changed the GDT -to reference zero_page for intermediate frames between the guest and Xen -frames. - -Because dom0_construct_pv() doesn't call arch_set_info_guest(), some bits of -initialisation are missed, including the pv_destroy_gdt() which initially -fills the references to zero_page. - -In practice, this means there is a window between starting and the first call -to HYPERCALL_set_gdt() were lar/lsl/verr/verw suffer non-architectural -behaviour. - -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich -master commit: 08f27f4468eedbeccaac9fdda4ef732247efd74e -master date: 2017-12-01 19:03:26 +0000 ---- - xen/arch/x86/pv/dom0_build.c | 8 ++++++++ - 1 file changed, 8 insertions(+) - -diff --git a/xen/arch/x86/pv/dom0_build.c b/xen/arch/x86/pv/dom0_build.c -index 44601d08d3..a13412efb9 100644 ---- a/xen/arch/x86/pv/dom0_build.c -+++ b/xen/arch/x86/pv/dom0_build.c -@@ -18,6 +18,7 @@ - #include - #include - #include -+#include - #include - - /* Allow ring-3 access in long mode as guest cannot use ring 1 ... */ -@@ -866,6 +867,13 @@ int __init dom0_construct_pv(struct domain *d, - regs->rsi = vstartinfo_start; - regs->eflags = X86_EFLAGS_IF; - -+ /* -+ * We don't call arch_set_info_guest(), so some initialisation needs doing -+ * by hand: -+ * - Reset the GDT to reference zero_page -+ */ -+ pv_destroy_gdt(v); -+ - if ( test_bit(XENFEAT_supervisor_mode_kernel, parms.f_required) ) - panic("Dom0 requires supervisor-mode execution"); - --- -2.14.3 - - -From 4150501b717e7fde77c9ab4e96dd9916d7345b55 Mon Sep 17 00:00:00 2001 -From: Sergey Dyasli -Date: Wed, 20 Dec 2017 15:41:33 +0100 -Subject: [PATCH 03/77] x86/vvmx: don't enable vmcs shadowing for nested guests - -Running "./xtf_runner vvmx" in L1 Xen under L0 Xen produces the -following result on H/W with VMCS shadowing: - - Test: vmxon - Failure in test_vmxon_in_root_cpl0() - Expected 0x8200000f: VMfailValid(15) VMXON_IN_ROOT - Got 0x82004400: VMfailValid(17408) - Test result: FAILURE - -This happens because SDM allows vmentries with enabled VMCS shadowing -VM-execution control and VMCS link pointer value of ~0ull. But results -of a nested VMREAD are undefined in such cases. - -Fix this by not copying the value of VMCS shadowing control from vmcs01 -to vmcs02. 
- -Signed-off-by: Sergey Dyasli -Acked-by: Kevin Tian -master commit: 19fdb8e258619aea265af9c183e035e545cbc2d2 -master date: 2017-12-01 19:03:27 +0000 ---- - xen/arch/x86/hvm/vmx/vvmx.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/xen/arch/x86/hvm/vmx/vvmx.c b/xen/arch/x86/hvm/vmx/vvmx.c -index dde02c076b..013d049f8a 100644 ---- a/xen/arch/x86/hvm/vmx/vvmx.c -+++ b/xen/arch/x86/hvm/vmx/vvmx.c -@@ -633,6 +633,7 @@ void nvmx_update_secondary_exec_control(struct vcpu *v, - SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY; - - host_cntrl &= ~apicv_bit; -+ host_cntrl &= ~SECONDARY_EXEC_ENABLE_VMCS_SHADOWING; - shadow_cntrl = get_vvmcs(v, SECONDARY_VM_EXEC_CONTROL); - - /* No vAPIC-v support, so it shouldn't be set in vmcs12. */ --- -2.14.3 - - -From c8f4f45e04dd782ac5dfdf58866339ac97186324 Mon Sep 17 00:00:00 2001 -From: Daniel Kiper -Date: Wed, 20 Dec 2017 15:42:13 +0100 -Subject: [PATCH 04/77] x86/mb2: avoid Xen image when looking for - module/crashkernel position - -Commit e22e1c4 (x86/EFI: avoid Xen image when looking for module/kexec -position) added relevant check for EFI case. However, since commit -f75a304 (x86: add multiboot2 protocol support for relocatable images) -Multiboot2 compatible bootloaders are able to relocate Xen image too. -So, we have to avoid also Xen image region in such cases. - -Reported-by: Andrew Cooper -Reported-by: Konrad Rzeszutek Wilk -Signed-off-by: Daniel Kiper -Reviewed-by: Jan Beulich -master commit: 9589927e5bf9e123ec42b6e0b0809f153bd92732 -master date: 2017-12-12 14:30:53 +0100 ---- - xen/arch/x86/setup.c | 11 ++++++----- - 1 file changed, 6 insertions(+), 5 deletions(-) - -diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c -index 32bb02e3a5..2e10c6bdf4 100644 ---- a/xen/arch/x86/setup.c -+++ b/xen/arch/x86/setup.c -@@ -653,7 +653,7 @@ void __init noreturn __start_xen(unsigned long mbi_p) - module_t *mod = (module_t *)__va(mbi->mods_addr); - unsigned long nr_pages, raw_max_page, modules_headroom, *module_map; - int i, j, e820_warn = 0, bytes = 0; -- bool acpi_boot_table_init_done = false; -+ bool acpi_boot_table_init_done = false, relocated = false; - struct domain *dom0; - struct ns16550_defaults ns16550 = { - .data_bits = 8, -@@ -904,8 +904,10 @@ void __init noreturn __start_xen(unsigned long mbi_p) - mod[i].reserved = 0; - } - -- if ( efi_enabled(EFI_LOADER) ) -+ if ( xen_phys_start ) - { -+ relocated = true; -+ - /* - * This needs to remain in sync with xen_in_range() and the - * respective reserve_e820_ram() invocation below. -@@ -1098,8 +1100,7 @@ void __init noreturn __start_xen(unsigned long mbi_p) - - /* Don't overlap with other modules (or Xen itself). */ - end = consider_modules(s, e, size, mod, -- mbi->mods_count + efi_enabled(EFI_LOADER), -- j); -+ mbi->mods_count + relocated, j); - - if ( highmem_start && end > highmem_start ) - continue; -@@ -1126,7 +1127,7 @@ void __init noreturn __start_xen(unsigned long mbi_p) - { - /* Don't overlap with modules (or Xen itself). 
*/ - e = consider_modules(s, e, PAGE_ALIGN(kexec_crash_area.size), mod, -- mbi->mods_count + efi_enabled(EFI_LOADER), -1); -+ mbi->mods_count + relocated, -1); - if ( s >= e ) - break; - if ( e > kexec_crash_area_limit ) --- -2.14.3 - - -From e2dc7b584f4c7ab6ad7ab543e5cf7ee2e6d1d569 Mon Sep 17 00:00:00 2001 -From: Jan Beulich -Date: Wed, 20 Dec 2017 15:42:42 +0100 -Subject: [PATCH 05/77] x86/mm: drop bogus paging mode assertion - -Olaf has observed this assertion to trigger after an aborted migration -of a PV guest: - -(XEN) Xen call trace: -(XEN) [] do_page_fault+0x39f/0x55c -(XEN) [] x86_64/entry.S#handle_exception_saved+0x66/0xa4 -(XEN) [] __copy_to_user_ll+0x22/0x30 -(XEN) [] update_runstate_area+0x19c/0x228 -(XEN) [] domain.c#_update_runstate_area+0x11/0x39 -(XEN) [] context_switch+0x1fd/0xf25 -(XEN) [] schedule.c#schedule+0x303/0x6a8 -(XEN) [] softirq.c#__do_softirq+0x6c/0x95 -(XEN) [] do_softirq+0x13/0x15 -(XEN) [] x86_64/entry.S#process_softirqs+0x21/0x30 - -Release builds work fine, which is a first indication that the assertion -isn't really needed. - -What's worse though - there appears to be a timing window where the -guest runs in shadow mode, but not in log-dirty mode, and that is what -triggers the assertion (the same could, afaict, be achieved by test- -enabling shadow mode on a PV guest). This is because turing off log- -dirty mode is being performed in two steps: First the log-dirty bit gets -cleared (paging_log_dirty_disable() [having paused the domain] -> -sh_disable_log_dirty() -> shadow_one_bit_disable()), followed by -unpausing the domain and only then clearing shadow mode (via -shadow_test_disable(), which pauses the domain a second time). - -Hence besides removing the ASSERT() here (or optionally replacing it by -explicit translate and refcounts mode checks, but this seems rather -pointless now that the three are tied together) I wonder whether either -shadow_one_bit_disable() should turn off shadow mode if no other bit -besides PG_SH_enable remains set (just like shadow_one_bit_enable() -enables it if not already set), or the domain pausing scope should be -extended so that both steps occur without the domain getting a chance to -run in between. - -Reported-by: Olaf Hering -Signed-off-by: Jan Beulich -Reviewed-by: Tim Deegan -Acked-by: Andrew Cooper -master commit: b95f7be32d668fa4b09300892ebe19636ecebe36 -master date: 2017-12-12 16:56:15 +0100 ---- - xen/arch/x86/traps.c | 6 +----- - xen/include/asm-x86/paging.h | 3 --- - 2 files changed, 1 insertion(+), 8 deletions(-) - -diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c -index 86506f3747..642f3cc6d7 100644 ---- a/xen/arch/x86/traps.c -+++ b/xen/arch/x86/traps.c -@@ -1338,12 +1338,8 @@ static int fixup_page_fault(unsigned long addr, struct cpu_user_regs *regs) - */ - if ( paging_mode_enabled(d) && !paging_mode_external(d) ) - { -- int ret; -+ int ret = paging_fault(addr, regs); - -- /* Logdirty mode is the only expected paging mode for PV guests. 
*/ -- ASSERT(paging_mode_only_log_dirty(d)); -- -- ret = paging_fault(addr, regs); - if ( ret == EXCRET_fault_fixed ) - trace_trap_two_addr(TRC_PV_PAGING_FIXUP, regs->rip, addr); - return ret; -diff --git a/xen/include/asm-x86/paging.h b/xen/include/asm-x86/paging.h -index d99ddedec0..5607ab4b1f 100644 ---- a/xen/include/asm-x86/paging.h -+++ b/xen/include/asm-x86/paging.h -@@ -69,9 +69,6 @@ - #define paging_mode_translate(_d) (!!((_d)->arch.paging.mode & PG_translate)) - #define paging_mode_external(_d) (!!((_d)->arch.paging.mode & PG_external)) - --#define paging_mode_only_log_dirty(_d) \ -- (((_d)->arch.paging.mode & PG_MASK) == PG_log_dirty) -- - /* flags used for paging debug */ - #define PAGING_DEBUG_LOGDIRTY 0 - --- -2.14.3 - - -From e5364c32c650fef60b91b9be9b10f38055ffc2cf Mon Sep 17 00:00:00 2001 -From: Tom Lendacky -Date: Wed, 20 Dec 2017 15:43:14 +0100 -Subject: [PATCH 06/77] x86/microcode: Add support for fam17h microcode loading - -The size for the Microcode Patch Block (MPB) for an AMD family 17h -processor is 3200 bytes. Add a #define for fam17h so that it does -not default to 2048 bytes and fail a microcode load/update. - -Signed-off-by: Tom Lendacky -Signed-off-by: Thomas Gleixner -Reviewed-by: Borislav Petkov -Signed-off-by: Ingo Molnar -[Linux commit f4e9b7af0cd58dd039a0fb2cd67d57cea4889abf] - -Ported to Xen. - -Signed-off-by: Andrew Cooper -Acked-by: Jan Beulich -master commit: 61d458ba8c171809e8dd9abd19339c87f3f934ca -master date: 2017-12-13 14:30:10 +0000 ---- - xen/arch/x86/microcode_amd.c | 4 ++++ - 1 file changed, 4 insertions(+) - -diff --git a/xen/arch/x86/microcode_amd.c b/xen/arch/x86/microcode_amd.c -index b54b0b99e4..53f9f548cd 100644 ---- a/xen/arch/x86/microcode_amd.c -+++ b/xen/arch/x86/microcode_amd.c -@@ -107,6 +107,7 @@ static bool_t verify_patch_size(uint32_t patch_size) - #define F14H_MPB_MAX_SIZE 1824 - #define F15H_MPB_MAX_SIZE 4096 - #define F16H_MPB_MAX_SIZE 3458 -+#define F17H_MPB_MAX_SIZE 3200 - - switch (boot_cpu_data.x86) - { -@@ -119,6 +120,9 @@ static bool_t verify_patch_size(uint32_t patch_size) - case 0x16: - max_size = F16H_MPB_MAX_SIZE; - break; -+ case 0x17: -+ max_size = F17H_MPB_MAX_SIZE; -+ break; - default: - max_size = F1XH_MPB_MAX_SIZE; - break; --- -2.14.3 - - -From 19dcd8e47dfc81b8e9f867ee79c7ff8e15b975fb Mon Sep 17 00:00:00 2001 -From: Jan Beulich -Date: Wed, 20 Dec 2017 15:43:53 +0100 -Subject: [PATCH 07/77] gnttab: correct GNTTABOP_cache_flush empty batch - handling - -Jann validly points out that with a caller bogusly requesting a zero- -element batch with non-zero high command bits (the ones used for -continuation encoding), the assertion right before the call to -hypercall_create_continuation() would trigger. A similar situation would -arise afaict for non-empty batches with op and/or length zero in every -element. - -While we want the former to succeed (as we do elsewhere for similar -no-op requests), the latter can clearly be converted to an error, as -this is a state that can't be the result of a prior operation. - -Take the opportunity and also correct the order of argument checks: -We shouldn't accept zero-length elements with unknown bits set in "op". -Also constify cache_flush()'s first parameter. 
- -Reported-by: Jann Horn -Signed-off-by: Jan Beulich -Reviewed-by: Andre Przywara -Acked-by: Stefano Stabellini -master commit: 9c22e4d67f5552c7c896ed83bd95d5d4c5837a9d -master date: 2017-12-04 11:03:32 +0100 ---- - xen/common/grant_table.c | 13 +++++++------ - 1 file changed, 7 insertions(+), 6 deletions(-) - -diff --git a/xen/common/grant_table.c b/xen/common/grant_table.c -index c5950f2b3f..bce224be6e 100644 ---- a/xen/common/grant_table.c -+++ b/xen/common/grant_table.c -@@ -3208,7 +3208,7 @@ gnttab_swap_grant_ref(XEN_GUEST_HANDLE_PARAM(gnttab_swap_grant_ref_t) uop, - return 0; - } - --static int cache_flush(gnttab_cache_flush_t *cflush, grant_ref_t *cur_ref) -+static int cache_flush(const gnttab_cache_flush_t *cflush, grant_ref_t *cur_ref) - { - struct domain *d, *owner; - struct page_info *page; -@@ -3218,19 +3218,17 @@ static int cache_flush(gnttab_cache_flush_t *cflush, grant_ref_t *cur_ref) - - if ( (cflush->offset >= PAGE_SIZE) || - (cflush->length > PAGE_SIZE) || -- (cflush->offset + cflush->length > PAGE_SIZE) ) -+ (cflush->offset + cflush->length > PAGE_SIZE) || -+ (cflush->op & ~(GNTTAB_CACHE_INVAL | GNTTAB_CACHE_CLEAN)) ) - return -EINVAL; - - if ( cflush->length == 0 || cflush->op == 0 ) -- return 0; -+ return !*cur_ref ? 0 : -EILSEQ; - - /* currently unimplemented */ - if ( cflush->op & GNTTAB_CACHE_SOURCE_GREF ) - return -EOPNOTSUPP; - -- if ( cflush->op & ~(GNTTAB_CACHE_INVAL|GNTTAB_CACHE_CLEAN) ) -- return -EINVAL; -- - d = rcu_lock_current_domain(); - mfn = cflush->a.dev_bus_addr >> PAGE_SHIFT; - -@@ -3310,6 +3308,9 @@ gnttab_cache_flush(XEN_GUEST_HANDLE_PARAM(gnttab_cache_flush_t) uop, - *cur_ref = 0; - guest_handle_add_offset(uop, 1); - } -+ -+ *cur_ref = 0; -+ - return 0; - } - --- -2.14.3 - - -From 682a9d8d37f1141b199bc3aadf8d5d276b22baf9 Mon Sep 17 00:00:00 2001 -From: Jan Beulich -Date: Wed, 20 Dec 2017 15:44:20 +0100 -Subject: [PATCH 08/77] gnttab: improve GNTTABOP_cache_flush locking - -Dropping the lock before returning from grant_map_exists() means handing -possibly stale information back to the caller. Return back the pointer -to the active entry instead, for the caller to release the lock once -done. 
- -Signed-off-by: Jan Beulich -Reviewed-by: Andre Przywara -Reviewed-by: Stefano Stabellini -master commit: 553ac37137c2d1c03bf1b69cfb192ffbfe29daa4 -master date: 2017-12-04 11:04:18 +0100 ---- - xen/common/grant_table.c | 37 +++++++++++++++++-------------------- - 1 file changed, 17 insertions(+), 20 deletions(-) - -diff --git a/xen/common/grant_table.c b/xen/common/grant_table.c -index bce224be6e..250450bdda 100644 ---- a/xen/common/grant_table.c -+++ b/xen/common/grant_table.c -@@ -786,10 +786,10 @@ static int _set_status(unsigned gt_version, - return _set_status_v2(domid, readonly, mapflag, shah, act, status); - } - --static int grant_map_exists(const struct domain *ld, -- struct grant_table *rgt, -- unsigned long mfn, -- grant_ref_t *cur_ref) -+static struct active_grant_entry *grant_map_exists(const struct domain *ld, -+ struct grant_table *rgt, -+ unsigned long mfn, -+ grant_ref_t *cur_ref) - { - grant_ref_t ref, max_iter; - -@@ -805,28 +805,20 @@ static int grant_map_exists(const struct domain *ld, - nr_grant_entries(rgt)); - for ( ref = *cur_ref; ref < max_iter; ref++ ) - { -- struct active_grant_entry *act; -- bool_t exists; -- -- act = active_entry_acquire(rgt, ref); -- -- exists = act->pin -- && act->domid == ld->domain_id -- && act->frame == mfn; -+ struct active_grant_entry *act = active_entry_acquire(rgt, ref); - -+ if ( act->pin && act->domid == ld->domain_id && act->frame == mfn ) -+ return act; - active_entry_release(act); -- -- if ( exists ) -- return 0; - } - - if ( ref < nr_grant_entries(rgt) ) - { - *cur_ref = ref; -- return 1; -+ return NULL; - } - -- return -EINVAL; -+ return ERR_PTR(-EINVAL); - } - - #define MAPKIND_READ 1 -@@ -3213,6 +3205,7 @@ static int cache_flush(const gnttab_cache_flush_t *cflush, grant_ref_t *cur_ref) - struct domain *d, *owner; - struct page_info *page; - unsigned long mfn; -+ struct active_grant_entry *act = NULL; - void *v; - int ret; - -@@ -3250,13 +3243,13 @@ static int cache_flush(const gnttab_cache_flush_t *cflush, grant_ref_t *cur_ref) - { - grant_read_lock(owner->grant_table); - -- ret = grant_map_exists(d, owner->grant_table, mfn, cur_ref); -- if ( ret != 0 ) -+ act = grant_map_exists(d, owner->grant_table, mfn, cur_ref); -+ if ( IS_ERR_OR_NULL(act) ) - { - grant_read_unlock(owner->grant_table); - rcu_unlock_domain(d); - put_page(page); -- return ret; -+ return act ? PTR_ERR(act) : 1; - } - } - -@@ -3273,7 +3266,11 @@ static int cache_flush(const gnttab_cache_flush_t *cflush, grant_ref_t *cur_ref) - ret = 0; - - if ( d != owner ) -+ { -+ active_entry_release(act); - grant_read_unlock(owner->grant_table); -+ } -+ - unmap_domain_page(v); - put_page(page); - --- -2.14.3 - - -From 135b67e9bd5281084efe9fb1d3604915dac07ce8 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Wed, 20 Dec 2017 15:44:57 +0100 -Subject: [PATCH 09/77] xen/efi: Fix build with clang-5.0 - -The clang-5.0 build is reliably failing with: - - Error: size of boot.o:.text is 0x01 - -which is because efi_arch_flush_dcache_area() exists as a single ret -instruction. Mark it as __init like everything else in the files. - -Spotted by Travis. 
- -Signed-off-by: Andrew Cooper -Reviewed-by: Stefano Stabellini -Acked-by: Jan Beulich -master commit: c4f6ad4c5fd25cb0ccc0cdbe711db97e097f0407 -master date: 2017-12-14 10:59:26 +0000 ---- - xen/arch/arm/efi/efi-boot.h | 2 +- - xen/arch/x86/efi/efi-boot.h | 2 +- - 2 files changed, 2 insertions(+), 2 deletions(-) - -diff --git a/xen/arch/arm/efi/efi-boot.h b/xen/arch/arm/efi/efi-boot.h -index 56de26e918..ca655ff003 100644 ---- a/xen/arch/arm/efi/efi-boot.h -+++ b/xen/arch/arm/efi/efi-boot.h -@@ -597,7 +597,7 @@ static void __init efi_arch_video_init(EFI_GRAPHICS_OUTPUT_PROTOCOL *gop, - { - } - --static void efi_arch_flush_dcache_area(const void *vaddr, UINTN size) -+static void __init efi_arch_flush_dcache_area(const void *vaddr, UINTN size) - { - __flush_dcache_area(vaddr, size); - } -diff --git a/xen/arch/x86/efi/efi-boot.h b/xen/arch/x86/efi/efi-boot.h -index 8d295ff9af..d30f688a5a 100644 ---- a/xen/arch/x86/efi/efi-boot.h -+++ b/xen/arch/x86/efi/efi-boot.h -@@ -668,7 +668,7 @@ static bool __init efi_arch_use_config_file(EFI_SYSTEM_TABLE *SystemTable) - return true; /* x86 always uses a config file */ - } - --static void efi_arch_flush_dcache_area(const void *vaddr, UINTN size) { } -+static void __init efi_arch_flush_dcache_area(const void *vaddr, UINTN size) { } - - void __init efi_multiboot2(EFI_HANDLE ImageHandle, EFI_SYSTEM_TABLE *SystemTable) - { --- -2.14.3 - - -From 9dc5eda576bafca47abc7202f075f28d6250bf4d Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Wed, 20 Dec 2017 15:45:32 +0100 -Subject: [PATCH 10/77] x86/vmx: Don't use hvm_inject_hw_exception() in - long_mode_do_msr_write() - -Since c/s 49de10f3c1718 "x86/hvm: Don't raise #GP behind the emulators back -for MSR accesses", returning X86EMUL_EXCEPTION has pushed the exception -generation to the top of the call tree. - -Using hvm_inject_hw_exception() and returning X86EMUL_EXCEPTION causes a -double #GP injection, which combines to #DF. 
- -Signed-off-by: Andrew Cooper -Acked-by: Kevin Tian -Reviewed-by: Jan Beulich -master commit: 896ee3980e72866b602e743396751384de301fb0 -master date: 2017-12-14 18:05:45 +0000 ---- - xen/arch/x86/hvm/vmx/vmx.c | 11 +++-------- - 1 file changed, 3 insertions(+), 8 deletions(-) - -diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c -index b18cceab55..73254bf5d4 100644 ---- a/xen/arch/x86/hvm/vmx/vmx.c -+++ b/xen/arch/x86/hvm/vmx/vmx.c -@@ -542,7 +542,7 @@ long_mode_do_msr_write(unsigned int msr, uint64_t msr_content) - case MSR_GS_BASE: - case MSR_SHADOW_GS_BASE: - if ( !is_canonical_address(msr_content) ) -- goto uncanonical_address; -+ return HNDL_exception_raised; - - if ( msr == MSR_FS_BASE ) - __vmwrite(GUEST_FS_BASE, msr_content); -@@ -560,14 +560,14 @@ long_mode_do_msr_write(unsigned int msr, uint64_t msr_content) - - case MSR_LSTAR: - if ( !is_canonical_address(msr_content) ) -- goto uncanonical_address; -+ return HNDL_exception_raised; - v->arch.hvm_vmx.lstar = msr_content; - wrmsrl(MSR_LSTAR, msr_content); - break; - - case MSR_CSTAR: - if ( !is_canonical_address(msr_content) ) -- goto uncanonical_address; -+ return HNDL_exception_raised; - v->arch.hvm_vmx.cstar = msr_content; - break; - -@@ -581,11 +581,6 @@ long_mode_do_msr_write(unsigned int msr, uint64_t msr_content) - } - - return HNDL_done; -- -- uncanonical_address: -- HVM_DBG_LOG(DBG_LEVEL_MSR, "Not cano address of msr write %x", msr); -- hvm_inject_hw_exception(TRAP_gp_fault, 0); -- return HNDL_exception_raised; - } - - /* --- -2.14.3 - - -From a87ec4833af47cdd166294f3f4db21231930d65d Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Thu, 4 Jan 2018 14:32:01 +0100 -Subject: [PATCH 11/77] x86/msr: Free msr_vcpu_policy during vcpu destruction - -c/s 4187f79dc7 "x86/msr: introduce struct msr_vcpu_policy" introduced a -per-vcpu memory allocation, but failed to free it in the clean vcpu -destruction case. - -This is XSA-253. - -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich -master commit: e204e60f77702bf5c884dd37c3f1b01f14e396ae -master date: 2018-01-04 14:27:38 +0100 ---- - xen/arch/x86/domain.c | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c -index 735f45c133..b44c95b493 100644 ---- a/xen/arch/x86/domain.c -+++ b/xen/arch/x86/domain.c -@@ -382,6 +382,9 @@ void vcpu_destroy(struct vcpu *v) - - vcpu_destroy_fpu(v); - -+ xfree(v->arch.msr); -+ v->arch.msr = NULL; -+ - if ( !is_idle_domain(v->domain) ) - vpmu_destroy(v); - --- -2.14.3 - - -From 69e302e59cfd281449eafb6193476a11a1c286df Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= -Date: Thu, 11 Jan 2018 17:51:14 +0000 -Subject: [PATCH 12/77] x86/upcall: inject a spurious event after setting - upcall vector -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -In case the vCPU has pending events to inject. This fixes a bug that -happened if the guest mapped the vcpu info area using -VCPUOP_register_vcpu_info without having setup the event channel -upcall, and then setup the upcall vector. - -In this scenario the guest would not receive any upcalls, because the -call to VCPUOP_register_vcpu_info would have marked the vCPU as having -pending events, but the vector could not be injected because it was -not yet setup. - -This has not caused issues so far because all the consumers first -setup the vector callback and then map the vcpu info page, but there's -no limitation that prevents doing it in the inverse order. 
- -Signed-off-by: Roger Pau Monné -Reviewed-by: Jan Beulich ---- - xen/arch/x86/hvm/hvm.c | 1 + - xen/arch/x86/hvm/irq.c | 5 +++++ - 2 files changed, 6 insertions(+) - -diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c -index 28bc7e4252..9f7b096072 100644 ---- a/xen/arch/x86/hvm/hvm.c -+++ b/xen/arch/x86/hvm/hvm.c -@@ -4069,6 +4069,7 @@ static int hvmop_set_evtchn_upcall_vector( - printk(XENLOG_G_INFO "%pv: upcall vector %02x\n", v, op.vector); - - v->arch.hvm_vcpu.evtchn_upcall_vector = op.vector; -+ hvm_assert_evtchn_irq(v); - return 0; - } - -diff --git a/xen/arch/x86/hvm/irq.c b/xen/arch/x86/hvm/irq.c -index 0077f68a83..f528e2d081 100644 ---- a/xen/arch/x86/hvm/irq.c -+++ b/xen/arch/x86/hvm/irq.c -@@ -385,6 +385,7 @@ void hvm_set_callback_via(struct domain *d, uint64_t via) - struct hvm_irq *hvm_irq = hvm_domain_irq(d); - unsigned int gsi=0, pdev=0, pintx=0; - uint8_t via_type; -+ struct vcpu *v; - - via_type = (uint8_t)MASK_EXTR(via, HVM_PARAM_CALLBACK_IRQ_TYPE_MASK) + 1; - if ( ((via_type == HVMIRQ_callback_gsi) && (via == 0)) || -@@ -447,6 +448,10 @@ void hvm_set_callback_via(struct domain *d, uint64_t via) - - spin_unlock(&d->arch.hvm_domain.irq_lock); - -+ for_each_vcpu ( d, v ) -+ if ( is_vcpu_online(v) ) -+ hvm_assert_evtchn_irq(v); -+ - #ifndef NDEBUG - printk(XENLOG_G_INFO "Dom%u callback via changed to ", d->domain_id); - switch ( via_type ) --- -2.14.3 - - -From caff7f9b59455f1942c96ea7f631e6b0cd9b8e52 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Thu, 11 Jan 2018 17:47:57 +0000 -Subject: [PATCH 13/77] x86/svm: Offer CPUID Faulting to AMD HVM guests as well - -CPUID Faulting can be virtulised for HVM guests without hardware support, -meaning it can be offered to SVM guests. - -Signed-off-by: Andrew Cooper ---- - xen/arch/x86/hvm/svm/svm.c | 6 ++++++ - xen/arch/x86/msr.c | 3 ++- - 2 files changed, 8 insertions(+), 1 deletion(-) - -diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c -index b9cf423fd9..8864d82c11 100644 ---- a/xen/arch/x86/hvm/svm/svm.c -+++ b/xen/arch/x86/hvm/svm/svm.c -@@ -1784,6 +1784,12 @@ static void svm_vmexit_do_cpuid(struct cpu_user_regs *regs) - if ( (inst_len = __get_instruction_length(curr, INSTR_CPUID)) == 0 ) - return; - -+ if ( hvm_check_cpuid_faulting(curr) ) -+ { -+ hvm_inject_hw_exception(TRAP_gp_fault, 0); -+ return; -+ } -+ - guest_cpuid(curr, regs->eax, regs->ecx, &res); - HVMTRACE_5D(CPUID, regs->eax, res.a, res.b, res.c, res.d); - -diff --git a/xen/arch/x86/msr.c b/xen/arch/x86/msr.c -index 31983edc54..187f8623a5 100644 ---- a/xen/arch/x86/msr.c -+++ b/xen/arch/x86/msr.c -@@ -39,7 +39,8 @@ static void __init calculate_hvm_max_policy(void) - return; - - /* 0x000000ce MSR_INTEL_PLATFORM_INFO */ -- if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL ) -+ if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL || -+ boot_cpu_data.x86_vendor == X86_VENDOR_AMD ) - { - dp->plaform_info.available = true; - dp->plaform_info.cpuid_faulting = true; --- -2.14.3 - - -From 5840f40e88fbdcdcf748d0e581dad587ffdde0a1 Mon Sep 17 00:00:00 2001 -From: Roger Pau Monne -Date: Thu, 11 Jan 2018 17:47:58 +0000 -Subject: [PATCH 14/77] xen/x86: report domain id on cpuid -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Use the ECX register of the hypervisor leaf 5. The EAX register on -this leaf is a flags field that can be used to notice the presence of -the domain id in ECX. Note that this is only available to HVM guests. 
- -Signed-off-by: Roger Pau Monné ---- -Changes since v1: - - Use leaf 5 instead. ---- - xen/arch/x86/traps.c | 5 +++++ - xen/include/public/arch-x86/cpuid.h | 2 ++ - 2 files changed, 7 insertions(+) - -diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c -index 642f3cc6d7..348866b8b5 100644 ---- a/xen/arch/x86/traps.c -+++ b/xen/arch/x86/traps.c -@@ -928,6 +928,11 @@ void cpuid_hypervisor_leaves(const struct vcpu *v, uint32_t leaf, - /* Indicate presence of vcpu id and set it in ebx */ - res->a |= XEN_HVM_CPUID_VCPU_ID_PRESENT; - res->b = v->vcpu_id; -+ -+ /* Indicate presence of domain id and set it in ecx */ -+ res->a |= XEN_HVM_CPUID_DOMID_PRESENT; -+ res->c = d->domain_id; -+ - break; - - case 5: /* PV-specific parameters */ -diff --git a/xen/include/public/arch-x86/cpuid.h b/xen/include/public/arch-x86/cpuid.h -index eb76875d0e..665c4b644d 100644 ---- a/xen/include/public/arch-x86/cpuid.h -+++ b/xen/include/public/arch-x86/cpuid.h -@@ -94,12 +94,14 @@ - * HVM-specific features - * Sub-leaf 0: EAX: Features - * Sub-leaf 0: EBX: vcpu id (iff EAX has XEN_HVM_CPUID_VCPU_ID_PRESENT flag) -+ * Sub-leaf 0: ECX: domain id (iff EAX has XEN_HVM_CPUID_DOMID_PRESENT flag) - */ - #define XEN_HVM_CPUID_APIC_ACCESS_VIRT (1u << 0) /* Virtualized APIC registers */ - #define XEN_HVM_CPUID_X2APIC_VIRT (1u << 1) /* Virtualized x2APIC accesses */ - /* Memory mapped from other domains has valid IOMMU entries */ - #define XEN_HVM_CPUID_IOMMU_MAPPINGS (1u << 2) - #define XEN_HVM_CPUID_VCPU_ID_PRESENT (1u << 3) /* vcpu id is present in EBX */ -+#define XEN_HVM_CPUID_DOMID_PRESENT (1u << 4) /* domid is present in ECX */ - - /* - * Leaf 6 (0x40000x05) --- -2.14.3 - - -From 40938b5d5696ccdec67b15fb3a49e8a9f1ab1998 Mon Sep 17 00:00:00 2001 -From: Wei Liu -Date: Thu, 11 Jan 2018 17:47:58 +0000 -Subject: [PATCH 15/77] tools/libxc: remove extraneous newline in - xc_dom_load_acpi - -Signed-off-by: Wei Liu -Reviewed-by: Andrew Cooper ---- - tools/libxc/xc_dom_core.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/tools/libxc/xc_dom_core.c b/tools/libxc/xc_dom_core.c -index b5f316a1dc..303cb971e8 100644 ---- a/tools/libxc/xc_dom_core.c -+++ b/tools/libxc/xc_dom_core.c -@@ -1078,7 +1078,7 @@ static int xc_dom_load_acpi(struct xc_dom_image *dom) - - while ( (i < MAX_ACPI_MODULES) && dom->acpi_modules[i].length ) - { -- DOMPRINTF("%s: %d bytes at address %" PRIx64 "\n", __FUNCTION__, -+ DOMPRINTF("%s: %d bytes at address %" PRIx64, __FUNCTION__, - dom->acpi_modules[i].length, - dom->acpi_modules[i].guest_addr_out); - --- -2.14.3 - - -From 4621c10f489de827742f95c31ac0f43fc3bcde88 Mon Sep 17 00:00:00 2001 -From: Wei Liu -Date: Thu, 11 Jan 2018 17:47:58 +0000 -Subject: [PATCH 16/77] tools/libelf: fix elf notes check for PVH guest - -PVH only requires PHYS32_ENTRY to be set. Return immediately if that's -the case. - -Also remove the printk in pvh_load_kernel. - -Signed-off-by: Wei Liu -Reviewed-by: Andrew Cooper ---- - xen/arch/x86/hvm/dom0_build.c | 4 ---- - xen/common/libelf/libelf-dominfo.c | 9 ++++++++- - 2 files changed, 8 insertions(+), 5 deletions(-) - -diff --git a/xen/arch/x86/hvm/dom0_build.c b/xen/arch/x86/hvm/dom0_build.c -index a67071c739..303ae4e7b5 100644 ---- a/xen/arch/x86/hvm/dom0_build.c -+++ b/xen/arch/x86/hvm/dom0_build.c -@@ -484,10 +484,6 @@ static int __init pvh_load_kernel(struct domain *d, const module_t *image, - return -EINVAL; - } - -- printk("OS: %s version: %s loader: %s bitness: %s\n", parms.guest_os, -- parms.guest_ver, parms.loader, -- elf_64bit(&elf) ? 
"64-bit" : "32-bit"); -- - /* Copy the OS image and free temporary buffer. */ - elf.dest_base = (void *)(parms.virt_kstart - parms.virt_base); - elf.dest_size = parms.virt_kend - parms.virt_kstart; -diff --git a/xen/common/libelf/libelf-dominfo.c b/xen/common/libelf/libelf-dominfo.c -index a52900c00c..378bc05f39 100644 ---- a/xen/common/libelf/libelf-dominfo.c -+++ b/xen/common/libelf/libelf-dominfo.c -@@ -373,6 +373,13 @@ static elf_errorstatus elf_xen_note_check(struct elf_binary *elf, - return 0; - } - -+ /* PVH only requires one ELF note to be set */ -+ if ( parms->phys_entry != UNSET_ADDR32 ) -+ { -+ elf_msg(elf, "ELF: Found PVH image\n"); -+ return 0; -+ } -+ - /* Check the contents of the Xen notes or guest string. */ - if ( ((strlen(parms->loader) == 0) || - strncmp(parms->loader, "generic", 7)) && -@@ -381,7 +388,7 @@ static elf_errorstatus elf_xen_note_check(struct elf_binary *elf, - { - elf_err(elf, - "ERROR: Will only load images built for the generic loader or Linux images" -- " (Not '%.*s' and '%.*s')\n", -+ " (Not '%.*s' and '%.*s') or with PHYS32_ENTRY set\n", - (int)sizeof(parms->loader), parms->loader, - (int)sizeof(parms->guest_os), parms->guest_os); - return -1; --- -2.14.3 - - -From 667275050d83fdca61303b09d9c2448f0badf5a9 Mon Sep 17 00:00:00 2001 -From: Jonathan Ludlam -Date: Thu, 11 Jan 2018 17:47:58 +0000 -Subject: [PATCH 17/77] tools/libxc: Multi modules support - -Signed-off-by: Jonathan Ludlam -Signed-off-by: Sergey Dyasli -Signed-off-by: Andrew Cooper -Signed-off-by: Wei Liu ---- - stubdom/grub/kexec.c | 7 +- - tools/helpers/init-xenstore-domain.c | 4 +- - tools/libxc/include/xc_dom.h | 48 ++++++----- - tools/libxc/xc_dom_compat_linux.c | 2 +- - tools/libxc/xc_dom_core.c | 152 +++++++++++++++++++++++------------ - tools/libxc/xc_dom_x86.c | 65 ++++++++------- - tools/libxl/libxl_dom.c | 10 +-- - 7 files changed, 175 insertions(+), 113 deletions(-) - -diff --git a/stubdom/grub/kexec.c b/stubdom/grub/kexec.c -index 437a0a96e9..61ca082d42 100644 ---- a/stubdom/grub/kexec.c -+++ b/stubdom/grub/kexec.c -@@ -202,7 +202,7 @@ static void tpm_hash2pcr(struct xc_dom_image *dom, char *cmdline) - ASSERT(rv == 0 && resp->status == 0); - - cmd.pcr = bswap_32(5); // PCR #5 for initrd -- sha1(dom->ramdisk_blob, dom->ramdisk_size, cmd.hash); -+ sha1(dom->modules[0].blob, dom->modules[0].size, cmd.hash); - rv = tpmfront_cmd(tpm, (void*)&cmd, sizeof(cmd), (void*)&resp, &resplen); - ASSERT(rv == 0 && resp->status == 0); - -@@ -231,13 +231,12 @@ void kexec(void *kernel, long kernel_size, void *module, long module_size, char - - /* We are using guest owned memory, therefore no limits. 
*/ - xc_dom_kernel_max_size(dom, 0); -- xc_dom_ramdisk_max_size(dom, 0); -+ xc_dom_module_max_size(dom, 0); - - dom->kernel_blob = kernel; - dom->kernel_size = kernel_size; - -- dom->ramdisk_blob = module; -- dom->ramdisk_size = module_size; -+ xc_dom_module_mem(dom, module, module_size, NULL); - - dom->flags = flags; - dom->console_evtchn = start_info.console.domU.evtchn; -diff --git a/tools/helpers/init-xenstore-domain.c b/tools/helpers/init-xenstore-domain.c -index 047ad0cb1d..8453be283b 100644 ---- a/tools/helpers/init-xenstore-domain.c -+++ b/tools/helpers/init-xenstore-domain.c -@@ -145,10 +145,10 @@ static int build(xc_interface *xch) - - if ( ramdisk ) - { -- rv = xc_dom_ramdisk_file(dom, ramdisk); -+ rv = xc_dom_module_file(dom, ramdisk, NULL); - if ( rv ) - { -- fprintf(stderr, "xc_dom_ramdisk_file failed\n"); -+ fprintf(stderr, "xc_dom_module_file failed\n"); - goto err; - } - } -diff --git a/tools/libxc/include/xc_dom.h b/tools/libxc/include/xc_dom.h -index cdcdd07d2b..08be8a8f3f 100644 ---- a/tools/libxc/include/xc_dom.h -+++ b/tools/libxc/include/xc_dom.h -@@ -22,6 +22,7 @@ - #define INVALID_PFN ((xen_pfn_t)-1) - #define X86_HVM_NR_SPECIAL_PAGES 8 - #define X86_HVM_END_SPECIAL_REGION 0xff000u -+#define XG_MAX_MODULES 2 - - /* --- typedefs and structs ---------------------------------------- */ - -@@ -56,17 +57,32 @@ struct xc_dom_phys { - xen_pfn_t count; - }; - -+struct xc_dom_module { -+ void *blob; -+ size_t size; -+ void *cmdline; -+ /* If seg.vstart is non zero then the module will be loaded at that -+ * address, otherwise it will automatically placed. -+ * -+ * If automatic placement is used and the module is gzip -+ * compressed then it will be decompressed as it is loaded. If the -+ * module has been explicitly placed then it is loaded as is -+ * otherwise decompressing risks undoing the manual placement. -+ */ -+ struct xc_dom_seg seg; -+}; -+ - struct xc_dom_image { - /* files */ - void *kernel_blob; - size_t kernel_size; -- void *ramdisk_blob; -- size_t ramdisk_size; -+ unsigned int num_modules; -+ struct xc_dom_module modules[XG_MAX_MODULES]; - void *devicetree_blob; - size_t devicetree_size; - - size_t max_kernel_size; -- size_t max_ramdisk_size; -+ size_t max_module_size; - size_t max_devicetree_size; - - /* arguments and parameters */ -@@ -80,15 +96,6 @@ struct xc_dom_image { - - /* memory layout */ - struct xc_dom_seg kernel_seg; -- /* If ramdisk_seg.vstart is non zero then the ramdisk will be -- * loaded at that address, otherwise it will automatically placed. -- * -- * If automatic placement is used and the ramdisk is gzip -- * compressed then it will be decompressed as it is loaded. If the -- * ramdisk has been explicitly placed then it is loaded as is -- * otherwise decompressing risks undoing the manual placement. -- */ -- struct xc_dom_seg ramdisk_seg; - struct xc_dom_seg p2m_seg; - struct xc_dom_seg pgtables_seg; - struct xc_dom_seg devicetree_seg; -@@ -277,12 +284,12 @@ void xc_dom_release(struct xc_dom_image *dom); - int xc_dom_rambase_init(struct xc_dom_image *dom, uint64_t rambase); - int xc_dom_mem_init(struct xc_dom_image *dom, unsigned int mem_mb); - --/* Set this larger if you have enormous ramdisks/kernels. Note that -+/* Set this larger if you have enormous modules/kernels. Note that - * you should trust all kernels not to be maliciously large (e.g. to - * exhaust all dom0 memory) if you do this (see CVE-2012-4544 / - * XSA-25). 
You can also set the default independently for -- * ramdisks/kernels in xc_dom_allocate() or call -- * xc_dom_{kernel,ramdisk}_max_size. -+ * modules/kernels in xc_dom_allocate() or call -+ * xc_dom_{kernel,module}_max_size. - */ - #ifndef XC_DOM_DECOMPRESS_MAX - #define XC_DOM_DECOMPRESS_MAX (1024*1024*1024) /* 1GB */ -@@ -291,8 +298,8 @@ int xc_dom_mem_init(struct xc_dom_image *dom, unsigned int mem_mb); - int xc_dom_kernel_check_size(struct xc_dom_image *dom, size_t sz); - int xc_dom_kernel_max_size(struct xc_dom_image *dom, size_t sz); - --int xc_dom_ramdisk_check_size(struct xc_dom_image *dom, size_t sz); --int xc_dom_ramdisk_max_size(struct xc_dom_image *dom, size_t sz); -+int xc_dom_module_check_size(struct xc_dom_image *dom, size_t sz); -+int xc_dom_module_max_size(struct xc_dom_image *dom, size_t sz); - - int xc_dom_devicetree_max_size(struct xc_dom_image *dom, size_t sz); - -@@ -303,11 +310,12 @@ int xc_dom_do_gunzip(xc_interface *xch, - int xc_dom_try_gunzip(struct xc_dom_image *dom, void **blob, size_t * size); - - int xc_dom_kernel_file(struct xc_dom_image *dom, const char *filename); --int xc_dom_ramdisk_file(struct xc_dom_image *dom, const char *filename); -+int xc_dom_module_file(struct xc_dom_image *dom, const char *filename, -+ const char *cmdline); - int xc_dom_kernel_mem(struct xc_dom_image *dom, const void *mem, - size_t memsize); --int xc_dom_ramdisk_mem(struct xc_dom_image *dom, const void *mem, -- size_t memsize); -+int xc_dom_module_mem(struct xc_dom_image *dom, const void *mem, -+ size_t memsize, const char *cmdline); - int xc_dom_devicetree_file(struct xc_dom_image *dom, const char *filename); - int xc_dom_devicetree_mem(struct xc_dom_image *dom, const void *mem, - size_t memsize); -diff --git a/tools/libxc/xc_dom_compat_linux.c b/tools/libxc/xc_dom_compat_linux.c -index c922c61e90..b3d43feed9 100644 ---- a/tools/libxc/xc_dom_compat_linux.c -+++ b/tools/libxc/xc_dom_compat_linux.c -@@ -56,7 +56,7 @@ int xc_linux_build(xc_interface *xch, uint32_t domid, - if ( (rc = xc_dom_kernel_file(dom, image_name)) != 0 ) - goto out; - if ( initrd_name && strlen(initrd_name) && -- ((rc = xc_dom_ramdisk_file(dom, initrd_name)) != 0) ) -+ ((rc = xc_dom_module_file(dom, initrd_name, NULL)) != 0) ) - goto out; - - dom->flags |= flags; -diff --git a/tools/libxc/xc_dom_core.c b/tools/libxc/xc_dom_core.c -index 303cb971e8..3e65aff22b 100644 ---- a/tools/libxc/xc_dom_core.c -+++ b/tools/libxc/xc_dom_core.c -@@ -314,16 +314,16 @@ int xc_dom_kernel_check_size(struct xc_dom_image *dom, size_t sz) - return 0; - } - --int xc_dom_ramdisk_check_size(struct xc_dom_image *dom, size_t sz) -+int xc_dom_module_check_size(struct xc_dom_image *dom, size_t sz) - { - /* No limit */ -- if ( !dom->max_ramdisk_size ) -+ if ( !dom->max_module_size ) - return 0; - -- if ( sz > dom->max_ramdisk_size ) -+ if ( sz > dom->max_module_size ) - { - xc_dom_panic(dom->xch, XC_INVALID_KERNEL, -- "ramdisk image too large"); -+ "module image too large"); - return 1; - } - -@@ -764,7 +764,7 @@ struct xc_dom_image *xc_dom_allocate(xc_interface *xch, - dom->xch = xch; - - dom->max_kernel_size = XC_DOM_DECOMPRESS_MAX; -- dom->max_ramdisk_size = XC_DOM_DECOMPRESS_MAX; -+ dom->max_module_size = XC_DOM_DECOMPRESS_MAX; - dom->max_devicetree_size = XC_DOM_DECOMPRESS_MAX; - - if ( cmdline ) -@@ -797,10 +797,10 @@ int xc_dom_kernel_max_size(struct xc_dom_image *dom, size_t sz) - return 0; - } - --int xc_dom_ramdisk_max_size(struct xc_dom_image *dom, size_t sz) -+int xc_dom_module_max_size(struct xc_dom_image *dom, size_t sz) - 
{ -- DOMPRINTF("%s: ramdisk_max_size=%zx", __FUNCTION__, sz); -- dom->max_ramdisk_size = sz; -+ DOMPRINTF("%s: module_max_size=%zx", __FUNCTION__, sz); -+ dom->max_module_size = sz; - return 0; - } - -@@ -821,16 +821,30 @@ int xc_dom_kernel_file(struct xc_dom_image *dom, const char *filename) - return xc_dom_try_gunzip(dom, &dom->kernel_blob, &dom->kernel_size); - } - --int xc_dom_ramdisk_file(struct xc_dom_image *dom, const char *filename) -+int xc_dom_module_file(struct xc_dom_image *dom, const char *filename, const char *cmdline) - { -+ unsigned int mod = dom->num_modules++; -+ - DOMPRINTF("%s: filename=\"%s\"", __FUNCTION__, filename); -- dom->ramdisk_blob = -- xc_dom_malloc_filemap(dom, filename, &dom->ramdisk_size, -- dom->max_ramdisk_size); -+ dom->modules[mod].blob = -+ xc_dom_malloc_filemap(dom, filename, &dom->modules[mod].size, -+ dom->max_module_size); - -- if ( dom->ramdisk_blob == NULL ) -+ if ( dom->modules[mod].blob == NULL ) - return -1; --// return xc_dom_try_gunzip(dom, &dom->ramdisk_blob, &dom->ramdisk_size); -+ -+ if ( cmdline ) -+ { -+ dom->modules[mod].cmdline = xc_dom_strdup(dom, cmdline); -+ -+ if ( dom->modules[mod].cmdline == NULL ) -+ return -1; -+ } -+ else -+ { -+ dom->modules[mod].cmdline = NULL; -+ } -+ - return 0; - } - -@@ -859,13 +873,28 @@ int xc_dom_kernel_mem(struct xc_dom_image *dom, const void *mem, size_t memsize) - return xc_dom_try_gunzip(dom, &dom->kernel_blob, &dom->kernel_size); - } - --int xc_dom_ramdisk_mem(struct xc_dom_image *dom, const void *mem, -- size_t memsize) -+int xc_dom_module_mem(struct xc_dom_image *dom, const void *mem, -+ size_t memsize, const char *cmdline) - { -+ unsigned int mod = dom->num_modules++; -+ - DOMPRINTF_CALLED(dom->xch); -- dom->ramdisk_blob = (void *)mem; -- dom->ramdisk_size = memsize; --// return xc_dom_try_gunzip(dom, &dom->ramdisk_blob, &dom->ramdisk_size); -+ -+ dom->modules[mod].blob = (void *)mem; -+ dom->modules[mod].size = memsize; -+ -+ if ( cmdline ) -+ { -+ dom->modules[mod].cmdline = xc_dom_strdup(dom, cmdline); -+ -+ if ( dom->modules[mod].cmdline == NULL ) -+ return -1; -+ } -+ else -+ { -+ dom->modules[mod].cmdline = NULL; -+ } -+ - return 0; - } - -@@ -990,41 +1019,42 @@ int xc_dom_update_guest_p2m(struct xc_dom_image *dom) - return 0; - } - --static int xc_dom_build_ramdisk(struct xc_dom_image *dom) -+static int xc_dom_build_module(struct xc_dom_image *dom, unsigned int mod) - { -- size_t unziplen, ramdisklen; -- void *ramdiskmap; -+ size_t unziplen, modulelen; -+ void *modulemap; -+ char name[10]; - -- if ( !dom->ramdisk_seg.vstart ) -+ if ( !dom->modules[mod].seg.vstart ) - { - unziplen = xc_dom_check_gzip(dom->xch, -- dom->ramdisk_blob, dom->ramdisk_size); -- if ( xc_dom_ramdisk_check_size(dom, unziplen) != 0 ) -+ dom->modules[mod].blob, dom->modules[mod].size); -+ if ( xc_dom_module_check_size(dom, unziplen) != 0 ) - unziplen = 0; - } - else - unziplen = 0; - -- ramdisklen = unziplen ? unziplen : dom->ramdisk_size; -- -- if ( xc_dom_alloc_segment(dom, &dom->ramdisk_seg, "ramdisk", -- dom->ramdisk_seg.vstart, ramdisklen) != 0 ) -+ modulelen = unziplen ? 
unziplen : dom->modules[mod].size; -+ snprintf(name, sizeof(name), "module%u", mod); -+ if ( xc_dom_alloc_segment(dom, &dom->modules[mod].seg, name, -+ dom->modules[mod].seg.vstart, modulelen) != 0 ) - goto err; -- ramdiskmap = xc_dom_seg_to_ptr(dom, &dom->ramdisk_seg); -- if ( ramdiskmap == NULL ) -+ modulemap = xc_dom_seg_to_ptr(dom, &dom->modules[mod].seg); -+ if ( modulemap == NULL ) - { -- DOMPRINTF("%s: xc_dom_seg_to_ptr(dom, &dom->ramdisk_seg) => NULL", -- __FUNCTION__); -+ DOMPRINTF("%s: xc_dom_seg_to_ptr(dom, &dom->modules[%u].seg) => NULL", -+ __FUNCTION__, mod); - goto err; - } - if ( unziplen ) - { -- if ( xc_dom_do_gunzip(dom->xch, dom->ramdisk_blob, dom->ramdisk_size, -- ramdiskmap, ramdisklen) == -1 ) -+ if ( xc_dom_do_gunzip(dom->xch, dom->modules[mod].blob, dom->modules[mod].size, -+ modulemap, modulelen) == -1 ) - goto err; - } - else -- memcpy(ramdiskmap, dom->ramdisk_blob, dom->ramdisk_size); -+ memcpy(modulemap, dom->modules[mod].blob, dom->modules[mod].size); - - return 0; - -@@ -1131,6 +1161,7 @@ int xc_dom_build_image(struct xc_dom_image *dom) - { - unsigned int page_size; - bool unmapped_initrd; -+ unsigned int mod; - - DOMPRINTF_CALLED(dom->xch); - -@@ -1154,15 +1185,24 @@ int xc_dom_build_image(struct xc_dom_image *dom) - if ( dom->kernel_loader->loader(dom) != 0 ) - goto err; - -- /* Don't load ramdisk now if no initial mapping required. */ -- unmapped_initrd = dom->parms.unmapped_initrd && !dom->ramdisk_seg.vstart; -- -- if ( dom->ramdisk_blob && !unmapped_initrd ) -+ /* Don't load ramdisk / other modules now if no initial mapping required. */ -+ for ( mod = 0; mod < dom->num_modules; mod++ ) - { -- if ( xc_dom_build_ramdisk(dom) != 0 ) -- goto err; -- dom->initrd_start = dom->ramdisk_seg.vstart; -- dom->initrd_len = dom->ramdisk_seg.vend - dom->ramdisk_seg.vstart; -+ unmapped_initrd = (dom->parms.unmapped_initrd && -+ !dom->modules[mod].seg.vstart); -+ -+ if ( dom->modules[mod].blob && !unmapped_initrd ) -+ { -+ if ( xc_dom_build_module(dom, mod) != 0 ) -+ goto err; -+ -+ if ( mod == 0 ) -+ { -+ dom->initrd_start = dom->modules[mod].seg.vstart; -+ dom->initrd_len = -+ dom->modules[mod].seg.vend - dom->modules[mod].seg.vstart; -+ } -+ } - } - - /* load devicetree */ -@@ -1216,14 +1256,24 @@ int xc_dom_build_image(struct xc_dom_image *dom) - if ( dom->virt_pgtab_end && xc_dom_alloc_pad(dom, dom->virt_pgtab_end) ) - return -1; - -- /* Load ramdisk if no initial mapping required. */ -- if ( dom->ramdisk_blob && unmapped_initrd ) -+ for ( mod = 0; mod < dom->num_modules; mod++ ) - { -- if ( xc_dom_build_ramdisk(dom) != 0 ) -- goto err; -- dom->flags |= SIF_MOD_START_PFN; -- dom->initrd_start = dom->ramdisk_seg.pfn; -- dom->initrd_len = page_size * dom->ramdisk_seg.pages; -+ unmapped_initrd = (dom->parms.unmapped_initrd && -+ !dom->modules[mod].seg.vstart); -+ -+ /* Load ramdisk / other modules if no initial mapping required. */ -+ if ( dom->modules[mod].blob && unmapped_initrd ) -+ { -+ if ( xc_dom_build_module(dom, mod) != 0 ) -+ goto err; -+ -+ if ( mod == 0 ) -+ { -+ dom->flags |= SIF_MOD_START_PFN; -+ dom->initrd_start = dom->modules[mod].seg.pfn; -+ dom->initrd_len = page_size * dom->modules[mod].seg.pages; -+ } -+ } - } - - /* Allocate p2m list if outside of initial kernel mapping. 
*/ -diff --git a/tools/libxc/xc_dom_x86.c b/tools/libxc/xc_dom_x86.c -index bff68a011f..0b65dab4bc 100644 ---- a/tools/libxc/xc_dom_x86.c -+++ b/tools/libxc/xc_dom_x86.c -@@ -70,8 +70,8 @@ - #define round_up(addr, mask) ((addr) | (mask)) - #define round_pg_up(addr) (((addr) + PAGE_SIZE_X86 - 1) & ~(PAGE_SIZE_X86 - 1)) - --#define HVMLOADER_MODULE_MAX_COUNT 1 --#define HVMLOADER_MODULE_NAME_SIZE 10 -+#define HVMLOADER_MODULE_MAX_COUNT 2 -+#define HVMLOADER_MODULE_CMDLINE_SIZE MAX_GUEST_CMDLINE - - struct xc_dom_params { - unsigned levels; -@@ -627,6 +627,12 @@ static int alloc_magic_pages_hvm(struct xc_dom_image *dom) - xc_hvm_param_set(xch, domid, HVM_PARAM_SHARING_RING_PFN, - special_pfn(SPECIALPAGE_SHARING)); - -+ start_info_size += -+ sizeof(struct hvm_modlist_entry) * HVMLOADER_MODULE_MAX_COUNT; -+ -+ start_info_size += -+ HVMLOADER_MODULE_CMDLINE_SIZE * HVMLOADER_MODULE_MAX_COUNT; -+ - if ( !dom->device_model ) - { - if ( dom->cmdline ) -@@ -634,22 +640,9 @@ static int alloc_magic_pages_hvm(struct xc_dom_image *dom) - dom->cmdline_size = ROUNDUP(strlen(dom->cmdline) + 1, 8); - start_info_size += dom->cmdline_size; - } -- -- /* Limited to one module. */ -- if ( dom->ramdisk_blob ) -- start_info_size += sizeof(struct hvm_modlist_entry); - } - else - { -- start_info_size += -- sizeof(struct hvm_modlist_entry) * HVMLOADER_MODULE_MAX_COUNT; -- /* -- * Add extra space to write modules name. -- * The HVMLOADER_MODULE_NAME_SIZE accounts for NUL byte. -- */ -- start_info_size += -- HVMLOADER_MODULE_NAME_SIZE * HVMLOADER_MODULE_MAX_COUNT; -- - /* - * Allocate and clear additional ioreq server pages. The default - * server will use the IOREQ and BUFIOREQ special pages above. -@@ -749,7 +742,7 @@ static int start_info_x86_32(struct xc_dom_image *dom) - start_info->console.domU.mfn = xc_dom_p2m(dom, dom->console_pfn); - start_info->console.domU.evtchn = dom->console_evtchn; - -- if ( dom->ramdisk_blob ) -+ if ( dom->modules[0].blob ) - { - start_info->mod_start = dom->initrd_start; - start_info->mod_len = dom->initrd_len; -@@ -800,7 +793,7 @@ static int start_info_x86_64(struct xc_dom_image *dom) - start_info->console.domU.mfn = xc_dom_p2m(dom, dom->console_pfn); - start_info->console.domU.evtchn = dom->console_evtchn; - -- if ( dom->ramdisk_blob ) -+ if ( dom->modules[0].blob ) - { - start_info->mod_start = dom->initrd_start; - start_info->mod_len = dom->initrd_len; -@@ -1237,7 +1230,7 @@ static int meminit_hvm(struct xc_dom_image *dom) - unsigned long target_pages = dom->target_pages; - unsigned long cur_pages, cur_pfn; - int rc; -- unsigned long stat_normal_pages = 0, stat_2mb_pages = 0, -+ unsigned long stat_normal_pages = 0, stat_2mb_pages = 0, - stat_1gb_pages = 0; - unsigned int memflags = 0; - int claim_enabled = dom->claim_enabled; -@@ -1303,6 +1296,8 @@ static int meminit_hvm(struct xc_dom_image *dom) - p2m_size = 0; - for ( i = 0; i < nr_vmemranges; i++ ) - { -+ DOMPRINTF("range: start=0x%"PRIx64" end=0x%"PRIx64, vmemranges[i].start, vmemranges[i].end); -+ - total_pages += ((vmemranges[i].end - vmemranges[i].start) - >> PAGE_SHIFT); - p2m_size = p2m_size > (vmemranges[i].end >> PAGE_SHIFT) ? 
-@@ -1633,7 +1628,7 @@ static int alloc_pgtables_hvm(struct xc_dom_image *dom) - */ - static void add_module_to_list(struct xc_dom_image *dom, - struct xc_hvm_firmware_module *module, -- const char *name, -+ const char *cmdline, - struct hvm_modlist_entry *modlist, - struct hvm_start_info *start_info) - { -@@ -1648,16 +1643,20 @@ static void add_module_to_list(struct xc_dom_image *dom, - return; - - assert(start_info->nr_modules < HVMLOADER_MODULE_MAX_COUNT); -- assert(strnlen(name, HVMLOADER_MODULE_NAME_SIZE) -- < HVMLOADER_MODULE_NAME_SIZE); - - modlist[index].paddr = module->guest_addr_out; - modlist[index].size = module->length; - -- strncpy(modules_cmdline_start + HVMLOADER_MODULE_NAME_SIZE * index, -- name, HVMLOADER_MODULE_NAME_SIZE); -+ if ( cmdline ) -+ { -+ assert(strnlen(cmdline, HVMLOADER_MODULE_CMDLINE_SIZE) -+ < HVMLOADER_MODULE_CMDLINE_SIZE); -+ strncpy(modules_cmdline_start + HVMLOADER_MODULE_CMDLINE_SIZE * index, -+ cmdline, HVMLOADER_MODULE_CMDLINE_SIZE); -+ } -+ - modlist[index].cmdline_paddr = -- modules_cmdline_paddr + HVMLOADER_MODULE_NAME_SIZE * index; -+ modules_cmdline_paddr + HVMLOADER_MODULE_CMDLINE_SIZE * index; - - start_info->nr_modules++; - } -@@ -1669,10 +1668,10 @@ static int bootlate_hvm(struct xc_dom_image *dom) - struct hvm_start_info *start_info; - size_t start_info_size; - struct hvm_modlist_entry *modlist; -+ unsigned int i; - - start_info_size = sizeof(*start_info) + dom->cmdline_size; -- if ( dom->ramdisk_blob ) -- start_info_size += sizeof(struct hvm_modlist_entry); -+ start_info_size += sizeof(struct hvm_modlist_entry) * dom->num_modules; - - if ( start_info_size > - dom->start_info_seg.pages << XC_DOM_PAGE_SHIFT(dom) ) -@@ -1703,12 +1702,18 @@ static int bootlate_hvm(struct xc_dom_image *dom) - ((uintptr_t)cmdline - (uintptr_t)start_info); - } - -- if ( dom->ramdisk_blob ) -+ for ( i = 0; i < dom->num_modules; i++ ) - { -+ struct xc_hvm_firmware_module mod; -+ -+ DOMPRINTF("Adding module %u", i); -+ mod.guest_addr_out = -+ dom->modules[i].seg.vstart - dom->parms.virt_base; -+ mod.length = -+ dom->modules[i].seg.vend - dom->modules[i].seg.vstart; - -- modlist[0].paddr = dom->ramdisk_seg.vstart - dom->parms.virt_base; -- modlist[0].size = dom->ramdisk_seg.vend - dom->ramdisk_seg.vstart; -- start_info->nr_modules = 1; -+ add_module_to_list(dom, &mod, dom->modules[i].cmdline, -+ modlist, start_info); - } - - /* ACPI module 0 is the RSDP */ -diff --git a/tools/libxl/libxl_dom.c b/tools/libxl/libxl_dom.c -index ef834e652d..fbbdb9ec2f 100644 ---- a/tools/libxl/libxl_dom.c -+++ b/tools/libxl/libxl_dom.c -@@ -796,12 +796,12 @@ int libxl__build_pv(libxl__gc *gc, uint32_t domid, - - if ( state->pv_ramdisk.path && strlen(state->pv_ramdisk.path) ) { - if (state->pv_ramdisk.mapped) { -- if ( (ret = xc_dom_ramdisk_mem(dom, state->pv_ramdisk.data, state->pv_ramdisk.size)) != 0 ) { -+ if ( (ret = xc_dom_module_mem(dom, state->pv_ramdisk.data, state->pv_ramdisk.size, NULL)) != 0 ) { - LOGE(ERROR, "xc_dom_ramdisk_mem failed"); - goto out; - } - } else { -- if ( (ret = xc_dom_ramdisk_file(dom, state->pv_ramdisk.path)) != 0 ) { -+ if ( (ret = xc_dom_module_file(dom, state->pv_ramdisk.path, NULL)) != 0 ) { - LOGE(ERROR, "xc_dom_ramdisk_file failed"); - goto out; - } -@@ -1043,14 +1043,14 @@ static int libxl__domain_firmware(libxl__gc *gc, - - if (state->pv_ramdisk.path && strlen(state->pv_ramdisk.path)) { - if (state->pv_ramdisk.mapped) { -- rc = xc_dom_ramdisk_mem(dom, state->pv_ramdisk.data, -- state->pv_ramdisk.size); -+ rc = xc_dom_module_mem(dom, 
state->pv_ramdisk.data, -+ state->pv_ramdisk.size, NULL); - if (rc) { - LOGE(ERROR, "xc_dom_ramdisk_mem failed"); - goto out; - } - } else { -- rc = xc_dom_ramdisk_file(dom, state->pv_ramdisk.path); -+ rc = xc_dom_module_file(dom, state->pv_ramdisk.path, NULL); - if (rc) { - LOGE(ERROR, "xc_dom_ramdisk_file failed"); - goto out; --- -2.14.3 - - -From 78e9cc3488ffd55131b129a3ab90169d4e903efe Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Thu, 11 Jan 2018 17:47:58 +0000 -Subject: [PATCH 18/77] xen/common: Widen the guest logging buffer slightly - -This reduces the amount of line wrapping from guests; Xen in particular likes -to print lines longer than 80 characters. - -Signed-off-by: Andrew Cooper -Reviewed-by: Wei Liu ---- - xen/include/xen/sched.h | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h -index 002ba29d6d..64abc1df6c 100644 ---- a/xen/include/xen/sched.h -+++ b/xen/include/xen/sched.h -@@ -427,7 +427,7 @@ struct domain - xen_domain_handle_t handle; - - /* hvm_print_line() and guest_console_write() logging. */ --#define DOMAIN_PBUF_SIZE 80 -+#define DOMAIN_PBUF_SIZE 200 - char *pbuf; - unsigned pbuf_idx; - spinlock_t pbuf_lock; --- -2.14.3 - - -From 92a6295c30a9f323de9d741e2e43f49df4412308 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Thu, 11 Jan 2018 17:47:59 +0000 -Subject: [PATCH 19/77] x86/time: Print a more helpful error when a platform - timer can't be found - -Signed-off-by: Andrew Cooper -Reviewed-by: Wei Liu ---- - xen/arch/x86/time.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/xen/arch/x86/time.c b/xen/arch/x86/time.c -index eba7aed72d..6c20b1036d 100644 ---- a/xen/arch/x86/time.c -+++ b/xen/arch/x86/time.c -@@ -708,7 +708,8 @@ static u64 __init init_platform_timer(void) - } - } - -- BUG_ON(rc <= 0); -+ if ( rc <= 0 ) -+ panic("Unable to find usable platform timer"); - - printk("Platform timer is %s %s\n", - freq_string(pts->frequency), pts->name); --- -2.14.3 - - -From ff1fb8fe53bb91823a1a37b6dd0e816d519c19d8 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Thu, 11 Jan 2018 17:47:59 +0000 -Subject: [PATCH 20/77] x86/link: Introduce and use SECTION_ALIGN - -... to reduce the quantity of #ifdef EFI. - -Signed-off-by: Andrew Cooper -Reviewed-by: Wei Liu ---- -CC: Jan Beulich ---- - xen/arch/x86/xen.lds.S | 50 +++++++++++++------------------------------------- - 1 file changed, 13 insertions(+), 37 deletions(-) - -diff --git a/xen/arch/x86/xen.lds.S b/xen/arch/x86/xen.lds.S -index d5e8821d41..6164ad094f 100644 ---- a/xen/arch/x86/xen.lds.S -+++ b/xen/arch/x86/xen.lds.S -@@ -12,12 +12,14 @@ - #define FORMAT "pei-x86-64" - #undef __XEN_VIRT_START - #define __XEN_VIRT_START __image_base__ -+#define SECTION_ALIGN MB(2) - - ENTRY(efi_start) - - #else /* !EFI */ - - #define FORMAT "elf64-x86-64" -+#define SECTION_ALIGN PAGE_SIZE - - ENTRY(start) - -@@ -67,11 +69,7 @@ SECTIONS - _etext = .; /* End of text section */ - } :text = 0x9090 - --#ifdef EFI -- . = ALIGN(MB(2)); --#else -- . = ALIGN(PAGE_SIZE); --#endif -+ . = ALIGN(SECTION_ALIGN); - __2M_text_end = .; - - __2M_rodata_start = .; /* Start of 2M superpages, mapped RO. */ -@@ -149,11 +147,7 @@ SECTIONS - #endif - _erodata = .; - --#ifdef EFI -- . = ALIGN(MB(2)); --#else -- . = ALIGN(PAGE_SIZE); --#endif -+ . = ALIGN(SECTION_ALIGN); - __2M_rodata_end = .; - - __2M_init_start = .; /* Start of 2M superpages, mapped RWX (boot only). */ -@@ -215,11 +209,7 @@ SECTIONS - __ctors_end = .; - } :text - --#ifdef EFI -- . 
= ALIGN(MB(2)); --#else -- . = ALIGN(PAGE_SIZE); --#endif -+ . = ALIGN(SECTION_ALIGN); - __init_end = .; - __2M_init_end = .; - -@@ -257,11 +247,7 @@ SECTIONS - } :text - _end = . ; - --#ifdef EFI -- . = ALIGN(MB(2)); --#else -- . = ALIGN(PAGE_SIZE); --#endif -+ . = ALIGN(SECTION_ALIGN); - __2M_rwdata_end = .; - - #ifdef EFI -@@ -310,23 +296,13 @@ ASSERT(__image_base__ > XEN_VIRT_START || - ASSERT(kexec_reloc_size - kexec_reloc <= PAGE_SIZE, "kexec_reloc is too large") - #endif - --#ifdef EFI --ASSERT(IS_ALIGNED(__2M_text_end, MB(2)), "__2M_text_end misaligned") --ASSERT(IS_ALIGNED(__2M_rodata_start, MB(2)), "__2M_rodata_start misaligned") --ASSERT(IS_ALIGNED(__2M_rodata_end, MB(2)), "__2M_rodata_end misaligned") --ASSERT(IS_ALIGNED(__2M_init_start, MB(2)), "__2M_init_start misaligned") --ASSERT(IS_ALIGNED(__2M_init_end, MB(2)), "__2M_init_end misaligned") --ASSERT(IS_ALIGNED(__2M_rwdata_start, MB(2)), "__2M_rwdata_start misaligned") --ASSERT(IS_ALIGNED(__2M_rwdata_end, MB(2)), "__2M_rwdata_end misaligned") --#else --ASSERT(IS_ALIGNED(__2M_text_end, PAGE_SIZE), "__2M_text_end misaligned") --ASSERT(IS_ALIGNED(__2M_rodata_start, PAGE_SIZE), "__2M_rodata_start misaligned") --ASSERT(IS_ALIGNED(__2M_rodata_end, PAGE_SIZE), "__2M_rodata_end misaligned") --ASSERT(IS_ALIGNED(__2M_init_start, PAGE_SIZE), "__2M_init_start misaligned") --ASSERT(IS_ALIGNED(__2M_init_end, PAGE_SIZE), "__2M_init_end misaligned") --ASSERT(IS_ALIGNED(__2M_rwdata_start, PAGE_SIZE), "__2M_rwdata_start misaligned") --ASSERT(IS_ALIGNED(__2M_rwdata_end, PAGE_SIZE), "__2M_rwdata_end misaligned") --#endif -+ASSERT(IS_ALIGNED(__2M_text_end, SECTION_ALIGN), "__2M_text_end misaligned") -+ASSERT(IS_ALIGNED(__2M_rodata_start, SECTION_ALIGN), "__2M_rodata_start misaligned") -+ASSERT(IS_ALIGNED(__2M_rodata_end, SECTION_ALIGN), "__2M_rodata_end misaligned") -+ASSERT(IS_ALIGNED(__2M_init_start, SECTION_ALIGN), "__2M_init_start misaligned") -+ASSERT(IS_ALIGNED(__2M_init_end, SECTION_ALIGN), "__2M_init_end misaligned") -+ASSERT(IS_ALIGNED(__2M_rwdata_start, SECTION_ALIGN), "__2M_rwdata_start misaligned") -+ASSERT(IS_ALIGNED(__2M_rwdata_end, SECTION_ALIGN), "__2M_rwdata_end misaligned") - - ASSERT(IS_ALIGNED(cpu0_stack, STACK_SIZE), "cpu0_stack misaligned") - --- -2.14.3 - - -From 9e46ae12edc8be1dd846ce545600db28dabfabc8 Mon Sep 17 00:00:00 2001 -From: Bob Moore -Date: Thu, 11 Jan 2018 17:47:59 +0000 -Subject: [PATCH 21/77] ACPICA: Make ACPI Power Management Timer (PM Timer) - optional. -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -PM Timer is now optional. -This support is already in Windows8 and "SHOULD" come out in ACPI 5.0A -(if all goes well). - -The change doesn't affect Xen directly, because it does not rely -on the presence of the PM timer. - -Signed-off-by: Bob Moore -Signed-off-by: Lv Zheng -Signed-off-by: Rafael J. 
Wysocki -[ported to Xen] -Signed-off-by: Roger Pau Monné ---- - xen/drivers/acpi/tables/tbfadt.c | 5 +++-- - 1 file changed, 3 insertions(+), 2 deletions(-) - -diff --git a/xen/drivers/acpi/tables/tbfadt.c b/xen/drivers/acpi/tables/tbfadt.c -index d62d8d5cb9..f11fd5a900 100644 ---- a/xen/drivers/acpi/tables/tbfadt.c -+++ b/xen/drivers/acpi/tables/tbfadt.c -@@ -95,7 +95,8 @@ static struct acpi_fadt_info __initdata fadt_info_table[] = { - - {"PmTimerBlock", ACPI_FADT_OFFSET(xpm_timer_block), - ACPI_FADT_OFFSET(pm_timer_block), -- ACPI_FADT_OFFSET(pm_timer_length), ACPI_FADT_REQUIRED}, -+ ACPI_FADT_OFFSET(pm_timer_length), -+ ACPI_FADT_SEPARATE_LENGTH}, /* ACPI 5.0A: Timer is optional */ - - {"Gpe0Block", ACPI_FADT_OFFSET(xgpe0_block), - ACPI_FADT_OFFSET(gpe0_block), -@@ -437,7 +438,7 @@ static void __init acpi_tb_validate_fadt(void) - - if (fadt_info_table[i].type & ACPI_FADT_REQUIRED) { - /* -- * Field is required (Pm1a_event, Pm1a_control, pm_timer). -+ * Field is required (Pm1a_event, Pm1a_control). - * Both the address and length must be non-zero. - */ - if (!address64->address || !length) { --- -2.14.3 - - -From e7c8187b91fbff4c15e2cba06e33a1dce4b0b55e Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Thu, 11 Jan 2018 17:47:59 +0000 -Subject: [PATCH 22/77] xen/domctl: Return arch_config via getdomaininfo - -This allows toolstack software to distinguish HVM from PVH guests. - -Signed-off-by: Andrew Cooper -Signed-off-by: Wei Liu -Reviewed-by: Jan Beulich ---- -v2: bump domctl version number ---- - tools/libxc/include/xenctrl.h | 1 + - tools/libxc/xc_domain.c | 1 + - xen/arch/x86/domctl.c | 2 ++ - xen/include/public/domctl.h | 3 ++- - 4 files changed, 6 insertions(+), 1 deletion(-) - -diff --git a/tools/libxc/include/xenctrl.h b/tools/libxc/include/xenctrl.h -index 666db0b919..a92a8d7a53 100644 ---- a/tools/libxc/include/xenctrl.h -+++ b/tools/libxc/include/xenctrl.h -@@ -456,6 +456,7 @@ typedef struct xc_dominfo { - unsigned int max_vcpu_id; - xen_domain_handle_t handle; - unsigned int cpupool; -+ struct xen_arch_domainconfig arch_config; - } xc_dominfo_t; - - typedef xen_domctl_getdomaininfo_t xc_domaininfo_t; -diff --git a/tools/libxc/xc_domain.c b/tools/libxc/xc_domain.c -index 3ccd27f101..8169284dc1 100644 ---- a/tools/libxc/xc_domain.c -+++ b/tools/libxc/xc_domain.c -@@ -421,6 +421,7 @@ int xc_domain_getinfo(xc_interface *xch, - info->nr_online_vcpus = domctl.u.getdomaininfo.nr_online_vcpus; - info->max_vcpu_id = domctl.u.getdomaininfo.max_vcpu_id; - info->cpupool = domctl.u.getdomaininfo.cpupool; -+ info->arch_config = domctl.u.getdomaininfo.arch_config; - - memcpy(info->handle, domctl.u.getdomaininfo.handle, - sizeof(xen_domain_handle_t)); -diff --git a/xen/arch/x86/domctl.c b/xen/arch/x86/domctl.c -index 075ee92cd7..b52d6d9552 100644 ---- a/xen/arch/x86/domctl.c -+++ b/xen/arch/x86/domctl.c -@@ -345,6 +345,8 @@ void arch_get_domain_info(const struct domain *d, - { - if ( paging_mode_hap(d) ) - info->flags |= XEN_DOMINF_hap; -+ -+ info->arch_config.emulation_flags = d->arch.emulation_flags; - } - - #define MAX_IOPORTS 0x10000 -diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h -index 70027abc00..463f8cc420 100644 ---- a/xen/include/public/domctl.h -+++ b/xen/include/public/domctl.h -@@ -38,7 +38,7 @@ - #include "hvm/save.h" - #include "memory.h" - --#define XEN_DOMCTL_INTERFACE_VERSION 0x0000000e -+#define XEN_DOMCTL_INTERFACE_VERSION 0x0000000f - - /* - * NB. xen_domctl.domain is an IN/OUT parameter for this operation. 
-@@ -116,6 +116,7 @@ struct xen_domctl_getdomaininfo { - uint32_t ssidref; - xen_domain_handle_t handle; - uint32_t cpupool; -+ struct xen_arch_domainconfig arch_config; - }; - typedef struct xen_domctl_getdomaininfo xen_domctl_getdomaininfo_t; - DEFINE_XEN_GUEST_HANDLE(xen_domctl_getdomaininfo_t); --- -2.14.3 - - -From 78898c9d1b5bffe141da923bf4b5b19cc388e260 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Thu, 11 Jan 2018 17:47:59 +0000 -Subject: [PATCH 23/77] tools/ocaml: Expose arch_config in domaininfo - -Signed-off-by: Andrew Cooper ---- - tools/ocaml/libs/xc/xenctrl.ml | 29 +++++++++++++++++++++++++++++ - tools/ocaml/libs/xc/xenctrl.mli | 28 ++++++++++++++++++++++++++++ - tools/ocaml/libs/xc/xenctrl_stubs.c | 26 ++++++++++++++++++++++++-- - 3 files changed, 81 insertions(+), 2 deletions(-) - -diff --git a/tools/ocaml/libs/xc/xenctrl.ml b/tools/ocaml/libs/xc/xenctrl.ml -index 70a325b0e9..d549068d60 100644 ---- a/tools/ocaml/libs/xc/xenctrl.ml -+++ b/tools/ocaml/libs/xc/xenctrl.ml -@@ -28,6 +28,34 @@ type vcpuinfo = - cpumap: int32; - } - -+type xen_arm_arch_domainconfig = -+{ -+ gic_version: int; -+ nr_spis: int; -+ clock_frequency: int32; -+} -+ -+type x86_arch_emulation_flags = -+ | X86_EMU_LAPIC -+ | X86_EMU_HPET -+ | X86_EMU_PM -+ | X86_EMU_RTC -+ | X86_EMU_IOAPIC -+ | X86_EMU_PIC -+ | X86_EMU_VGA -+ | X86_EMU_IOMMU -+ | X86_EMU_PIT -+ | X86_EMU_USE_PIRQ -+ -+type xen_x86_arch_domainconfig = -+{ -+ emulation_flags: x86_arch_emulation_flags list; -+} -+ -+type arch_domainconfig = -+ | ARM of xen_arm_arch_domainconfig -+ | X86 of xen_x86_arch_domainconfig -+ - type domaininfo = - { - domid : domid; -@@ -46,6 +74,7 @@ type domaininfo = - max_vcpu_id : int; - ssidref : int32; - handle : int array; -+ arch_config : arch_domainconfig; - } - - type sched_control = -diff --git a/tools/ocaml/libs/xc/xenctrl.mli b/tools/ocaml/libs/xc/xenctrl.mli -index 702d8a7ab8..08f1fd26ae 100644 ---- a/tools/ocaml/libs/xc/xenctrl.mli -+++ b/tools/ocaml/libs/xc/xenctrl.mli -@@ -22,6 +22,33 @@ type vcpuinfo = { - cputime : int64; - cpumap : int32; - } -+ -+type xen_arm_arch_domainconfig = { -+ gic_version: int; -+ nr_spis: int; -+ clock_frequency: int32; -+} -+ -+type x86_arch_emulation_flags = -+ | X86_EMU_LAPIC -+ | X86_EMU_HPET -+ | X86_EMU_PM -+ | X86_EMU_RTC -+ | X86_EMU_IOAPIC -+ | X86_EMU_PIC -+ | X86_EMU_VGA -+ | X86_EMU_IOMMU -+ | X86_EMU_PIT -+ | X86_EMU_USE_PIRQ -+ -+type xen_x86_arch_domainconfig = { -+ emulation_flags: x86_arch_emulation_flags list; -+} -+ -+type arch_domainconfig = -+ | ARM of xen_arm_arch_domainconfig -+ | X86 of xen_x86_arch_domainconfig -+ - type domaininfo = { - domid : domid; - dying : bool; -@@ -39,6 +66,7 @@ type domaininfo = { - max_vcpu_id : int; - ssidref : int32; - handle : int array; -+ arch_config : arch_domainconfig; - } - type sched_control = { weight : int; cap : int; } - type physinfo_cap_flag = CAP_HVM | CAP_DirectIO -diff --git a/tools/ocaml/libs/xc/xenctrl_stubs.c b/tools/ocaml/libs/xc/xenctrl_stubs.c -index c66732f67c..124aa34fe8 100644 ---- a/tools/ocaml/libs/xc/xenctrl_stubs.c -+++ b/tools/ocaml/libs/xc/xenctrl_stubs.c -@@ -273,10 +273,10 @@ CAMLprim value stub_xc_domain_shutdown(value xch, value domid, value reason) - static value alloc_domaininfo(xc_domaininfo_t * info) - { - CAMLparam0(); -- CAMLlocal2(result, tmp); -+ CAMLlocal5(result, tmp, arch_config, x86_arch_config, emul_list); - int i; - -- result = caml_alloc_tuple(16); -+ result = caml_alloc_tuple(17); - - Store_field(result, 0, Val_int(info->domain)); - Store_field(result, 1, 
Val_bool(info->flags & XEN_DOMINF_dying)); -@@ -302,6 +302,28 @@ static value alloc_domaininfo(xc_domaininfo_t * info) - - Store_field(result, 15, tmp); - -+ /* emulation_flags: x86_arch_emulation_flags list; */ -+ tmp = emul_list = Val_emptylist; -+ for (i = 0; i < 10; i++) { -+ if ((info->arch_config.emulation_flags >> i) & 1) { -+ tmp = caml_alloc_small(2, Tag_cons); -+ Field(tmp, 0) = Val_int(i); -+ Field(tmp, 1) = emul_list; -+ emul_list = tmp; -+ } -+ } -+ -+ /* xen_x86_arch_domainconfig */ -+ x86_arch_config = caml_alloc_tuple(1); -+ Store_field(x86_arch_config, 0, emul_list); -+ -+ /* arch_config: arch_domainconfig */ -+ arch_config = caml_alloc_small(1, 1); -+ -+ Store_field(arch_config, 0, x86_arch_config); -+ -+ Store_field(result, 16, arch_config); -+ - CAMLreturn(result); - } - --- -2.14.3 - - -From 48811d481cedd5838a2d0ba8dfa149133888c84b Mon Sep 17 00:00:00 2001 -From: Jon Ludlam -Date: Thu, 11 Jan 2018 17:47:59 +0000 -Subject: [PATCH 24/77] tools/ocaml: Extend domain_create() to take - arch_domainconfig - -No longer passing NULL into xc_domain_create() allows for the creation -of PVH guests. - -Signed-off-by: Jon Ludlam -Signed-off-by: Andrew Cooper ---- - tools/ocaml/libs/xc/xenctrl.ml | 2 +- - tools/ocaml/libs/xc/xenctrl.mli | 2 +- - tools/ocaml/libs/xc/xenctrl_stubs.c | 22 ++++++++++++++++++++-- - 3 files changed, 22 insertions(+), 4 deletions(-) - -diff --git a/tools/ocaml/libs/xc/xenctrl.ml b/tools/ocaml/libs/xc/xenctrl.ml -index d549068d60..9116aa222c 100644 ---- a/tools/ocaml/libs/xc/xenctrl.ml -+++ b/tools/ocaml/libs/xc/xenctrl.ml -@@ -143,7 +143,7 @@ let with_intf f = - interface_close xc; - r - --external _domain_create: handle -> int32 -> domain_create_flag list -> int array -> domid -+external _domain_create: handle -> int32 -> domain_create_flag list -> int array -> arch_domainconfig -> domid - = "stub_xc_domain_create" - - let int_array_of_uuid_string s = -diff --git a/tools/ocaml/libs/xc/xenctrl.mli b/tools/ocaml/libs/xc/xenctrl.mli -index 08f1fd26ae..54c099c88f 100644 ---- a/tools/ocaml/libs/xc/xenctrl.mli -+++ b/tools/ocaml/libs/xc/xenctrl.mli -@@ -102,7 +102,7 @@ external sizeof_xen_pfn : unit -> int = "stub_sizeof_xen_pfn" - external interface_open : unit -> handle = "stub_xc_interface_open" - external interface_close : handle -> unit = "stub_xc_interface_close" - val with_intf : (handle -> 'a) -> 'a --val domain_create : handle -> int32 -> domain_create_flag list -> string -> domid -+val domain_create : handle -> int32 -> domain_create_flag list -> string -> arch_domainconfig -> domid - val domain_sethandle : handle -> domid -> string -> unit - external domain_max_vcpus : handle -> domid -> int -> unit - = "stub_xc_domain_max_vcpus" -diff --git a/tools/ocaml/libs/xc/xenctrl_stubs.c b/tools/ocaml/libs/xc/xenctrl_stubs.c -index 124aa34fe8..0b5a2361c0 100644 ---- a/tools/ocaml/libs/xc/xenctrl_stubs.c -+++ b/tools/ocaml/libs/xc/xenctrl_stubs.c -@@ -144,7 +144,8 @@ static int domain_create_flag_table[] = { - }; - - CAMLprim value stub_xc_domain_create(value xch, value ssidref, -- value flags, value handle) -+ value flags, value handle, -+ value domconfig) - { - CAMLparam4(xch, ssidref, flags, handle); - -@@ -155,6 +156,7 @@ CAMLprim value stub_xc_domain_create(value xch, value ssidref, - uint32_t c_ssidref = Int32_val(ssidref); - unsigned int c_flags = 0; - value l; -+ xc_domain_configuration_t config = {}; - - if (Wosize_val(handle) != 16) - caml_invalid_argument("Handle not a 16-integer array"); -@@ -168,8 +170,24 @@ CAMLprim value 
stub_xc_domain_create(value xch, value ssidref, - c_flags |= domain_create_flag_table[v]; - } - -+ switch(Tag_val(domconfig)) { -+ case 0: /* ARM - nothing to do */ -+ caml_failwith("Unhandled: ARM"); -+ break; -+ -+ case 1: /* X86 - emulation flags in the block */ -+ for (l = Field(Field(domconfig, 0), 0); -+ l != Val_none; -+ l = Field(l, 1)) -+ config.emulation_flags |= 1u << Int_val(Field(l, 0)); -+ break; -+ -+ default: -+ caml_failwith("Unhandled domconfig type"); -+ } -+ - caml_enter_blocking_section(); -- result = xc_domain_create(_H(xch), c_ssidref, h, c_flags, &domid, NULL); -+ result = xc_domain_create(_H(xch), c_ssidref, h, c_flags, &domid, &config); - caml_leave_blocking_section(); - - if (result < 0) --- -2.14.3 - - -From 57dc22b80d3ba6db7eea87d84a009015e65eefb0 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Thu, 11 Jan 2018 17:48:00 +0000 -Subject: [PATCH 25/77] x86/fixmap: Modify fix_to_virt() to return a void - pointer - -Almost all users of fix_to_virt() actually want a pointer. Include the cast -within the definition, so the callers don't need to. - -Two users which need the integer value are switched to using __fix_to_virt() -directly. A few users stay fully unchanged, due to GCC's void pointer -arithmetic extension causing the same behaviour. Most users however have -their explicit casting dropped. - -Since __iomem is not used consistently in Xen, we drop it too. - -No functional change. - -Signed-off-by: Andrew Cooper -Reviewed-by: Wei Liu -Signed-off-by: Wei Liu ---- -v2: update commit message and remove unnecessary parentheses. ---- - xen/arch/x86/acpi/lib.c | 2 +- - xen/arch/x86/mm.c | 4 ++-- - xen/arch/x86/mpparse.c | 2 +- - xen/arch/x86/msi.c | 3 +-- - xen/arch/x86/tboot.c | 4 ++-- - xen/drivers/acpi/apei/apei-io.c | 2 +- - xen/drivers/char/ehci-dbgp.c | 2 +- - xen/drivers/char/ns16550.c | 2 +- - xen/include/asm-x86/apicdef.h | 2 +- - xen/include/asm-x86/fixmap.h | 2 +- - 10 files changed, 12 insertions(+), 13 deletions(-) - -diff --git a/xen/arch/x86/acpi/lib.c b/xen/arch/x86/acpi/lib.c -index 7d7c71848b..265b9ad819 100644 ---- a/xen/arch/x86/acpi/lib.c -+++ b/xen/arch/x86/acpi/lib.c -@@ -49,7 +49,7 @@ char *__acpi_map_table(paddr_t phys, unsigned long size) - offset = phys & (PAGE_SIZE - 1); - mapped_size = PAGE_SIZE - offset; - set_fixmap(FIX_ACPI_END, phys); -- base = fix_to_virt(FIX_ACPI_END); -+ base = __fix_to_virt(FIX_ACPI_END); - - /* - * Most cases can be covered by the below. 
-diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c -index a7a76a71db..0569342200 100644 ---- a/xen/arch/x86/mm.c -+++ b/xen/arch/x86/mm.c -@@ -5205,12 +5205,12 @@ void __set_fixmap( - enum fixed_addresses idx, unsigned long mfn, unsigned long flags) - { - BUG_ON(idx >= __end_of_fixed_addresses); -- map_pages_to_xen(fix_to_virt(idx), mfn, 1, flags); -+ map_pages_to_xen(__fix_to_virt(idx), mfn, 1, flags); - } - - void *__init arch_vmap_virt_end(void) - { -- return (void *)fix_to_virt(__end_of_fixed_addresses); -+ return fix_to_virt(__end_of_fixed_addresses); - } - - void __iomem *ioremap(paddr_t pa, size_t len) -diff --git a/xen/arch/x86/mpparse.c b/xen/arch/x86/mpparse.c -index a1a0738a19..49140e46f0 100644 ---- a/xen/arch/x86/mpparse.c -+++ b/xen/arch/x86/mpparse.c -@@ -703,7 +703,7 @@ static void __init efi_check_config(void) - return; - - __set_fixmap(FIX_EFI_MPF, PFN_DOWN(efi.mps), __PAGE_HYPERVISOR); -- mpf = (void *)fix_to_virt(FIX_EFI_MPF) + ((long)efi.mps & (PAGE_SIZE-1)); -+ mpf = fix_to_virt(FIX_EFI_MPF) + ((long)efi.mps & (PAGE_SIZE-1)); - - if (memcmp(mpf->mpf_signature, "_MP_", 4) == 0 && - mpf->mpf_length == 1 && -diff --git a/xen/arch/x86/msi.c b/xen/arch/x86/msi.c -index 4652b98c2d..475881ed89 100644 ---- a/xen/arch/x86/msi.c -+++ b/xen/arch/x86/msi.c -@@ -961,8 +961,7 @@ static int msix_capability_init(struct pci_dev *dev, - xfree(entry); - return idx; - } -- base = (void *)(fix_to_virt(idx) + -- ((unsigned long)entry_paddr & (PAGE_SIZE - 1))); -+ base = fix_to_virt(idx) + (entry_paddr & (PAGE_SIZE - 1)); - - /* Mask interrupt here */ - writel(1, base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET); -diff --git a/xen/arch/x86/tboot.c b/xen/arch/x86/tboot.c -index 59d7c477f4..d36bf33407 100644 ---- a/xen/arch/x86/tboot.c -+++ b/xen/arch/x86/tboot.c -@@ -82,7 +82,7 @@ static void __init tboot_copy_memory(unsigned char *va, uint32_t size, - { - map_base = PFN_DOWN(pa + i); - set_fixmap(FIX_TBOOT_MAP_ADDRESS, map_base << PAGE_SHIFT); -- map_addr = (unsigned char *)fix_to_virt(FIX_TBOOT_MAP_ADDRESS); -+ map_addr = fix_to_virt(FIX_TBOOT_MAP_ADDRESS); - } - va[i] = map_addr[pa + i - (map_base << PAGE_SHIFT)]; - } -@@ -98,7 +98,7 @@ void __init tboot_probe(void) - - /* Map and check for tboot UUID. */ - set_fixmap(FIX_TBOOT_SHARED_BASE, opt_tboot_pa); -- tboot_shared = (tboot_shared_t *)fix_to_virt(FIX_TBOOT_SHARED_BASE); -+ tboot_shared = fix_to_virt(FIX_TBOOT_SHARED_BASE); - if ( tboot_shared == NULL ) - return; - if ( memcmp(&tboot_shared_uuid, (uuid_t *)tboot_shared, sizeof(uuid_t)) ) -diff --git a/xen/drivers/acpi/apei/apei-io.c b/xen/drivers/acpi/apei/apei-io.c -index 8955de935e..89b70f45ef 100644 ---- a/xen/drivers/acpi/apei/apei-io.c -+++ b/xen/drivers/acpi/apei/apei-io.c -@@ -92,7 +92,7 @@ static void __iomem *__init apei_range_map(paddr_t paddr, unsigned long size) - apei_range_nr++; - } - -- return (void __iomem *)fix_to_virt(FIX_APEI_RANGE_BASE + start_nr); -+ return fix_to_virt(FIX_APEI_RANGE_BASE + start_nr); - } - - /* -diff --git a/xen/drivers/char/ehci-dbgp.c b/xen/drivers/char/ehci-dbgp.c -index d48e777c34..d0071d3114 100644 ---- a/xen/drivers/char/ehci-dbgp.c -+++ b/xen/drivers/char/ehci-dbgp.c -@@ -1327,7 +1327,7 @@ static void __init ehci_dbgp_init_preirq(struct serial_port *port) - * than enough. 1k is the biggest that was seen. 
- */ - set_fixmap_nocache(FIX_EHCI_DBGP, dbgp->bar_val); -- ehci_bar = (void __iomem *)fix_to_virt(FIX_EHCI_DBGP); -+ ehci_bar = fix_to_virt(FIX_EHCI_DBGP); - ehci_bar += dbgp->bar_val & ~PAGE_MASK; - dbgp_printk("ehci_bar: %p\n", ehci_bar); - -diff --git a/xen/drivers/char/ns16550.c b/xen/drivers/char/ns16550.c -index e0f8199f98..f32dbd3247 100644 ---- a/xen/drivers/char/ns16550.c -+++ b/xen/drivers/char/ns16550.c -@@ -697,7 +697,7 @@ static void __init ns16550_init_preirq(struct serial_port *port) - enum fixed_addresses idx = FIX_COM_BEGIN + (uart - ns16550_com); - - set_fixmap_nocache(idx, uart->io_base); -- uart->remapped_io_base = (void __iomem *)fix_to_virt(idx); -+ uart->remapped_io_base = fix_to_virt(idx); - uart->remapped_io_base += uart->io_base & ~PAGE_MASK; - #else - uart->remapped_io_base = (char *)ioremap(uart->io_base, uart->io_size); -diff --git a/xen/include/asm-x86/apicdef.h b/xen/include/asm-x86/apicdef.h -index eed504a31a..2fa0b77a8a 100644 ---- a/xen/include/asm-x86/apicdef.h -+++ b/xen/include/asm-x86/apicdef.h -@@ -119,7 +119,7 @@ - /* Only available in x2APIC mode */ - #define APIC_SELF_IPI 0x3F0 - --#define APIC_BASE (fix_to_virt(FIX_APIC_BASE)) -+#define APIC_BASE __fix_to_virt(FIX_APIC_BASE) - - /* It's only used in x2APIC mode of an x2APIC unit. */ - #define APIC_MSR_BASE 0x800 -diff --git a/xen/include/asm-x86/fixmap.h b/xen/include/asm-x86/fixmap.h -index 89bf6cb611..51b0e7e945 100644 ---- a/xen/include/asm-x86/fixmap.h -+++ b/xen/include/asm-x86/fixmap.h -@@ -79,7 +79,7 @@ extern void __set_fixmap( - #define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT)) - #define __virt_to_fix(x) ((FIXADDR_TOP - ((x)&PAGE_MASK)) >> PAGE_SHIFT) - --#define fix_to_virt(x) (__fix_to_virt(x)) -+#define fix_to_virt(x) ((void *)__fix_to_virt(x)) - - static inline unsigned long virt_to_fix(const unsigned long vaddr) - { --- -2.14.3 - - -From b538a13a68b42dbe47832d76299011765bf59e60 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Thu, 11 Jan 2018 17:48:00 +0000 -Subject: [PATCH 26/77] x86: Common cpuid faulting support - -With CPUID Faulting offered to SVM guests, move Xen's faulting code to being -common rather than Intel specific. - -This is necessary for nested Xen (inc. pv-shim mode) to prevent PV guests from -finding the outer HVM Xen leaves via native cpuid. - -Signed-off-by: Andrew Cooper ---- - xen/arch/x86/cpu/amd.c | 16 +++++--- - xen/arch/x86/cpu/common.c | 76 ++++++++++++++++++++++++++++++++++++-- - xen/arch/x86/cpu/intel.c | 82 +++++++---------------------------------- - xen/include/asm-x86/cpuid.h | 3 -- - xen/include/asm-x86/processor.h | 4 +- - 5 files changed, 98 insertions(+), 83 deletions(-) - -diff --git a/xen/arch/x86/cpu/amd.c b/xen/arch/x86/cpu/amd.c -index 5f36ac75a7..2bff3ee377 100644 ---- a/xen/arch/x86/cpu/amd.c -+++ b/xen/arch/x86/cpu/amd.c -@@ -198,11 +198,12 @@ static void __init noinline probe_masking_msrs(void) - } - - /* -- * Context switch levelling state to the next domain. A parameter of NULL is -- * used to context switch to the default host state (by the cpu bringup-code, -- * crash path, etc). -+ * Context switch CPUID masking state to the next domain. Only called if -+ * CPUID Faulting isn't available, but masking MSRs have been detected. A -+ * parameter of NULL is used to context switch to the default host state (by -+ * the cpu bringup-code, crash path, etc). 
- */ --static void amd_ctxt_switch_levelling(const struct vcpu *next) -+static void amd_ctxt_switch_masking(const struct vcpu *next) - { - struct cpuidmasks *these_masks = &this_cpu(cpuidmasks); - const struct domain *nextd = next ? next->domain : NULL; -@@ -263,6 +264,9 @@ static void __init noinline amd_init_levelling(void) - { - const struct cpuidmask *m = NULL; - -+ if (probe_cpuid_faulting()) -+ return; -+ - probe_masking_msrs(); - - if (*opt_famrev != '\0') { -@@ -352,7 +356,7 @@ static void __init noinline amd_init_levelling(void) - } - - if (levelling_caps) -- ctxt_switch_levelling = amd_ctxt_switch_levelling; -+ ctxt_switch_masking = amd_ctxt_switch_masking; - } - - /* -@@ -518,7 +522,7 @@ static void early_init_amd(struct cpuinfo_x86 *c) - if (c == &boot_cpu_data) - amd_init_levelling(); - -- amd_ctxt_switch_levelling(NULL); -+ ctxt_switch_levelling(NULL); - } - - static void init_amd(struct cpuinfo_x86 *c) -diff --git a/xen/arch/x86/cpu/common.c b/xen/arch/x86/cpu/common.c -index 6cf362849e..157bae2026 100644 ---- a/xen/arch/x86/cpu/common.c -+++ b/xen/arch/x86/cpu/common.c -@@ -113,12 +113,80 @@ static const struct cpu_dev default_cpu = { - }; - static const struct cpu_dev *this_cpu = &default_cpu; - --static void default_ctxt_switch_levelling(const struct vcpu *next) -+static DEFINE_PER_CPU(uint64_t, msr_misc_features); -+void (* __read_mostly ctxt_switch_masking)(const struct vcpu *next); -+ -+bool __init probe_cpuid_faulting(void) -+{ -+ uint64_t val; -+ -+ if (rdmsr_safe(MSR_INTEL_PLATFORM_INFO, val) || -+ !(val & MSR_PLATFORM_INFO_CPUID_FAULTING) || -+ rdmsr_safe(MSR_INTEL_MISC_FEATURES_ENABLES, -+ this_cpu(msr_misc_features))) -+ { -+ setup_clear_cpu_cap(X86_FEATURE_CPUID_FAULTING); -+ return false; -+ } -+ -+ expected_levelling_cap |= LCAP_faulting; -+ levelling_caps |= LCAP_faulting; -+ setup_force_cpu_cap(X86_FEATURE_CPUID_FAULTING); -+ -+ return true; -+} -+ -+static void set_cpuid_faulting(bool enable) -+{ -+ uint64_t *this_misc_features = &this_cpu(msr_misc_features); -+ uint64_t val = *this_misc_features; -+ -+ if (!!(val & MSR_MISC_FEATURES_CPUID_FAULTING) == enable) -+ return; -+ -+ val ^= MSR_MISC_FEATURES_CPUID_FAULTING; -+ -+ wrmsrl(MSR_INTEL_MISC_FEATURES_ENABLES, val); -+ *this_misc_features = val; -+} -+ -+void ctxt_switch_levelling(const struct vcpu *next) - { -- /* Nop */ -+ const struct domain *nextd = next ? next->domain : NULL; -+ -+ if (cpu_has_cpuid_faulting) { -+ /* -+ * No need to alter the faulting setting if we are switching -+ * to idle; it won't affect any code running in idle context. -+ */ -+ if (nextd && is_idle_domain(nextd)) -+ return; -+ /* -+ * We *should* be enabling faulting for the control domain. -+ * -+ * Unfortunately, the domain builder (having only ever been a -+ * PV guest) expects to be able to see host cpuid state in a -+ * native CPUID instruction, to correctly build a CPUID policy -+ * for HVM guests (notably the xstate leaves). -+ * -+ * This logic is fundimentally broken for HVM toolstack -+ * domains, and faulting causes PV guests to behave like HVM -+ * guests from their point of view. -+ * -+ * Future development plans will move responsibility for -+ * generating the maximum full cpuid policy into Xen, at which -+ * this problem will disappear. 
-+ */ -+ set_cpuid_faulting(nextd && !is_control_domain(nextd) && -+ (is_pv_domain(nextd) || -+ next->arch.msr-> -+ misc_features_enables.cpuid_faulting)); -+ return; -+ } -+ -+ if (ctxt_switch_masking) -+ ctxt_switch_masking(next); - } --void (* __read_mostly ctxt_switch_levelling)(const struct vcpu *next) = -- default_ctxt_switch_levelling; - - bool_t opt_cpu_info; - boolean_param("cpuinfo", opt_cpu_info); -diff --git a/xen/arch/x86/cpu/intel.c b/xen/arch/x86/cpu/intel.c -index ac932e5b38..508e56f5c1 100644 ---- a/xen/arch/x86/cpu/intel.c -+++ b/xen/arch/x86/cpu/intel.c -@@ -17,41 +17,6 @@ - - #define select_idle_routine(x) ((void)0) - --static bool __init probe_intel_cpuid_faulting(void) --{ -- uint64_t x; -- -- if (rdmsr_safe(MSR_INTEL_PLATFORM_INFO, x) || -- !(x & MSR_PLATFORM_INFO_CPUID_FAULTING)) -- return 0; -- -- expected_levelling_cap |= LCAP_faulting; -- levelling_caps |= LCAP_faulting; -- setup_force_cpu_cap(X86_FEATURE_CPUID_FAULTING); -- return 1; --} -- --DEFINE_PER_CPU(bool, cpuid_faulting_enabled); -- --static void set_cpuid_faulting(bool enable) --{ -- bool *this_enabled = &this_cpu(cpuid_faulting_enabled); -- uint32_t hi, lo; -- -- ASSERT(cpu_has_cpuid_faulting); -- -- if (*this_enabled == enable) -- return; -- -- rdmsr(MSR_INTEL_MISC_FEATURES_ENABLES, lo, hi); -- lo &= ~MSR_MISC_FEATURES_CPUID_FAULTING; -- if (enable) -- lo |= MSR_MISC_FEATURES_CPUID_FAULTING; -- wrmsr(MSR_INTEL_MISC_FEATURES_ENABLES, lo, hi); -- -- *this_enabled = enable; --} -- - /* - * Set caps in expected_levelling_cap, probe a specific masking MSR, and set - * caps in levelling_caps if it is found, or clobber the MSR index if missing. -@@ -147,40 +112,17 @@ static void __init probe_masking_msrs(void) - } - - /* -- * Context switch levelling state to the next domain. A parameter of NULL is -- * used to context switch to the default host state (by the cpu bringup-code, -- * crash path, etc). -+ * Context switch CPUID masking state to the next domain. Only called if -+ * CPUID Faulting isn't available, but masking MSRs have been detected. A -+ * parameter of NULL is used to context switch to the default host state (by -+ * the cpu bringup-code, crash path, etc). - */ --static void intel_ctxt_switch_levelling(const struct vcpu *next) -+static void intel_ctxt_switch_masking(const struct vcpu *next) - { - struct cpuidmasks *these_masks = &this_cpu(cpuidmasks); - const struct domain *nextd = next ? next->domain : NULL; -- const struct cpuidmasks *masks; -- -- if (cpu_has_cpuid_faulting) { -- /* -- * We *should* be enabling faulting for the control domain. -- * -- * Unfortunately, the domain builder (having only ever been a -- * PV guest) expects to be able to see host cpuid state in a -- * native CPUID instruction, to correctly build a CPUID policy -- * for HVM guests (notably the xstate leaves). -- * -- * This logic is fundimentally broken for HVM toolstack -- * domains, and faulting causes PV guests to behave like HVM -- * guests from their point of view. -- * -- * Future development plans will move responsibility for -- * generating the maximum full cpuid policy into Xen, at which -- * this problem will disappear. -- */ -- set_cpuid_faulting(nextd && !is_control_domain(nextd) && -- (is_pv_domain(nextd) || -- next->arch.msr->misc_features_enables.cpuid_faulting)); -- return; -- } -- -- masks = (nextd && is_pv_domain(nextd) && nextd->arch.pv_domain.cpuidmasks) -+ const struct cpuidmasks *masks = -+ (nextd && is_pv_domain(nextd) && nextd->arch.pv_domain.cpuidmasks) - ? 
nextd->arch.pv_domain.cpuidmasks : &cpuidmask_defaults; - - if (msr_basic) { -@@ -225,8 +167,10 @@ static void intel_ctxt_switch_levelling(const struct vcpu *next) - */ - static void __init noinline intel_init_levelling(void) - { -- if (!probe_intel_cpuid_faulting()) -- probe_masking_msrs(); -+ if (probe_cpuid_faulting()) -+ return; -+ -+ probe_masking_msrs(); - - if (msr_basic) { - uint32_t ecx, edx, tmp; -@@ -280,7 +224,7 @@ static void __init noinline intel_init_levelling(void) - } - - if (levelling_caps) -- ctxt_switch_levelling = intel_ctxt_switch_levelling; -+ ctxt_switch_masking = intel_ctxt_switch_masking; - } - - static void early_init_intel(struct cpuinfo_x86 *c) -@@ -320,7 +264,7 @@ static void early_init_intel(struct cpuinfo_x86 *c) - if (c == &boot_cpu_data) - intel_init_levelling(); - -- intel_ctxt_switch_levelling(NULL); -+ ctxt_switch_levelling(NULL); - } - - /* -diff --git a/xen/include/asm-x86/cpuid.h b/xen/include/asm-x86/cpuid.h -index d2dd841e15..74d6f123e5 100644 ---- a/xen/include/asm-x86/cpuid.h -+++ b/xen/include/asm-x86/cpuid.h -@@ -58,9 +58,6 @@ DECLARE_PER_CPU(struct cpuidmasks, cpuidmasks); - /* Default masking MSR values, calculated at boot. */ - extern struct cpuidmasks cpuidmask_defaults; - --/* Whether or not cpuid faulting is available for the current domain. */ --DECLARE_PER_CPU(bool, cpuid_faulting_enabled); -- - #define CPUID_GUEST_NR_BASIC (0xdu + 1) - #define CPUID_GUEST_NR_FEAT (0u + 1) - #define CPUID_GUEST_NR_CACHE (5u + 1) -diff --git a/xen/include/asm-x86/processor.h b/xen/include/asm-x86/processor.h -index 41a8d8c32f..c9601b2fb2 100644 ---- a/xen/include/asm-x86/processor.h -+++ b/xen/include/asm-x86/processor.h -@@ -151,7 +151,9 @@ extern struct cpuinfo_x86 boot_cpu_data; - extern struct cpuinfo_x86 cpu_data[]; - #define current_cpu_data cpu_data[smp_processor_id()] - --extern void (*ctxt_switch_levelling)(const struct vcpu *next); -+extern bool probe_cpuid_faulting(void); -+extern void ctxt_switch_levelling(const struct vcpu *next); -+extern void (*ctxt_switch_masking)(const struct vcpu *next); - - extern u64 host_pat; - extern bool_t opt_cpu_info; --- -2.14.3 - - -From af2f50b2b6f284a5498bcfe8e4203b25e120338e Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Fri, 10 Nov 2017 16:35:26 +0000 -Subject: [PATCH 27/77] x86/Kconfig: Options for Xen and PVH support - -Introduce two options. One to detect whether the binary is running on -Xen, the other enables PVH ABI support. - -The former will be useful to PV in HVM approach. Both will be used by -PV in PVH approach. - -Signed-off-by: Andrew Cooper -Signed-off-by: Wei Liu ---- -v2: -Write commit message. Didn't change the config option value as it -requires a lot of changes in later patches. ---- - xen/arch/x86/Kconfig | 17 +++++++++++++++++ - 1 file changed, 17 insertions(+) - -diff --git a/xen/arch/x86/Kconfig b/xen/arch/x86/Kconfig -index 7c4582922f..c0b0bcdcb3 100644 ---- a/xen/arch/x86/Kconfig -+++ b/xen/arch/x86/Kconfig -@@ -117,6 +117,23 @@ config TBOOT - Technology (TXT) - - If unsure, say Y. -+ -+config XEN_GUEST -+ def_bool n -+ prompt "Xen Guest" -+ ---help--- -+ Support for Xen detecting when it is running under Xen. -+ -+ If unsure, say N. -+ -+config PVH_GUEST -+ def_bool n -+ prompt "PVH Guest" -+ depends on XEN_GUEST -+ ---help--- -+ Support booting using the PVH ABI. -+ -+ If unsure, say N. 
- endmenu - - source "common/Kconfig" --- -2.14.3 - - -From f575701f3c7a6c6afde7c289058d9d3110a617d1 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Wed, 22 Nov 2017 11:09:41 +0000 -Subject: [PATCH 28/77] x86/link: Relocate program headers - -When the xen binary is loaded by libelf (in the future) we rely on the -elf loader to load the binary accordingly. Specify the load address so -that the resulting binary can make p_vaddr and p_paddr have different -values. - -Signed-off-by: Andrew Cooper -Signed-off-by: Wei Liu ---- -v2: -Clarify commit message. Haven't tested grub1 boot. ---- - xen/arch/x86/xen.lds.S | 22 +++++++++++++--------- - 1 file changed, 13 insertions(+), 9 deletions(-) - -diff --git a/xen/arch/x86/xen.lds.S b/xen/arch/x86/xen.lds.S -index 6164ad094f..400d8a56c4 100644 ---- a/xen/arch/x86/xen.lds.S -+++ b/xen/arch/x86/xen.lds.S -@@ -13,6 +13,7 @@ - #undef __XEN_VIRT_START - #define __XEN_VIRT_START __image_base__ - #define SECTION_ALIGN MB(2) -+#define DECL_SECTION(x) x : - - ENTRY(efi_start) - -@@ -20,8 +21,9 @@ ENTRY(efi_start) - - #define FORMAT "elf64-x86-64" - #define SECTION_ALIGN PAGE_SIZE -+#define DECL_SECTION(x) x : AT(ADDR(x) - __XEN_VIRT_START) - --ENTRY(start) -+ENTRY(start_pa) - - #endif /* EFI */ - -@@ -56,9 +58,11 @@ SECTIONS - __2M_text_start = .; /* Start of 2M superpages, mapped RX. */ - #endif - -+ start_pa = ABSOLUTE(start - __XEN_VIRT_START); -+ - . = __XEN_VIRT_START + XEN_IMG_OFFSET; - _start = .; -- .text : { -+ DECL_SECTION(.text) { - _stext = .; /* Text and read-only data */ - *(.text) - *(.text.cold) -@@ -73,7 +77,7 @@ SECTIONS - __2M_text_end = .; - - __2M_rodata_start = .; /* Start of 2M superpages, mapped RO. */ -- .rodata : { -+ DECL_SECTION(.rodata) { - _srodata = .; - /* Bug frames table */ - __start_bug_frames = .; -@@ -132,13 +136,13 @@ SECTIONS - * compiler may want to inject other things in the .note which we don't care - * about - hence this unique name. - */ -- .note.gnu.build-id : { -+ DECL_SECTION(.note.gnu.build-id) { - __note_gnu_build_id_start = .; - *(.note.gnu.build-id) - __note_gnu_build_id_end = .; - } :note :text - #elif defined(BUILD_ID_EFI) -- .buildid : { -+ DECL_SECTION(.buildid) { - __note_gnu_build_id_start = .; - *(.buildid) - __note_gnu_build_id_end = .; -@@ -153,7 +157,7 @@ SECTIONS - __2M_init_start = .; /* Start of 2M superpages, mapped RWX (boot only). */ - . = ALIGN(PAGE_SIZE); /* Init code and data */ - __init_begin = .; -- .init : { -+ DECL_SECTION(.init) { - _sinittext = .; - *(.init.text) - /* -@@ -215,7 +219,7 @@ SECTIONS - - __2M_rwdata_start = .; /* Start of 2M superpages, mapped RW. */ - . = ALIGN(SMP_CACHE_BYTES); -- .data.read_mostly : { -+ DECL_SECTION(.data.read_mostly) { - *(.data.read_mostly) - . = ALIGN(8); - __start_schedulers_array = .; -@@ -223,7 +227,7 @@ SECTIONS - __end_schedulers_array = .; - } :text - -- .data : { /* Data */ -+ DECL_SECTION(.data) { - *(.data.page_aligned) - *(.data) - *(.data.rel) -@@ -231,7 +235,7 @@ SECTIONS - CONSTRUCTORS - } :text - -- .bss : { /* BSS */ -+ DECL_SECTION(.bss) { - __bss_start = .; - *(.bss.stack_aligned) - *(.bss.page_aligned*) --- -2.14.3 - - -From 887c705600114c502cd3b529659af085680f526a Mon Sep 17 00:00:00 2001 -From: Wei Liu -Date: Fri, 10 Nov 2017 12:36:49 +0000 -Subject: [PATCH 29/77] x86: introduce ELFNOTE macro - -It is needed later for introducing PVH entry point. - -Signed-off-by: Wei Liu ---- -v2: -1. Specify section attribute and type. -2. Use p2align. -3. Align instructions. -4. Haven't used .L or turned it into assembly macro. 
---- - xen/include/asm-x86/asm_defns.h | 12 ++++++++++++ - 1 file changed, 12 insertions(+) - -diff --git a/xen/include/asm-x86/asm_defns.h b/xen/include/asm-x86/asm_defns.h -index 388fc93b9d..35a5d9ee03 100644 ---- a/xen/include/asm-x86/asm_defns.h -+++ b/xen/include/asm-x86/asm_defns.h -@@ -409,4 +409,16 @@ static always_inline void stac(void) - #define REX64_PREFIX "rex64/" - #endif - -+#define ELFNOTE(name, type, desc) \ -+ .pushsection .note.name, "a", @note ; \ -+ .p2align 2 ; \ -+ .long 2f - 1f /* namesz */ ; \ -+ .long 4f - 3f /* descsz */ ; \ -+ .long type /* type */ ; \ -+1: .asciz #name /* name */ ; \ -+2: .p2align 2 ; \ -+3: desc /* desc */ ; \ -+4: .p2align 2 ; \ -+ .popsection -+ - #endif /* __X86_ASM_DEFNS_H__ */ --- -2.14.3 - - -From 51f937a39bb6acadec1f4ab55f01048c2c1caee0 Mon Sep 17 00:00:00 2001 -From: Wei Liu -Date: Fri, 10 Nov 2017 16:19:40 +0000 -Subject: [PATCH 30/77] x86: produce a binary that can be booted as PVH - -Produce a binary that can be booted as PVH. It doesn't do much yet. - -Signed-off-by: Wei Liu -Signed-off-by: Andrew Cooper ---- -v2: -1. Remove shim-y dependency. -2. Remove extraneous blank line. -3. Fix bugs in xen.lds.S. -4. Haven't split code into pvh.S because that will break later - patches. ---- - .gitignore | 1 + - xen/arch/x86/Makefile | 8 ++++++++ - xen/arch/x86/boot/head.S | 9 +++++++++ - xen/arch/x86/xen.lds.S | 9 ++++++++- - 4 files changed, 26 insertions(+), 1 deletion(-) - -diff --git a/.gitignore b/.gitignore -index d64b03d06c..8da67daf31 100644 ---- a/.gitignore -+++ b/.gitignore -@@ -323,6 +323,7 @@ xen/xsm/flask/xenpolicy-* - tools/flask/policy/policy.conf - tools/flask/policy/xenpolicy-* - xen/xen -+xen/xen-shim - xen/xen-syms - xen/xen-syms.map - xen/xen.* -diff --git a/xen/arch/x86/Makefile b/xen/arch/x86/Makefile -index d5d58a205e..01d1178530 100644 ---- a/xen/arch/x86/Makefile -+++ b/xen/arch/x86/Makefile -@@ -75,6 +75,8 @@ efi-y := $(shell if [ ! -r $(BASEDIR)/include/xen/compile.h -o \ - -O $(BASEDIR)/include/xen/compile.h ]; then \ - echo '$(TARGET).efi'; fi) - -+shim-$(CONFIG_PVH_GUEST) := $(TARGET)-shim -+ - ifneq ($(build_id_linker),) - notes_phdrs = --notes - else -@@ -144,6 +146,11 @@ $(TARGET)-syms: prelink.o xen.lds $(BASEDIR)/common/symbols-dummy.o - >$(@D)/$(@F).map - rm -f $(@D)/.$(@F).[0-9]* - -+# Use elf32-x86-64 if toolchain support exists, elf32-i386 otherwise. -+$(TARGET)-shim: FORMAT = $(firstword $(filter elf32-x86-64,$(shell $(OBJCOPY) --help)) elf32-i386) -+$(TARGET)-shim: $(TARGET)-syms -+ $(OBJCOPY) -O $(FORMAT) $< $@ -+ - note.o: $(TARGET)-syms - $(OBJCOPY) -O binary --only-section=.note.gnu.build-id $(BASEDIR)/xen-syms $@.bin - $(OBJCOPY) -I binary -O elf64-x86-64 -B i386:x86-64 \ -@@ -224,5 +231,6 @@ clean:: - rm -f asm-offsets.s *.lds boot/*.o boot/*~ boot/core boot/mkelf32 - rm -f $(BASEDIR)/.xen-syms.[0-9]* boot/.*.d - rm -f $(BASEDIR)/.xen.efi.[0-9]* efi/*.efi efi/disabled efi/mkreloc -+ rm -f $(BASEDIR)/xen-shim - rm -f boot/cmdline.S boot/reloc.S boot/*.lnk boot/*.bin - rm -f note.o -diff --git a/xen/arch/x86/boot/head.S b/xen/arch/x86/boot/head.S -index 9cc35da558..af25d23736 100644 ---- a/xen/arch/x86/boot/head.S -+++ b/xen/arch/x86/boot/head.S -@@ -7,6 +7,7 @@ - #include - #include - #include -+#include - - .text - .code32 -@@ -374,6 +375,14 @@ cs32_switch: - /* Jump to earlier loaded address. 
*/ - jmp *%edi - -+#ifdef CONFIG_PVH_GUEST -+ELFNOTE(Xen, XEN_ELFNOTE_PHYS32_ENTRY, .long sym_offs(__pvh_start)) -+ -+__pvh_start: -+ ud2a -+ -+#endif /* CONFIG_PVH_GUEST */ -+ - __start: - cld - cli -diff --git a/xen/arch/x86/xen.lds.S b/xen/arch/x86/xen.lds.S -index 400d8a56c4..2023f971e4 100644 ---- a/xen/arch/x86/xen.lds.S -+++ b/xen/arch/x86/xen.lds.S -@@ -34,7 +34,7 @@ OUTPUT_ARCH(i386:x86-64) - PHDRS - { - text PT_LOAD ; --#if defined(BUILD_ID) && !defined(EFI) -+#if (defined(BUILD_ID) || defined (CONFIG_PVH_GUEST)) && !defined(EFI) - note PT_NOTE ; - #endif - } -@@ -128,6 +128,12 @@ SECTIONS - __param_end = .; - } :text - -+#if defined(CONFIG_PVH_GUEST) && !defined(EFI) -+ DECL_SECTION(.note.Xen) { -+ *(.note.Xen) -+ } :note :text -+#endif -+ - #if defined(BUILD_ID) - #if !defined(EFI) - /* -@@ -279,6 +285,7 @@ SECTIONS - #ifdef EFI - *(.comment) - *(.comment.*) -+ *(.note.Xen) - #endif - } - --- -2.14.3 - - -From db65173fe73568d0c718ce2a1c3ef8dc69c66b99 Mon Sep 17 00:00:00 2001 -From: Wei Liu -Date: Mon, 13 Nov 2017 17:32:19 +0000 -Subject: [PATCH 31/77] x86/entry: Early PVH boot code - -Signed-off-by: Wei Liu -Signed-off-by: Andrew Cooper ---- -v2: -1. Fix comment. -2. Use cmpb $0. -3. Address comments on pvh-boot.c. -4. Haven't changed the pritnk modifiers to accommodate future changes. -5. Missing a prerequisite patch to relocate pvh_info to make __va work reliably. - [BLOCKER]. ---- - xen/arch/x86/Makefile | 1 + - xen/arch/x86/boot/head.S | 40 +++++++++++- - xen/arch/x86/boot/x86_64.S | 2 +- - xen/arch/x86/guest/Makefile | 1 + - xen/arch/x86/guest/pvh-boot.c | 119 +++++++++++++++++++++++++++++++++++ - xen/arch/x86/setup.c | 18 +++++- - xen/include/asm-x86/guest.h | 34 ++++++++++ - xen/include/asm-x86/guest/pvh-boot.h | 57 +++++++++++++++++ - 8 files changed, 268 insertions(+), 4 deletions(-) - create mode 100644 xen/arch/x86/guest/Makefile - create mode 100644 xen/arch/x86/guest/pvh-boot.c - create mode 100644 xen/include/asm-x86/guest.h - create mode 100644 xen/include/asm-x86/guest/pvh-boot.h - -diff --git a/xen/arch/x86/Makefile b/xen/arch/x86/Makefile -index 01d1178530..ac91e13606 100644 ---- a/xen/arch/x86/Makefile -+++ b/xen/arch/x86/Makefile -@@ -1,6 +1,7 @@ - subdir-y += acpi - subdir-y += cpu - subdir-y += genapic -+subdir-$(CONFIG_XEN_GUEST) += guest - subdir-$(CONFIG_HVM) += hvm - subdir-y += mm - subdir-$(CONFIG_XENOPROF) += oprofile -diff --git a/xen/arch/x86/boot/head.S b/xen/arch/x86/boot/head.S -index af25d23736..14caca6798 100644 ---- a/xen/arch/x86/boot/head.S -+++ b/xen/arch/x86/boot/head.S -@@ -379,7 +379,39 @@ cs32_switch: - ELFNOTE(Xen, XEN_ELFNOTE_PHYS32_ENTRY, .long sym_offs(__pvh_start)) - - __pvh_start: -- ud2a -+ cld -+ cli -+ -+ /* -+ * We need one push/pop to determine load address. Use the same -+ * absolute stack address as the native path, for lack of a better -+ * alternative. -+ */ -+ mov $0x1000, %esp -+ -+ /* Calculate the load base address. */ -+ call 1f -+1: pop %esi -+ sub $sym_offs(1b), %esi -+ -+ /* Set up stack. 
*/ -+ lea STACK_SIZE + sym_esi(cpu0_stack), %esp -+ -+ mov %ebx, sym_esi(pvh_start_info_pa) -+ -+ /* Prepare gdt and segments */ -+ add %esi, sym_esi(gdt_boot_base) -+ lgdt sym_esi(gdt_boot_descr) -+ -+ mov $BOOT_DS, %ecx -+ mov %ecx, %ds -+ mov %ecx, %es -+ mov %ecx, %ss -+ -+ /* Skip bootloader setup and bios setup, go straight to trampoline */ -+ movb $1, sym_esi(pvh_boot) -+ movb $1, sym_esi(skip_realmode) -+ jmp trampoline_setup - - #endif /* CONFIG_PVH_GUEST */ - -@@ -543,12 +575,18 @@ trampoline_setup: - /* Get bottom-most low-memory stack address. */ - add $TRAMPOLINE_SPACE,%ecx - -+#ifdef CONFIG_PVH_GUEST -+ cmpb $0, sym_fs(pvh_boot) -+ jne 1f -+#endif -+ - /* Save the Multiboot info struct (after relocation) for later use. */ - push %ecx /* Bottom-most low-memory stack address. */ - push %ebx /* Multiboot information address. */ - push %eax /* Multiboot magic. */ - call reloc - mov %eax,sym_fs(multiboot_ptr) -+1: - - /* - * Now trampoline_phys points to the following structure (lowest address -diff --git a/xen/arch/x86/boot/x86_64.S b/xen/arch/x86/boot/x86_64.S -index 925fd4bb0a..cf47e019f5 100644 ---- a/xen/arch/x86/boot/x86_64.S -+++ b/xen/arch/x86/boot/x86_64.S -@@ -31,7 +31,7 @@ ENTRY(__high_start) - test %ebx,%ebx - jnz start_secondary - -- /* Pass off the Multiboot info structure to C land. */ -+ /* Pass off the Multiboot info structure to C land (if applicable). */ - mov multiboot_ptr(%rip),%edi - call __start_xen - BUG /* __start_xen() shouldn't return. */ -diff --git a/xen/arch/x86/guest/Makefile b/xen/arch/x86/guest/Makefile -new file mode 100644 -index 0000000000..a5f1625ab1 ---- /dev/null -+++ b/xen/arch/x86/guest/Makefile -@@ -0,0 +1 @@ -+obj-bin-$(CONFIG_PVH_GUEST) += pvh-boot.init.o -diff --git a/xen/arch/x86/guest/pvh-boot.c b/xen/arch/x86/guest/pvh-boot.c -new file mode 100644 -index 0000000000..186e332657 ---- /dev/null -+++ b/xen/arch/x86/guest/pvh-boot.c -@@ -0,0 +1,119 @@ -+/****************************************************************************** -+ * arch/x86/guest/pvh-boot.c -+ * -+ * PVH boot time support -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; If not, see . -+ * -+ * Copyright (c) 2017 Citrix Systems Ltd. -+ */ -+#include -+#include -+#include -+ -+#include -+ -+#include -+ -+/* Initialised in head.S, before .bss is zeroed. */ -+bool __initdata pvh_boot; -+uint32_t __initdata pvh_start_info_pa; -+ -+static multiboot_info_t __initdata pvh_mbi; -+static module_t __initdata pvh_mbi_mods[8]; -+static const char *__initdata pvh_loader = "PVH Directboot"; -+ -+static void __init convert_pvh_info(void) -+{ -+ const struct hvm_start_info *pvh_info = __va(pvh_start_info_pa); -+ const struct hvm_modlist_entry *entry; -+ module_t *mod; -+ unsigned int i; -+ -+ ASSERT(pvh_info->magic == XEN_HVM_START_MAGIC_VALUE); -+ -+ /* -+ * Turn hvm_start_info into mbi. Luckily all modules are placed under 4GB -+ * boundary on x86. 
-+ */ -+ pvh_mbi.flags = MBI_CMDLINE | MBI_MODULES | MBI_LOADERNAME; -+ -+ BUG_ON(pvh_info->cmdline_paddr >> 32); -+ pvh_mbi.cmdline = pvh_info->cmdline_paddr; -+ pvh_mbi.boot_loader_name = __pa(pvh_loader); -+ -+ BUG_ON(pvh_info->nr_modules >= ARRAY_SIZE(pvh_mbi_mods)); -+ pvh_mbi.mods_count = pvh_info->nr_modules; -+ pvh_mbi.mods_addr = __pa(pvh_mbi_mods); -+ -+ mod = pvh_mbi_mods; -+ entry = __va(pvh_info->modlist_paddr); -+ for ( i = 0; i < pvh_info->nr_modules; i++ ) -+ { -+ BUG_ON(entry[i].paddr >> 32); -+ BUG_ON(entry[i].cmdline_paddr >> 32); -+ -+ mod[i].mod_start = entry[i].paddr; -+ mod[i].mod_end = entry[i].paddr + entry[i].size; -+ mod[i].string = entry[i].cmdline_paddr; -+ } -+} -+ -+multiboot_info_t *__init pvh_init(void) -+{ -+ convert_pvh_info(); -+ -+ return &pvh_mbi; -+} -+ -+void __init pvh_print_info(void) -+{ -+ const struct hvm_start_info *pvh_info = __va(pvh_start_info_pa); -+ const struct hvm_modlist_entry *entry; -+ unsigned int i; -+ -+ ASSERT(pvh_info->magic == XEN_HVM_START_MAGIC_VALUE); -+ -+ printk("PVH start info: (pa %08x)\n", pvh_start_info_pa); -+ printk(" version: %u\n", pvh_info->version); -+ printk(" flags: %#"PRIx32"\n", pvh_info->flags); -+ printk(" nr_modules: %u\n", pvh_info->nr_modules); -+ printk(" modlist_pa: %016"PRIx64"\n", pvh_info->modlist_paddr); -+ printk(" cmdline_pa: %016"PRIx64"\n", pvh_info->cmdline_paddr); -+ if ( pvh_info->cmdline_paddr ) -+ printk(" cmdline: '%s'\n", (char *)__va(pvh_info->cmdline_paddr)); -+ printk(" rsdp_pa: %016"PRIx64"\n", pvh_info->rsdp_paddr); -+ -+ entry = __va(pvh_info->modlist_paddr); -+ for ( i = 0; i < pvh_info->nr_modules; i++ ) -+ { -+ printk(" mod[%u].pa: %016"PRIx64"\n", i, entry[i].paddr); -+ printk(" mod[%u].size: %016"PRIu64"\n", i, entry[i].size); -+ printk(" mod[%u].cmdline_pa: %016"PRIx64"\n", -+ i, entry[i].cmdline_paddr); -+ if ( entry[i].cmdline_paddr ) -+ printk(" mod[%1u].cmdline: '%s'\n", i, -+ (char *)__va(entry[i].cmdline_paddr)); -+ } -+} -+ -+/* -+ * Local variables: -+ * mode: C -+ * c-file-style: "BSD" -+ * c-basic-offset: 4 -+ * tab-width: 4 -+ * indent-tabs-mode: nil -+ * End: -+ */ -diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c -index 2e10c6bdf4..4b8d09b751 100644 ---- a/xen/arch/x86/setup.c -+++ b/xen/arch/x86/setup.c -@@ -51,6 +51,7 @@ - #include - #include - #include -+#include - - /* opt_nosmp: If true, secondary processors are ignored. */ - static bool __initdata opt_nosmp; -@@ -649,8 +650,8 @@ void __init noreturn __start_xen(unsigned long mbi_p) - char *memmap_type = NULL; - char *cmdline, *kextra, *loader; - unsigned int initrdidx, domcr_flags = DOMCRF_s3_integrity; -- multiboot_info_t *mbi = __va(mbi_p); -- module_t *mod = (module_t *)__va(mbi->mods_addr); -+ multiboot_info_t *mbi; -+ module_t *mod; - unsigned long nr_pages, raw_max_page, modules_headroom, *module_map; - int i, j, e820_warn = 0, bytes = 0; - bool acpi_boot_table_init_done = false, relocated = false; -@@ -680,6 +681,16 @@ void __init noreturn __start_xen(unsigned long mbi_p) - - /* Full exception support from here on in. */ - -+ if ( pvh_boot ) -+ { -+ ASSERT(mbi_p == 0); -+ mbi = pvh_init(); -+ } -+ else -+ mbi = __va(mbi_p); -+ -+ mod = __va(mbi->mods_addr); -+ - loader = (mbi->flags & MBI_LOADERNAME) - ? 
(char *)__va(mbi->boot_loader_name) : "unknown"; - -@@ -719,6 +730,9 @@ void __init noreturn __start_xen(unsigned long mbi_p) - ehci_dbgp_init(); - console_init_preirq(); - -+ if ( pvh_boot ) -+ pvh_print_info(); -+ - printk("Bootloader: %s\n", loader); - - printk("Command line: %s\n", cmdline); -diff --git a/xen/include/asm-x86/guest.h b/xen/include/asm-x86/guest.h -new file mode 100644 -index 0000000000..630c092c25 ---- /dev/null -+++ b/xen/include/asm-x86/guest.h -@@ -0,0 +1,34 @@ -+/****************************************************************************** -+ * asm-x86/guest.h -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms and conditions of the GNU General Public -+ * License, version 2, as published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public -+ * License along with this program; If not, see . -+ * -+ * Copyright (c) 2017 Citrix Systems Ltd. -+ */ -+ -+#ifndef __X86_GUEST_H__ -+#define __X86_GUEST_H__ -+ -+#include -+ -+#endif /* __X86_GUEST_H__ */ -+ -+/* -+ * Local variables: -+ * mode: C -+ * c-file-style: "BSD" -+ * c-basic-offset: 4 -+ * tab-width: 4 -+ * indent-tabs-mode: nil -+ * End: -+ */ -diff --git a/xen/include/asm-x86/guest/pvh-boot.h b/xen/include/asm-x86/guest/pvh-boot.h -new file mode 100644 -index 0000000000..1b429f9401 ---- /dev/null -+++ b/xen/include/asm-x86/guest/pvh-boot.h -@@ -0,0 +1,57 @@ -+/****************************************************************************** -+ * asm-x86/guest/pvh-boot.h -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms and conditions of the GNU General Public -+ * License, version 2, as published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public -+ * License along with this program; If not, see . -+ * -+ * Copyright (c) 2017 Citrix Systems Ltd. -+ */ -+ -+#ifndef __X86_PVH_BOOT_H__ -+#define __X86_PVH_BOOT_H__ -+ -+#include -+ -+#ifdef CONFIG_PVH_GUEST -+ -+extern bool pvh_boot; -+ -+multiboot_info_t *pvh_init(void); -+void pvh_print_info(void); -+ -+#else -+ -+#define pvh_boot 0 -+ -+static inline multiboot_info_t *pvh_init(void) -+{ -+ ASSERT_UNREACHABLE(); -+ return NULL; -+} -+ -+static inline void pvh_print_info(void) -+{ -+ ASSERT_UNREACHABLE(); -+} -+ -+#endif /* CONFIG_PVH_GUEST */ -+#endif /* __X86_PVH_BOOT_H__ */ -+ -+/* -+ * Local variables: -+ * mode: C -+ * c-file-style: "BSD" -+ * c-basic-offset: 4 -+ * tab-width: 4 -+ * indent-tabs-mode: nil -+ * End: -+ */ --- -2.14.3 - - -From 31b664a93f5efd8f40889d04028881c18b76a5a3 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Wed, 22 Nov 2017 11:39:04 +0000 -Subject: [PATCH 32/77] x86/boot: Map more than the first 16MB - -TODO: Replace somehow (bootstrap_map() ?) 
- -Signed-off-by: Andrew Cooper ---- - xen/arch/x86/boot/x86_64.S | 3 +-- - 1 file changed, 1 insertion(+), 2 deletions(-) - -diff --git a/xen/arch/x86/boot/x86_64.S b/xen/arch/x86/boot/x86_64.S -index cf47e019f5..42636cf334 100644 ---- a/xen/arch/x86/boot/x86_64.S -+++ b/xen/arch/x86/boot/x86_64.S -@@ -114,11 +114,10 @@ GLOBAL(__page_tables_start) - GLOBAL(l2_identmap) - .quad sym_offs(l1_identmap) + __PAGE_HYPERVISOR - idx = 1 -- .rept 7 -+ .rept 4 * L2_PAGETABLE_ENTRIES - 1 - .quad (idx << L2_PAGETABLE_SHIFT) | PAGE_HYPERVISOR | _PAGE_PSE - idx = idx + 1 - .endr -- .fill 4 * L2_PAGETABLE_ENTRIES - 8, 8, 0 - .size l2_identmap, . - l2_identmap - - /* --- -2.14.3 - - -From 3d1afab1f6a092006b5bbd36a84186203989d846 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Tue, 28 Nov 2017 14:53:51 +0000 -Subject: [PATCH 33/77] x86/entry: Probe for Xen early during boot - -Signed-off-by: Andrew Cooper ---- -v2: Add __read_mostly. ---- - xen/arch/x86/guest/Makefile | 2 ++ - xen/arch/x86/guest/xen.c | 75 +++++++++++++++++++++++++++++++++++++++++ - xen/arch/x86/setup.c | 2 ++ - xen/include/asm-x86/guest.h | 1 + - xen/include/asm-x86/guest/xen.h | 47 ++++++++++++++++++++++++++ - 5 files changed, 127 insertions(+) - create mode 100644 xen/arch/x86/guest/xen.c - create mode 100644 xen/include/asm-x86/guest/xen.h - -diff --git a/xen/arch/x86/guest/Makefile b/xen/arch/x86/guest/Makefile -index a5f1625ab1..1345a60c81 100644 ---- a/xen/arch/x86/guest/Makefile -+++ b/xen/arch/x86/guest/Makefile -@@ -1 +1,3 @@ -+obj-y += xen.o -+ - obj-bin-$(CONFIG_PVH_GUEST) += pvh-boot.init.o -diff --git a/xen/arch/x86/guest/xen.c b/xen/arch/x86/guest/xen.c -new file mode 100644 -index 0000000000..8507757841 ---- /dev/null -+++ b/xen/arch/x86/guest/xen.c -@@ -0,0 +1,75 @@ -+/****************************************************************************** -+ * arch/x86/guest/xen.c -+ * -+ * Support for detecting and running under Xen. -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; If not, see . -+ * -+ * Copyright (c) 2017 Citrix Systems Ltd. 
-+ */ -+#include -+#include -+ -+#include -+#include -+ -+#include -+ -+bool __read_mostly xen_guest; -+ -+static __read_mostly uint32_t xen_cpuid_base; -+ -+static void __init find_xen_leaves(void) -+{ -+ uint32_t eax, ebx, ecx, edx, base; -+ -+ for ( base = XEN_CPUID_FIRST_LEAF; -+ base < XEN_CPUID_FIRST_LEAF + 0x10000; base += 0x100 ) -+ { -+ cpuid(base, &eax, &ebx, &ecx, &edx); -+ -+ if ( (ebx == XEN_CPUID_SIGNATURE_EBX) && -+ (ecx == XEN_CPUID_SIGNATURE_ECX) && -+ (edx == XEN_CPUID_SIGNATURE_EDX) && -+ ((eax - base) >= 2) ) -+ { -+ xen_cpuid_base = base; -+ break; -+ } -+ } -+} -+ -+void __init probe_hypervisor(void) -+{ -+ /* Too early to use cpu_has_hypervisor */ -+ if ( !(cpuid_ecx(1) & cpufeat_mask(X86_FEATURE_HYPERVISOR)) ) -+ return; -+ -+ find_xen_leaves(); -+ -+ if ( !xen_cpuid_base ) -+ return; -+ -+ xen_guest = true; -+} -+ -+/* -+ * Local variables: -+ * mode: C -+ * c-file-style: "BSD" -+ * c-basic-offset: 4 -+ * tab-width: 4 -+ * indent-tabs-mode: nil -+ * End: -+ */ -diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c -index 4b8d09b751..d8059f23b5 100644 ---- a/xen/arch/x86/setup.c -+++ b/xen/arch/x86/setup.c -@@ -715,6 +715,8 @@ void __init noreturn __start_xen(unsigned long mbi_p) - * allocing any xenheap structures wanted in lower memory. */ - kexec_early_calculations(); - -+ probe_hypervisor(); -+ - parse_video_info(); - - rdmsrl(MSR_EFER, this_cpu(efer)); -diff --git a/xen/include/asm-x86/guest.h b/xen/include/asm-x86/guest.h -index 630c092c25..8d91f81451 100644 ---- a/xen/include/asm-x86/guest.h -+++ b/xen/include/asm-x86/guest.h -@@ -20,6 +20,7 @@ - #define __X86_GUEST_H__ - - #include -+#include - - #endif /* __X86_GUEST_H__ */ - -diff --git a/xen/include/asm-x86/guest/xen.h b/xen/include/asm-x86/guest/xen.h -new file mode 100644 -index 0000000000..97a7c8d531 ---- /dev/null -+++ b/xen/include/asm-x86/guest/xen.h -@@ -0,0 +1,47 @@ -+/****************************************************************************** -+ * asm-x86/guest/xen.h -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms and conditions of the GNU General Public -+ * License, version 2, as published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public -+ * License along with this program; If not, see . -+ * -+ * Copyright (c) 2017 Citrix Systems Ltd. -+ */ -+ -+#ifndef __X86_GUEST_XEN_H__ -+#define __X86_GUEST_XEN_H__ -+ -+#include -+ -+#ifdef CONFIG_XEN_GUEST -+ -+extern bool xen_guest; -+ -+void probe_hypervisor(void); -+ -+#else -+ -+#define xen_guest 0 -+ -+static inline void probe_hypervisor(void) {}; -+ -+#endif /* CONFIG_XEN_GUEST */ -+#endif /* __X86_GUEST_XEN_H__ */ -+ -+/* -+ * Local variables: -+ * mode: C -+ * c-file-style: "BSD" -+ * c-basic-offset: 4 -+ * tab-width: 4 -+ * indent-tabs-mode: nil -+ * End: -+ */ --- -2.14.3 - - -From b38cc15b2f6170e0a8864aa9f151cc0e4b388c3f Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Tue, 21 Nov 2017 13:54:47 +0000 -Subject: [PATCH 34/77] x86/guest: Hypercall support - -Signed-off-by: Andrew Cooper -Signed-off-by: Wei Liu ---- -v2: append underscores to tmp. 
---- - xen/arch/x86/guest/Makefile | 1 + - xen/arch/x86/guest/hypercall_page.S | 79 ++++++++++++++++++++++++++++++ - xen/arch/x86/guest/xen.c | 5 ++ - xen/arch/x86/xen.lds.S | 1 + - xen/include/asm-x86/guest.h | 1 + - xen/include/asm-x86/guest/hypercall.h | 92 +++++++++++++++++++++++++++++++++++ - 6 files changed, 179 insertions(+) - create mode 100644 xen/arch/x86/guest/hypercall_page.S - create mode 100644 xen/include/asm-x86/guest/hypercall.h - -diff --git a/xen/arch/x86/guest/Makefile b/xen/arch/x86/guest/Makefile -index 1345a60c81..26fb4b1007 100644 ---- a/xen/arch/x86/guest/Makefile -+++ b/xen/arch/x86/guest/Makefile -@@ -1,3 +1,4 @@ -+obj-y += hypercall_page.o - obj-y += xen.o - - obj-bin-$(CONFIG_PVH_GUEST) += pvh-boot.init.o -diff --git a/xen/arch/x86/guest/hypercall_page.S b/xen/arch/x86/guest/hypercall_page.S -new file mode 100644 -index 0000000000..fdd2e72272 ---- /dev/null -+++ b/xen/arch/x86/guest/hypercall_page.S -@@ -0,0 +1,79 @@ -+#include -+#include -+#include -+ -+ .section ".text.page_aligned", "ax", @progbits -+ .p2align PAGE_SHIFT -+ -+GLOBAL(hypercall_page) -+ /* Poisoned with `ret` for safety before hypercalls are set up. */ -+ .fill PAGE_SIZE, 1, 0xc3 -+ .type hypercall_page, STT_OBJECT -+ .size hypercall_page, PAGE_SIZE -+ -+/* -+ * Identify a specific hypercall in the hypercall page -+ * @param name Hypercall name. -+ */ -+#define DECLARE_HYPERCALL(name) \ -+ .globl HYPERCALL_ ## name; \ -+ .set HYPERCALL_ ## name, hypercall_page + __HYPERVISOR_ ## name * 32; \ -+ .type HYPERCALL_ ## name, STT_FUNC; \ -+ .size HYPERCALL_ ## name, 32 -+ -+DECLARE_HYPERCALL(set_trap_table) -+DECLARE_HYPERCALL(mmu_update) -+DECLARE_HYPERCALL(set_gdt) -+DECLARE_HYPERCALL(stack_switch) -+DECLARE_HYPERCALL(set_callbacks) -+DECLARE_HYPERCALL(fpu_taskswitch) -+DECLARE_HYPERCALL(sched_op_compat) -+DECLARE_HYPERCALL(platform_op) -+DECLARE_HYPERCALL(set_debugreg) -+DECLARE_HYPERCALL(get_debugreg) -+DECLARE_HYPERCALL(update_descriptor) -+DECLARE_HYPERCALL(memory_op) -+DECLARE_HYPERCALL(multicall) -+DECLARE_HYPERCALL(update_va_mapping) -+DECLARE_HYPERCALL(set_timer_op) -+DECLARE_HYPERCALL(event_channel_op_compat) -+DECLARE_HYPERCALL(xen_version) -+DECLARE_HYPERCALL(console_io) -+DECLARE_HYPERCALL(physdev_op_compat) -+DECLARE_HYPERCALL(grant_table_op) -+DECLARE_HYPERCALL(vm_assist) -+DECLARE_HYPERCALL(update_va_mapping_otherdomain) -+DECLARE_HYPERCALL(iret) -+DECLARE_HYPERCALL(vcpu_op) -+DECLARE_HYPERCALL(set_segment_base) -+DECLARE_HYPERCALL(mmuext_op) -+DECLARE_HYPERCALL(xsm_op) -+DECLARE_HYPERCALL(nmi_op) -+DECLARE_HYPERCALL(sched_op) -+DECLARE_HYPERCALL(callback_op) -+DECLARE_HYPERCALL(xenoprof_op) -+DECLARE_HYPERCALL(event_channel_op) -+DECLARE_HYPERCALL(physdev_op) -+DECLARE_HYPERCALL(hvm_op) -+DECLARE_HYPERCALL(sysctl) -+DECLARE_HYPERCALL(domctl) -+DECLARE_HYPERCALL(kexec_op) -+DECLARE_HYPERCALL(tmem_op) -+DECLARE_HYPERCALL(xc_reserved_op) -+DECLARE_HYPERCALL(xenpmu_op) -+ -+DECLARE_HYPERCALL(arch_0) -+DECLARE_HYPERCALL(arch_1) -+DECLARE_HYPERCALL(arch_2) -+DECLARE_HYPERCALL(arch_3) -+DECLARE_HYPERCALL(arch_4) -+DECLARE_HYPERCALL(arch_5) -+DECLARE_HYPERCALL(arch_6) -+DECLARE_HYPERCALL(arch_7) -+ -+/* -+ * Local variables: -+ * tab-width: 8 -+ * indent-tabs-mode: nil -+ * End: -+ */ -diff --git a/xen/arch/x86/guest/xen.c b/xen/arch/x86/guest/xen.c -index 8507757841..10b90d0f61 100644 ---- a/xen/arch/x86/guest/xen.c -+++ b/xen/arch/x86/guest/xen.c -@@ -22,6 +22,7 @@ - #include - - #include -+#include - #include - - #include -@@ -29,6 +30,7 @@ - bool __read_mostly xen_guest; - - static 
__read_mostly uint32_t xen_cpuid_base; -+extern char hypercall_page[]; - - static void __init find_xen_leaves(void) - { -@@ -61,6 +63,9 @@ void __init probe_hypervisor(void) - if ( !xen_cpuid_base ) - return; - -+ /* Fill the hypercall page. */ -+ wrmsrl(cpuid_ebx(xen_cpuid_base + 2), __pa(hypercall_page)); -+ - xen_guest = true; - } - -diff --git a/xen/arch/x86/xen.lds.S b/xen/arch/x86/xen.lds.S -index 2023f971e4..509f176913 100644 ---- a/xen/arch/x86/xen.lds.S -+++ b/xen/arch/x86/xen.lds.S -@@ -65,6 +65,7 @@ SECTIONS - DECL_SECTION(.text) { - _stext = .; /* Text and read-only data */ - *(.text) -+ *(.text.page_aligned) - *(.text.cold) - *(.text.unlikely) - *(.fixup) -diff --git a/xen/include/asm-x86/guest.h b/xen/include/asm-x86/guest.h -index 8d91f81451..5abdb8c433 100644 ---- a/xen/include/asm-x86/guest.h -+++ b/xen/include/asm-x86/guest.h -@@ -19,6 +19,7 @@ - #ifndef __X86_GUEST_H__ - #define __X86_GUEST_H__ - -+#include - #include - #include - -diff --git a/xen/include/asm-x86/guest/hypercall.h b/xen/include/asm-x86/guest/hypercall.h -new file mode 100644 -index 0000000000..d959c3dd8a ---- /dev/null -+++ b/xen/include/asm-x86/guest/hypercall.h -@@ -0,0 +1,92 @@ -+/****************************************************************************** -+ * asm-x86/guest/hypercall.h -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms and conditions of the GNU General Public -+ * License, version 2, as published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public -+ * License along with this program; If not, see . -+ * -+ * Copyright (c) 2017 Citrix Systems Ltd. 
-+ */ -+ -+#ifndef __X86_XEN_HYPERCALL_H__ -+#define __X86_XEN_HYPERCALL_H__ -+ -+#ifdef CONFIG_XEN_GUEST -+ -+/* -+ * Hypercall primatives for 64bit -+ * -+ * Inputs: %rdi, %rsi, %rdx, %r10, %r8, %r9 (arguments 1-6) -+ */ -+ -+#define _hypercall64_1(type, hcall, a1) \ -+ ({ \ -+ long res, tmp__; \ -+ asm volatile ( \ -+ "call hypercall_page + %c[offset]" \ -+ : "=a" (res), "=D" (tmp__) \ -+ : [offset] "i" (hcall * 32), \ -+ "1" ((long)(a1)) \ -+ : "memory" ); \ -+ (type)res; \ -+ }) -+ -+#define _hypercall64_2(type, hcall, a1, a2) \ -+ ({ \ -+ long res, tmp__; \ -+ asm volatile ( \ -+ "call hypercall_page + %c[offset]" \ -+ : "=a" (res), "=D" (tmp__), "=S" (tmp__) \ -+ : [offset] "i" (hcall * 32), \ -+ "1" ((long)(a1)), "2" ((long)(a2)) \ -+ : "memory" ); \ -+ (type)res; \ -+ }) -+ -+#define _hypercall64_3(type, hcall, a1, a2, a3) \ -+ ({ \ -+ long res, tmp__; \ -+ asm volatile ( \ -+ "call hypercall_page + %c[offset]" \ -+ : "=a" (res), "=D" (tmp__), "=S" (tmp__), "=d" (tmp__) \ -+ : [offset] "i" (hcall * 32), \ -+ "1" ((long)(a1)), "2" ((long)(a2)), "3" ((long)(a3)) \ -+ : "memory" ); \ -+ (type)res; \ -+ }) -+ -+#define _hypercall64_4(type, hcall, a1, a2, a3, a4) \ -+ ({ \ -+ long res, tmp__; \ -+ register long _a4 asm ("r10") = ((long)(a4)); \ -+ asm volatile ( \ -+ "call hypercall_page + %c[offset]" \ -+ : "=a" (res), "=D" (tmp__), "=S" (tmp__), "=d" (tmp__), \ -+ "=&r" (tmp__) \ -+ : [offset] "i" (hcall * 32), \ -+ "1" ((long)(a1)), "2" ((long)(a2)), "3" ((long)(a3)), \ -+ "4" (_a4) \ -+ : "memory" ); \ -+ (type)res; \ -+ }) -+ -+#endif /* CONFIG_XEN_GUEST */ -+#endif /* __X86_XEN_HYPERCALL_H__ */ -+ -+/* -+ * Local variables: -+ * mode: C -+ * c-file-style: "BSD" -+ * c-basic-offset: 4 -+ * tab-width: 4 -+ * indent-tabs-mode: nil -+ * End: -+ */ --- -2.14.3 - - -From 9752c7422b9193e18523d9c443bc0dad7ae0c7c7 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Tue, 21 Nov 2017 14:43:32 +0000 -Subject: [PATCH 35/77] x86/shutdown: Support for using - SCHEDOP_{shutdown,reboot} - -Signed-off-by: Andrew Cooper -Signed-off-by: Wei Liu -Reviewed-by: Jan Beulich ---- -v2: -1. Use sched_shutdown -2. Move header inclusion ---- - docs/misc/xen-command-line.markdown | 3 +++ - xen/arch/x86/shutdown.c | 34 ++++++++++++++++++++++++++++++---- - xen/include/asm-x86/guest/hypercall.h | 32 ++++++++++++++++++++++++++++++++ - 3 files changed, 65 insertions(+), 4 deletions(-) - -diff --git a/docs/misc/xen-command-line.markdown b/docs/misc/xen-command-line.markdown -index 781110d4b2..e5979bceee 100644 ---- a/docs/misc/xen-command-line.markdown -+++ b/docs/misc/xen-command-line.markdown -@@ -1478,6 +1478,9 @@ Specify the host reboot method. - 'efi' instructs Xen to reboot using the EFI reboot call (in EFI mode by - default it will use that method first). 
- -+`xen` instructs Xen to reboot using Xen's SCHEDOP hypercall (this is the default -+when running nested Xen) -+ - ### rmrr - > '= start<-end>=[s1]bdf1[,[s1]bdf2[,...]];start<-end>=[s2]bdf1[,[s2]bdf2[,...]] - -diff --git a/xen/arch/x86/shutdown.c b/xen/arch/x86/shutdown.c -index a87aa60add..689f6f137d 100644 ---- a/xen/arch/x86/shutdown.c -+++ b/xen/arch/x86/shutdown.c -@@ -25,6 +25,7 @@ - #include - #include - #include -+#include - - enum reboot_type { - BOOT_INVALID, -@@ -34,6 +35,7 @@ enum reboot_type { - BOOT_CF9 = 'p', - BOOT_CF9_PWR = 'P', - BOOT_EFI = 'e', -+ BOOT_XEN = 'x', - }; - - static int reboot_mode; -@@ -49,6 +51,7 @@ static int reboot_mode; - * pci Use the so-called "PCI reset register", CF9 - * Power Like 'pci' but for a full power-cyle reset - * efi Use the EFI reboot (if running under EFI) -+ * xen Use Xen SCHEDOP hypercall (if running under Xen as a guest) - */ - static enum reboot_type reboot_type = BOOT_INVALID; - -@@ -75,6 +78,7 @@ static int __init set_reboot_type(const char *str) - case 'P': - case 'p': - case 't': -+ case 'x': - reboot_type = *str; - break; - default: -@@ -93,6 +97,13 @@ static int __init set_reboot_type(const char *str) - reboot_type = BOOT_INVALID; - } - -+ if ( reboot_type == BOOT_XEN && !xen_guest ) -+ { -+ printk("Xen reboot selected, but Xen hypervisor not detected\n" -+ "Falling back to default\n"); -+ reboot_type = BOOT_INVALID; -+ } -+ - return rc; - } - custom_param("reboot", set_reboot_type); -@@ -109,6 +120,10 @@ static inline void kb_wait(void) - static void noreturn __machine_halt(void *unused) - { - local_irq_disable(); -+ -+ if ( reboot_type == BOOT_XEN ) -+ xen_hypercall_shutdown(SHUTDOWN_poweroff); -+ - for ( ; ; ) - halt(); - } -@@ -129,10 +144,17 @@ void machine_halt(void) - - static void default_reboot_type(void) - { -- if ( reboot_type == BOOT_INVALID ) -- reboot_type = efi_enabled(EFI_RS) ? BOOT_EFI -- : acpi_disabled ? 
BOOT_KBD -- : BOOT_ACPI; -+ if ( reboot_type != BOOT_INVALID ) -+ return; -+ -+ if ( xen_guest ) -+ reboot_type = BOOT_XEN; -+ else if ( efi_enabled(EFI_RS) ) -+ reboot_type = BOOT_EFI; -+ else if ( acpi_disabled ) -+ reboot_type = BOOT_KBD; -+ else -+ reboot_type = BOOT_ACPI; - } - - static int __init override_reboot(struct dmi_system_id *d) -@@ -618,6 +640,10 @@ void machine_restart(unsigned int delay_millisecs) - } - reboot_type = BOOT_ACPI; - break; -+ -+ case BOOT_XEN: -+ xen_hypercall_shutdown(SHUTDOWN_reboot); -+ break; - } - } - } -diff --git a/xen/include/asm-x86/guest/hypercall.h b/xen/include/asm-x86/guest/hypercall.h -index d959c3dd8a..a05041d30b 100644 ---- a/xen/include/asm-x86/guest/hypercall.h -+++ b/xen/include/asm-x86/guest/hypercall.h -@@ -21,6 +21,11 @@ - - #ifdef CONFIG_XEN_GUEST - -+#include -+ -+#include -+#include -+ - /* - * Hypercall primatives for 64bit - * -@@ -78,6 +83,33 @@ - (type)res; \ - }) - -+/* -+ * Primitive Hypercall wrappers -+ */ -+static inline long xen_hypercall_sched_op(unsigned int cmd, void *arg) -+{ -+ return _hypercall64_2(long, __HYPERVISOR_sched_op, cmd, arg); -+} -+ -+/* -+ * Higher level hypercall helpers -+ */ -+static inline long xen_hypercall_shutdown(unsigned int reason) -+{ -+ struct sched_shutdown s = { .reason = reason }; -+ return xen_hypercall_sched_op(SCHEDOP_shutdown, &s); -+} -+ -+#else /* CONFIG_XEN_GUEST */ -+ -+#include -+ -+static inline long xen_hypercall_shutdown(unsigned int reason) -+{ -+ ASSERT_UNREACHABLE(); -+ return 0; -+} -+ - #endif /* CONFIG_XEN_GUEST */ - #endif /* __X86_XEN_HYPERCALL_H__ */ - --- -2.14.3 - - -From 2f5a0121434559b2f8e5b17dc0119699684e3b17 Mon Sep 17 00:00:00 2001 -From: Wei Liu -Date: Tue, 14 Nov 2017 18:19:09 +0000 -Subject: [PATCH 36/77] x86/pvh: Retrieve memory map from Xen -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Signed-off-by: Wei Liu -Signed-off-by: Andrew Cooper -Signed-off-by: Roger Pau Monné ---- -v2: fixed clang build, dropped rb tag ---- - xen/arch/x86/e820.c | 3 +-- - xen/arch/x86/guest/pvh-boot.c | 20 ++++++++++++++++++++ - xen/arch/x86/guest/xen.c | 3 +++ - xen/arch/x86/setup.c | 7 ++++++- - xen/include/asm-x86/e820.h | 1 + - xen/include/asm-x86/guest/hypercall.h | 5 +++++ - 6 files changed, 36 insertions(+), 3 deletions(-) - -diff --git a/xen/arch/x86/e820.c b/xen/arch/x86/e820.c -index 7c572bade2..b422a684ee 100644 ---- a/xen/arch/x86/e820.c -+++ b/xen/arch/x86/e820.c -@@ -134,8 +134,7 @@ static struct change_member *change_point[2*E820MAX] __initdata; - static struct e820entry *overlap_list[E820MAX] __initdata; - static struct e820entry new_bios[E820MAX] __initdata; - --static int __init sanitize_e820_map(struct e820entry *biosmap, -- unsigned int *pnr_map) -+int __init sanitize_e820_map(struct e820entry *biosmap, unsigned int *pnr_map) - { - struct change_member *change_tmp; - unsigned long current_type, last_type; -diff --git a/xen/arch/x86/guest/pvh-boot.c b/xen/arch/x86/guest/pvh-boot.c -index 186e332657..be3122b16c 100644 ---- a/xen/arch/x86/guest/pvh-boot.c -+++ b/xen/arch/x86/guest/pvh-boot.c -@@ -22,6 +22,7 @@ - #include - #include - -+#include - #include - - #include -@@ -70,10 +71,29 @@ static void __init convert_pvh_info(void) - } - } - -+static void __init get_memory_map(void) -+{ -+ struct xen_memory_map memmap = { -+ .nr_entries = E820MAX, -+ }; -+ -+ set_xen_guest_handle(memmap.buffer, e820_raw.map); -+ BUG_ON(xen_hypercall_memory_op(XENMEM_memory_map, &memmap)); -+ e820_raw.nr_map = memmap.nr_entries; -+ -+ 
/* :( Various toolstacks don't sort the memory map. */ -+ sanitize_e820_map(e820_raw.map, &e820_raw.nr_map); -+} -+ - multiboot_info_t *__init pvh_init(void) - { - convert_pvh_info(); - -+ probe_hypervisor(); -+ ASSERT(xen_guest); -+ -+ get_memory_map(); -+ - return &pvh_mbi; - } - -diff --git a/xen/arch/x86/guest/xen.c b/xen/arch/x86/guest/xen.c -index 10b90d0f61..c253ebd983 100644 ---- a/xen/arch/x86/guest/xen.c -+++ b/xen/arch/x86/guest/xen.c -@@ -54,6 +54,9 @@ static void __init find_xen_leaves(void) - - void __init probe_hypervisor(void) - { -+ if ( xen_guest ) -+ return; -+ - /* Too early to use cpu_has_hypervisor */ - if ( !(cpuid_ecx(1) & cpufeat_mask(X86_FEATURE_HYPERVISOR)) ) - return; -diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c -index d8059f23b5..edb43bf2cb 100644 ---- a/xen/arch/x86/setup.c -+++ b/xen/arch/x86/setup.c -@@ -795,7 +795,12 @@ void __init noreturn __start_xen(unsigned long mbi_p) - if ( !(mbi->flags & MBI_MODULES) || (mbi->mods_count == 0) ) - panic("dom0 kernel not specified. Check bootloader configuration."); - -- if ( efi_enabled(EFI_LOADER) ) -+ if ( pvh_boot ) -+ { -+ /* pvh_init() already filled in e820_raw */ -+ memmap_type = "PVH-e820"; -+ } -+ else if ( efi_enabled(EFI_LOADER) ) - { - set_pdx_range(xen_phys_start >> PAGE_SHIFT, - (xen_phys_start + BOOTSTRAP_MAP_BASE) >> PAGE_SHIFT); -diff --git a/xen/include/asm-x86/e820.h b/xen/include/asm-x86/e820.h -index 28defa8545..ee317b17aa 100644 ---- a/xen/include/asm-x86/e820.h -+++ b/xen/include/asm-x86/e820.h -@@ -23,6 +23,7 @@ struct e820map { - struct e820entry map[E820MAX]; - }; - -+extern int sanitize_e820_map(struct e820entry *biosmap, unsigned int *pnr_map); - extern int e820_all_mapped(u64 start, u64 end, unsigned type); - extern int reserve_e820_ram(struct e820map *e820, uint64_t s, uint64_t e); - extern int e820_change_range_type( -diff --git a/xen/include/asm-x86/guest/hypercall.h b/xen/include/asm-x86/guest/hypercall.h -index a05041d30b..e0b00f97fb 100644 ---- a/xen/include/asm-x86/guest/hypercall.h -+++ b/xen/include/asm-x86/guest/hypercall.h -@@ -91,6 +91,11 @@ static inline long xen_hypercall_sched_op(unsigned int cmd, void *arg) - return _hypercall64_2(long, __HYPERVISOR_sched_op, cmd, arg); - } - -+static inline long xen_hypercall_memory_op(unsigned int cmd, void *arg) -+{ -+ return _hypercall64_2(long, __HYPERVISOR_memory_op, cmd, arg); -+} -+ - /* - * Higher level hypercall helpers - */ --- -2.14.3 - - -From 10128f33aa344f1f57584fd9ea528e1518b0d5fd Mon Sep 17 00:00:00 2001 -From: Wei Liu -Date: Thu, 11 Jan 2018 10:18:09 +0000 -Subject: [PATCH 37/77] xen/console: Introduce console=xen - -This specifies whether to use Xen specific console output. There are -two variants: one is the hypervisor console, the other is the magic -debug port 0xe9. - -Signed-off-by: Andrew Cooper -Signed-off-by: Wei Liu ---- - xen/drivers/char/console.c | 46 +++++++++++++++++++++++++++++++++++ - xen/include/asm-x86/guest/hypercall.h | 13 ++++++++++ - 2 files changed, 59 insertions(+) - -diff --git a/xen/drivers/char/console.c b/xen/drivers/char/console.c -index 19d0e74f17..d05ebf9f70 100644 ---- a/xen/drivers/char/console.c -+++ b/xen/drivers/char/console.c -@@ -31,6 +31,10 @@ - #include - #include - -+#ifdef CONFIG_X86 -+#include -+#endif -+ - /* console: comma-separated list of console outputs. 
*/ - static char __initdata opt_console[30] = OPT_CONSOLE_STR; - string_param("console", opt_console); -@@ -83,6 +87,10 @@ static uint32_t conringc, conringp; - - static int __read_mostly sercon_handle = -1; - -+#ifdef CONFIG_X86 -+static bool __read_mostly opt_console_xen; /* console=xen */ -+#endif -+ - static DEFINE_SPINLOCK(console_lock); - - /* -@@ -432,6 +440,16 @@ static void notify_dom0_con_ring(unsigned long unused) - static DECLARE_SOFTIRQ_TASKLET(notify_dom0_con_ring_tasklet, - notify_dom0_con_ring, 0); - -+#ifdef CONFIG_X86 -+static inline void xen_console_write_debug_port(const char *buf, size_t len) -+{ -+ unsigned long tmp; -+ asm volatile ( "rep outsb;" -+ : "=&S" (tmp), "=&c" (tmp) -+ : "0" (buf), "1" (len), "d" (0xe9) ); -+} -+#endif -+ - static long guest_console_write(XEN_GUEST_HANDLE_PARAM(char) buffer, int count) - { - char kbuf[128]; -@@ -458,6 +476,18 @@ static long guest_console_write(XEN_GUEST_HANDLE_PARAM(char) buffer, int count) - sercon_puts(kbuf); - video_puts(kbuf); - -+#ifdef CONFIG_X86 -+ if ( opt_console_xen ) -+ { -+ size_t len = strlen(kbuf); -+ -+ if ( xen_guest ) -+ xen_hypercall_console_write(kbuf, len); -+ else -+ xen_console_write_debug_port(kbuf, len); -+ } -+#endif -+ - if ( opt_console_to_ring ) - { - conring_puts(kbuf); -@@ -567,6 +597,18 @@ static void __putstr(const char *str) - sercon_puts(str); - video_puts(str); - -+#ifdef CONFIG_X86 -+ if ( opt_console_xen ) -+ { -+ size_t len = strlen(str); -+ -+ if ( xen_guest ) -+ xen_hypercall_console_write(str, len); -+ else -+ xen_console_write_debug_port(str, len); -+ } -+#endif -+ - conring_puts(str); - - if ( !console_locks_busted ) -@@ -762,6 +804,10 @@ void __init console_init_preirq(void) - p++; - if ( !strncmp(p, "vga", 3) ) - video_init(); -+#ifdef CONFIG_X86 -+ else if ( !strncmp(p, "xen", 3) ) -+ opt_console_xen = true; -+#endif - else if ( !strncmp(p, "none", 4) ) - continue; - else if ( (sh = serial_parse_handle(p)) >= 0 ) -diff --git a/xen/include/asm-x86/guest/hypercall.h b/xen/include/asm-x86/guest/hypercall.h -index e0b00f97fb..9cd95d2b92 100644 ---- a/xen/include/asm-x86/guest/hypercall.h -+++ b/xen/include/asm-x86/guest/hypercall.h -@@ -99,6 +99,13 @@ static inline long xen_hypercall_memory_op(unsigned int cmd, void *arg) - /* - * Higher level hypercall helpers - */ -+static inline void xen_hypercall_console_write( -+ const char *buf, unsigned int count) -+{ -+ (void)_hypercall64_3(long, __HYPERVISOR_console_io, -+ CONSOLEIO_write, count, buf); -+} -+ - static inline long xen_hypercall_shutdown(unsigned int reason) - { - struct sched_shutdown s = { .reason = reason }; -@@ -109,6 +116,12 @@ static inline long xen_hypercall_shutdown(unsigned int reason) - - #include - -+static inline void xen_hypercall_console_write( -+ const char *buf, unsigned int count) -+{ -+ ASSERT_UNREACHABLE(); -+} -+ - static inline long xen_hypercall_shutdown(unsigned int reason) - { - ASSERT_UNREACHABLE(); --- -2.14.3 - - -From 1fa54448348d6cc36b89bb9e1729ea601013b00f Mon Sep 17 00:00:00 2001 -From: Wei Liu -Date: Wed, 3 Jan 2018 16:38:54 +0000 -Subject: [PATCH 38/77] xen: introduce rangeset_claim_range - -Reserve a hole in a rangeset. - -Signed-off-by: Roger Pau Monne -Signed-off-by: Wei Liu ---- -Changes since v1: - - Change function name. - - Use a local variable instead of *s. - - Add unlikely to the !prev case. - - Move the function prototype position in the header file. 
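(Illustrative aside.) The claim logic below scans the gaps between existing ranges and takes the first hole large enough. A simplified, lock-free model of the same search over a sorted array of inclusive [s, e] ranges — omitting the insertion of the newly claimed range that the real code performs, and assuming the usual errno constants — reads:

    /* Simplified model, not the Xen implementation. */
    struct claim_range_model { unsigned long s, e; };

    static int claim_first_hole(const struct claim_range_model *r,
                                unsigned int nr, unsigned long size,
                                unsigned long *out)
    {
        unsigned long start = 0;
        unsigned int i;

        for ( i = 0; i < nr; i++ )
        {
            if ( r[i].s - start >= size )   /* hole before r[i] is big enough */
                goto found;
            if ( r[i].e == ~0UL )           /* ranges already reach the top */
                return -ENOSPC;
            start = r[i].e + 1;             /* next candidate hole starts here */
        }

        if ( (~0UL - start) + 1 >= size )   /* trailing hole up to ~0UL */
            goto found;

        return -ENOSPC;

     found:
        *out = start;
        return 0;
    }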
---- - xen/common/rangeset.c | 52 ++++++++++++++++++++++++++++++++++++++++++++++ - xen/include/xen/rangeset.h | 4 +++- - 2 files changed, 55 insertions(+), 1 deletion(-) - -diff --git a/xen/common/rangeset.c b/xen/common/rangeset.c -index 6c6293c15c..ade34f6a50 100644 ---- a/xen/common/rangeset.c -+++ b/xen/common/rangeset.c -@@ -298,6 +298,58 @@ int rangeset_report_ranges( - return rc; - } - -+int rangeset_claim_range(struct rangeset *r, unsigned long size, -+ unsigned long *s) -+{ -+ struct range *prev, *next; -+ unsigned long start = 0; -+ -+ write_lock(&r->lock); -+ -+ for ( prev = NULL, next = first_range(r); -+ next; -+ prev = next, next = next_range(r, next) ) -+ { -+ if ( (next->s - start) >= size ) -+ goto insert; -+ -+ if ( next->e == ~0UL ) -+ goto out; -+ -+ start = next->e + 1; -+ } -+ -+ if ( (~0UL - start) + 1 >= size ) -+ goto insert; -+ -+ out: -+ write_unlock(&r->lock); -+ return -ENOSPC; -+ -+ insert: -+ if ( unlikely(!prev) ) -+ { -+ next = alloc_range(r); -+ if ( !next ) -+ { -+ write_unlock(&r->lock); -+ return -ENOMEM; -+ } -+ -+ next->s = start; -+ next->e = start + size - 1; -+ insert_range(r, prev, next); -+ } -+ else -+ prev->e += size; -+ -+ write_unlock(&r->lock); -+ -+ *s = start; -+ -+ return 0; -+} -+ - int rangeset_add_singleton( - struct rangeset *r, unsigned long s) - { -diff --git a/xen/include/xen/rangeset.h b/xen/include/xen/rangeset.h -index aa6408248b..1f83b1f44b 100644 ---- a/xen/include/xen/rangeset.h -+++ b/xen/include/xen/rangeset.h -@@ -55,9 +55,11 @@ void rangeset_limit( - bool_t __must_check rangeset_is_empty( - const struct rangeset *r); - --/* Add/remove/query a numeric range. */ -+/* Add/claim/remove/query a numeric range. */ - int __must_check rangeset_add_range( - struct rangeset *r, unsigned long s, unsigned long e); -+int __must_check rangeset_claim_range(struct rangeset *r, unsigned long size, -+ unsigned long *s); - int __must_check rangeset_remove_range( - struct rangeset *r, unsigned long s, unsigned long e); - bool_t __must_check rangeset_contains_range( --- -2.14.3 - - -From 83186a8e6988b8f218fce57db3a62e35d39b529a Mon Sep 17 00:00:00 2001 -From: Wei Liu -Date: Wed, 3 Jan 2018 16:50:24 +0000 -Subject: [PATCH 39/77] xen/pvshim: keep track of used PFN ranges - -Simple infrastructure to keep track of PFN space usage, so that we can -use unpopulated PFNs to map special pages like shared info and grant -table. - -As rangeset depends on malloc being ready so hypervisor_setup is -introduced for things that can be initialised late in the process. - -Note that the PFN is marked as reserved at least up to 4GiB (or more -if the guest has more memory). This is not a perfect solution but -avoids using the MMIO hole below 4GiB. Ideally the shim (L1) should -have a way to ask the underlying Xen (L0) which memory regions are -populated, unpopulated, or MMIO space. - -Signed-off-by: Roger Pau Monne -Signed-off-by: Wei Liu ---- - xen/arch/x86/guest/xen.c | 56 +++++++++++++++++++++++++++++++++++++++++ - xen/arch/x86/setup.c | 3 +++ - xen/include/asm-x86/guest/xen.h | 7 ++++++ - 3 files changed, 66 insertions(+) - -diff --git a/xen/arch/x86/guest/xen.c b/xen/arch/x86/guest/xen.c -index c253ebd983..abf53ebbc6 100644 ---- a/xen/arch/x86/guest/xen.c -+++ b/xen/arch/x86/guest/xen.c -@@ -19,8 +19,12 @@ - * Copyright (c) 2017 Citrix Systems Ltd. 
- */ - #include -+#include -+#include -+#include - #include - -+#include - #include - #include - #include -@@ -31,6 +35,7 @@ bool __read_mostly xen_guest; - - static __read_mostly uint32_t xen_cpuid_base; - extern char hypercall_page[]; -+static struct rangeset *mem; - - static void __init find_xen_leaves(void) - { -@@ -72,6 +77,57 @@ void __init probe_hypervisor(void) - xen_guest = true; - } - -+static void __init init_memmap(void) -+{ -+ unsigned int i; -+ -+ mem = rangeset_new(NULL, "host memory map", 0); -+ if ( !mem ) -+ panic("failed to allocate PFN usage rangeset"); -+ -+ /* -+ * Mark up to the last memory page (or 4GiB) as RAM. This is done because -+ * Xen doesn't know the position of possible MMIO holes, so at least try to -+ * avoid the know MMIO hole below 4GiB. Note that this is subject to future -+ * discussion and improvements. -+ */ -+ if ( rangeset_add_range(mem, 0, max_t(unsigned long, max_page - 1, -+ PFN_DOWN(GB(4) - 1))) ) -+ panic("unable to add RAM to in-use PFN rangeset"); -+ -+ for ( i = 0; i < e820.nr_map; i++ ) -+ { -+ struct e820entry *e = &e820.map[i]; -+ -+ if ( rangeset_add_range(mem, PFN_DOWN(e->addr), -+ PFN_UP(e->addr + e->size - 1)) ) -+ panic("unable to add range [%#lx, %#lx] to in-use PFN rangeset", -+ PFN_DOWN(e->addr), PFN_UP(e->addr + e->size - 1)); -+ } -+} -+ -+void __init hypervisor_setup(void) -+{ -+ init_memmap(); -+} -+ -+int hypervisor_alloc_unused_page(mfn_t *mfn) -+{ -+ unsigned long m; -+ int rc; -+ -+ rc = rangeset_claim_range(mem, 1, &m); -+ if ( !rc ) -+ *mfn = _mfn(m); -+ -+ return rc; -+} -+ -+int hypervisor_free_unused_page(mfn_t mfn) -+{ -+ return rangeset_remove_range(mem, mfn_x(mfn), mfn_x(mfn)); -+} -+ - /* - * Local variables: - * mode: C -diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c -index edb43bf2cb..b9b97d68f5 100644 ---- a/xen/arch/x86/setup.c -+++ b/xen/arch/x86/setup.c -@@ -1472,6 +1472,9 @@ void __init noreturn __start_xen(unsigned long mbi_p) - max_cpus = nr_cpu_ids; - } - -+ if ( xen_guest ) -+ hypervisor_setup(); -+ - /* Low mappings were only needed for some BIOS table parsing. */ - zap_low_mappings(); - -diff --git a/xen/include/asm-x86/guest/xen.h b/xen/include/asm-x86/guest/xen.h -index 97a7c8d531..427837797b 100644 ---- a/xen/include/asm-x86/guest/xen.h -+++ b/xen/include/asm-x86/guest/xen.h -@@ -26,12 +26,19 @@ - extern bool xen_guest; - - void probe_hypervisor(void); -+void hypervisor_setup(void); -+int hypervisor_alloc_unused_page(mfn_t *mfn); -+int hypervisor_free_unused_page(mfn_t mfn); - - #else - - #define xen_guest 0 - - static inline void probe_hypervisor(void) {}; -+static inline void hypervisor_setup(void) -+{ -+ ASSERT_UNREACHABLE(); -+} - - #endif /* CONFIG_XEN_GUEST */ - #endif /* __X86_GUEST_XEN_H__ */ --- -2.14.3 - - -From efa15c993b600e9636cd091c626ee0c989afc62f Mon Sep 17 00:00:00 2001 -From: Roger Pau Monne -Date: Tue, 9 Jan 2018 11:19:44 +0000 -Subject: [PATCH 40/77] x86/guest: map shared_info page - -Use an unpopulated PFN in order to map it. - -Signed-off-by: Roger Pau Monne -Signed-off-by: Wei Liu -Signed-off-by: Andrew Cooper ---- -Changes since v1: - - Use an unpopulated PFN to map the shared_info page. - - Mask all event channels. - - Report XENMEM_add_to_physmap error code in case of failure. 
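(Illustrative aside.) The intended calling pattern for the PFN helpers introduced above is claim-then-map, handing the PFN back if the mapping step fails; map_foreign_frame() below is a made-up placeholder for whatever mapping the caller performs (this patch uses XENMEM_add_to_physmap for shared_info):

    /* Sketch only; map_foreign_frame() is a hypothetical stand-in. */
    static int example_map_at_unused_pfn(void)
    {
        mfn_t mfn;
        int rc = hypervisor_alloc_unused_page(&mfn);

        if ( rc )
            return rc;

        rc = map_foreign_frame(mfn);           /* hypothetical mapping step */
        if ( rc )
            hypervisor_free_unused_page(mfn);  /* give the PFN back on failure */

        return rc;
    }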
---- - xen/arch/x86/guest/xen.c | 27 +++++++++++++++++++++++++++ - xen/include/asm-x86/fixmap.h | 3 +++ - xen/include/asm-x86/guest/xen.h | 5 +++++ - 3 files changed, 35 insertions(+) - -diff --git a/xen/arch/x86/guest/xen.c b/xen/arch/x86/guest/xen.c -index abf53ebbc6..f62f93af16 100644 ---- a/xen/arch/x86/guest/xen.c -+++ b/xen/arch/x86/guest/xen.c -@@ -77,6 +77,31 @@ void __init probe_hypervisor(void) - xen_guest = true; - } - -+static void map_shared_info(void) -+{ -+ mfn_t mfn; -+ struct xen_add_to_physmap xatp = { -+ .domid = DOMID_SELF, -+ .space = XENMAPSPACE_shared_info, -+ }; -+ unsigned int i; -+ unsigned long rc; -+ -+ if ( hypervisor_alloc_unused_page(&mfn) ) -+ panic("unable to reserve shared info memory page"); -+ -+ xatp.gpfn = mfn_x(mfn); -+ rc = xen_hypercall_memory_op(XENMEM_add_to_physmap, &xatp); -+ if ( rc ) -+ panic("failed to map shared_info page: %ld", rc); -+ -+ set_fixmap(FIX_XEN_SHARED_INFO, mfn_x(mfn) << PAGE_SHIFT); -+ -+ /* Mask all upcalls */ -+ for ( i = 0; i < ARRAY_SIZE(XEN_shared_info->evtchn_mask); i++ ) -+ write_atomic(&XEN_shared_info->evtchn_mask[i], ~0ul); -+} -+ - static void __init init_memmap(void) - { - unsigned int i; -@@ -109,6 +134,8 @@ static void __init init_memmap(void) - void __init hypervisor_setup(void) - { - init_memmap(); -+ -+ map_shared_info(); - } - - int hypervisor_alloc_unused_page(mfn_t *mfn) -diff --git a/xen/include/asm-x86/fixmap.h b/xen/include/asm-x86/fixmap.h -index 51b0e7e945..ded4ddf21b 100644 ---- a/xen/include/asm-x86/fixmap.h -+++ b/xen/include/asm-x86/fixmap.h -@@ -45,6 +45,9 @@ enum fixed_addresses { - FIX_COM_BEGIN, - FIX_COM_END, - FIX_EHCI_DBGP, -+#ifdef CONFIG_XEN_GUEST -+ FIX_XEN_SHARED_INFO, -+#endif /* CONFIG_XEN_GUEST */ - /* Everything else should go further down. */ - FIX_APIC_BASE, - FIX_IO_APIC_BASE_0, -diff --git a/xen/include/asm-x86/guest/xen.h b/xen/include/asm-x86/guest/xen.h -index 427837797b..f25ad4241b 100644 ---- a/xen/include/asm-x86/guest/xen.h -+++ b/xen/include/asm-x86/guest/xen.h -@@ -21,6 +21,11 @@ - - #include - -+#include -+#include -+ -+#define XEN_shared_info ((struct shared_info *)fix_to_virt(FIX_XEN_SHARED_INFO)) -+ - #ifdef CONFIG_XEN_GUEST - - extern bool xen_guest; --- -2.14.3 - - -From d2df09c92bf988af804b65a1db92d8ea82a60350 Mon Sep 17 00:00:00 2001 -From: Roger Pau Monne -Date: Wed, 27 Dec 2017 09:23:01 +0000 -Subject: [PATCH 41/77] xen/guest: fetch vCPU ID from Xen -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -If available. - -Signed-off-by: Roger Pau Monné -Reviewed-by: Jan Beulich -[ wei: fix non-shim build ] -Signed-off-by: Wei Liu ---- - xen/arch/x86/guest/xen.c | 23 +++++++++++++++++++++++ - xen/arch/x86/smpboot.c | 4 ++++ - xen/include/asm-x86/guest/xen.h | 7 +++++++ - 3 files changed, 34 insertions(+) - -diff --git a/xen/arch/x86/guest/xen.c b/xen/arch/x86/guest/xen.c -index f62f93af16..de8cfc6e36 100644 ---- a/xen/arch/x86/guest/xen.c -+++ b/xen/arch/x86/guest/xen.c -@@ -37,6 +37,8 @@ static __read_mostly uint32_t xen_cpuid_base; - extern char hypercall_page[]; - static struct rangeset *mem; - -+DEFINE_PER_CPU(unsigned int, vcpu_id); -+ - static void __init find_xen_leaves(void) - { - uint32_t eax, ebx, ecx, edx, base; -@@ -102,6 +104,20 @@ static void map_shared_info(void) - write_atomic(&XEN_shared_info->evtchn_mask[i], ~0ul); - } - -+static void set_vcpu_id(void) -+{ -+ uint32_t eax, ebx, ecx, edx; -+ -+ ASSERT(xen_cpuid_base); -+ -+ /* Fetch vcpu id from cpuid. 
*/ -+ cpuid(xen_cpuid_base + 4, &eax, &ebx, &ecx, &edx); -+ if ( eax & XEN_HVM_CPUID_VCPU_ID_PRESENT ) -+ this_cpu(vcpu_id) = ebx; -+ else -+ this_cpu(vcpu_id) = smp_processor_id(); -+} -+ - static void __init init_memmap(void) - { - unsigned int i; -@@ -136,6 +152,13 @@ void __init hypervisor_setup(void) - init_memmap(); - - map_shared_info(); -+ -+ set_vcpu_id(); -+} -+ -+void hypervisor_ap_setup(void) -+{ -+ set_vcpu_id(); - } - - int hypervisor_alloc_unused_page(mfn_t *mfn) -diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c -index 1609b627ae..5c7863035e 100644 ---- a/xen/arch/x86/smpboot.c -+++ b/xen/arch/x86/smpboot.c -@@ -38,6 +38,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -373,6 +374,9 @@ void start_secondary(void *unused) - cpumask_set_cpu(cpu, &cpu_online_map); - unlock_vector_lock(); - -+ if ( xen_guest ) -+ hypervisor_ap_setup(); -+ - /* We can take interrupts now: we're officially "up". */ - local_irq_enable(); - mtrr_ap_init(); -diff --git a/xen/include/asm-x86/guest/xen.h b/xen/include/asm-x86/guest/xen.h -index f25ad4241b..db35a9e628 100644 ---- a/xen/include/asm-x86/guest/xen.h -+++ b/xen/include/asm-x86/guest/xen.h -@@ -32,9 +32,12 @@ extern bool xen_guest; - - void probe_hypervisor(void); - void hypervisor_setup(void); -+void hypervisor_ap_setup(void); - int hypervisor_alloc_unused_page(mfn_t *mfn); - int hypervisor_free_unused_page(mfn_t mfn); - -+DECLARE_PER_CPU(unsigned int, vcpu_id); -+ - #else - - #define xen_guest 0 -@@ -44,6 +47,10 @@ static inline void hypervisor_setup(void) - { - ASSERT_UNREACHABLE(); - } -+static inline void hypervisor_ap_setup(void) -+{ -+ ASSERT_UNREACHABLE(); -+} - - #endif /* CONFIG_XEN_GUEST */ - #endif /* __X86_GUEST_XEN_H__ */ --- -2.14.3 - - -From 68e7a08436ed50f9ba51f9c9e88819ba0fedcc24 Mon Sep 17 00:00:00 2001 -From: Roger Pau Monne -Date: Thu, 28 Dec 2017 15:22:34 +0000 -Subject: [PATCH 42/77] x86/guest: map per-cpu vcpu_info area. -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Mapping the per-vcpu vcpu_info area is required in order to use more -than XEN_LEGACY_MAX_VCPUS. - -Signed-off-by: Roger Pau Monné -Signed-off-by: Wei Liu ---- -Changes since v1: - - Make vcpu_info_mapped static. - - Add a BUG_ON in case VCPUOP_register_vcpu_info fails. - - Remove one indentation level in hypervisor_setup. - - Make xen_hypercall_vcpu_op return int. 
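(Illustrative aside.) Leaf 4 of the Xen CPUID block is the HVM-specific leaf used above for the vCPU ID; leaf 1 reports the running hypervisor's version as (major << 16) | minor in %eax. A hypothetical helper reading it through the already-discovered xen_cpuid_base would be:

    /* Hypothetical helper, not part of this series. */
    static void __init report_xen_version(void)
    {
        uint32_t eax, ebx, ecx, edx;

        ASSERT(xen_cpuid_base);

        cpuid(xen_cpuid_base + 1, &eax, &ebx, &ecx, &edx);
        printk("Running as a guest on Xen %u.%u\n", eax >> 16, eax & 0xffff);
    }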
---- - xen/arch/x86/guest/xen.c | 57 +++++++++++++++++++++++++++++++++++ - xen/include/asm-x86/guest/hypercall.h | 8 +++++ - xen/include/asm-x86/guest/xen.h | 1 + - 3 files changed, 66 insertions(+) - -diff --git a/xen/arch/x86/guest/xen.c b/xen/arch/x86/guest/xen.c -index de8cfc6e36..60626ec21c 100644 ---- a/xen/arch/x86/guest/xen.c -+++ b/xen/arch/x86/guest/xen.c -@@ -39,6 +39,10 @@ static struct rangeset *mem; - - DEFINE_PER_CPU(unsigned int, vcpu_id); - -+static struct vcpu_info *vcpu_info; -+static unsigned long vcpu_info_mapped[BITS_TO_LONGS(NR_CPUS)]; -+DEFINE_PER_CPU(struct vcpu_info *, vcpu_info); -+ - static void __init find_xen_leaves(void) - { - uint32_t eax, ebx, ecx, edx, base; -@@ -104,6 +108,41 @@ static void map_shared_info(void) - write_atomic(&XEN_shared_info->evtchn_mask[i], ~0ul); - } - -+static int map_vcpuinfo(void) -+{ -+ unsigned int vcpu = this_cpu(vcpu_id); -+ struct vcpu_register_vcpu_info info; -+ int rc; -+ -+ if ( !vcpu_info ) -+ { -+ this_cpu(vcpu_info) = &XEN_shared_info->vcpu_info[vcpu]; -+ return 0; -+ } -+ -+ if ( test_bit(vcpu, vcpu_info_mapped) ) -+ { -+ this_cpu(vcpu_info) = &vcpu_info[vcpu]; -+ return 0; -+ } -+ -+ info.mfn = virt_to_mfn(&vcpu_info[vcpu]); -+ info.offset = (unsigned long)&vcpu_info[vcpu] & ~PAGE_MASK; -+ rc = xen_hypercall_vcpu_op(VCPUOP_register_vcpu_info, vcpu, &info); -+ if ( rc ) -+ { -+ BUG_ON(vcpu >= XEN_LEGACY_MAX_VCPUS); -+ this_cpu(vcpu_info) = &XEN_shared_info->vcpu_info[vcpu]; -+ } -+ else -+ { -+ this_cpu(vcpu_info) = &vcpu_info[vcpu]; -+ set_bit(vcpu, vcpu_info_mapped); -+ } -+ -+ return rc; -+} -+ - static void set_vcpu_id(void) - { - uint32_t eax, ebx, ecx, edx; -@@ -154,11 +193,29 @@ void __init hypervisor_setup(void) - map_shared_info(); - - set_vcpu_id(); -+ vcpu_info = xzalloc_array(struct vcpu_info, nr_cpu_ids); -+ if ( map_vcpuinfo() ) -+ { -+ xfree(vcpu_info); -+ vcpu_info = NULL; -+ } -+ if ( !vcpu_info && nr_cpu_ids > XEN_LEGACY_MAX_VCPUS ) -+ { -+ unsigned int i; -+ -+ for ( i = XEN_LEGACY_MAX_VCPUS; i < nr_cpu_ids; i++ ) -+ __cpumask_clear_cpu(i, &cpu_present_map); -+ nr_cpu_ids = XEN_LEGACY_MAX_VCPUS; -+ printk(XENLOG_WARNING -+ "unable to map vCPU info, limiting vCPUs to: %u\n", -+ XEN_LEGACY_MAX_VCPUS); -+ } - } - - void hypervisor_ap_setup(void) - { - set_vcpu_id(); -+ map_vcpuinfo(); - } - - int hypervisor_alloc_unused_page(mfn_t *mfn) -diff --git a/xen/include/asm-x86/guest/hypercall.h b/xen/include/asm-x86/guest/hypercall.h -index 9cd95d2b92..dbc57a566e 100644 ---- a/xen/include/asm-x86/guest/hypercall.h -+++ b/xen/include/asm-x86/guest/hypercall.h -@@ -26,6 +26,8 @@ - #include - #include - -+#include -+ - /* - * Hypercall primatives for 64bit - * -@@ -96,6 +98,12 @@ static inline long xen_hypercall_memory_op(unsigned int cmd, void *arg) - return _hypercall64_2(long, __HYPERVISOR_memory_op, cmd, arg); - } - -+static inline int xen_hypercall_vcpu_op(unsigned int cmd, unsigned int vcpu, -+ void *arg) -+{ -+ return _hypercall64_3(long, __HYPERVISOR_vcpu_op, cmd, vcpu, arg); -+} -+ - /* - * Higher level hypercall helpers - */ -diff --git a/xen/include/asm-x86/guest/xen.h b/xen/include/asm-x86/guest/xen.h -index db35a9e628..b3e684f756 100644 ---- a/xen/include/asm-x86/guest/xen.h -+++ b/xen/include/asm-x86/guest/xen.h -@@ -37,6 +37,7 @@ int hypervisor_alloc_unused_page(mfn_t *mfn); - int hypervisor_free_unused_page(mfn_t mfn); - - DECLARE_PER_CPU(unsigned int, vcpu_id); -+DECLARE_PER_CPU(struct vcpu_info *, vcpu_info); - - #else - --- -2.14.3 - - -From f5ca36927e87fd4fee647ca567aca01b7ab78004 Mon Sep 17 
00:00:00 2001 -From: Wei Liu -Date: Thu, 16 Nov 2017 17:56:18 +0000 -Subject: [PATCH 43/77] x86: xen pv clock time source -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -It is a variant of TSC clock source. - -Signed-off-by: Wei Liu -Signed-off-by: Andrew Cooper -Signed-off-by: Roger Pau Monné ---- -Changes since v1: - - Use the mapped vcpu_info. ---- - xen/arch/x86/time.c | 89 +++++++++++++++++++++++++++++++++++++++++++++++++++++ - 1 file changed, 89 insertions(+) - -diff --git a/xen/arch/x86/time.c b/xen/arch/x86/time.c -index 6c20b1036d..ab866ad68d 100644 ---- a/xen/arch/x86/time.c -+++ b/xen/arch/x86/time.c -@@ -29,6 +29,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -525,6 +526,91 @@ static struct platform_timesource __initdata plt_tsc = - .init = init_tsc, - }; - -+#ifdef CONFIG_XEN_GUEST -+/************************************************************ -+ * PLATFORM TIMER 5: XEN PV CLOCK SOURCE -+ * -+ * Xen clock source is a variant of TSC source. -+ */ -+ -+static uint64_t xen_timer_cpu_frequency(void) -+{ -+ struct vcpu_time_info *info = &this_cpu(vcpu_info)->time; -+ uint64_t freq; -+ -+ freq = 1000000000ULL << 32; -+ do_div(freq, info->tsc_to_system_mul); -+ if ( info->tsc_shift < 0 ) -+ freq <<= -info->tsc_shift; -+ else -+ freq >>= info->tsc_shift; -+ -+ return freq; -+} -+ -+static int64_t __init init_xen_timer(struct platform_timesource *pts) -+{ -+ if ( !xen_guest ) -+ return 0; -+ -+ pts->frequency = xen_timer_cpu_frequency(); -+ -+ return pts->frequency; -+} -+ -+static always_inline uint64_t read_cycle(const struct vcpu_time_info *info, -+ uint64_t tsc) -+{ -+ uint64_t delta = tsc - info->tsc_timestamp; -+ struct time_scale ts = { -+ .shift = info->tsc_shift, -+ .mul_frac = info->tsc_to_system_mul, -+ }; -+ uint64_t offset = scale_delta(delta, &ts); -+ -+ return info->system_time + offset; -+} -+ -+static uint64_t read_xen_timer(void) -+{ -+ struct vcpu_time_info *info = &this_cpu(vcpu_info)->time; -+ uint32_t version; -+ uint64_t ret; -+ uint64_t last; -+ static uint64_t last_value; -+ -+ do { -+ version = info->version & ~1; -+ /* Make sure version is read before the data */ -+ smp_rmb(); -+ -+ ret = read_cycle(info, rdtsc_ordered()); -+ /* Ignore fancy flags for now */ -+ -+ /* Make sure version is reread after the data */ -+ smp_rmb(); -+ } while ( unlikely(version != info->version) ); -+ -+ /* Maintain a monotonic global value */ -+ do { -+ last = read_atomic(&last_value); -+ if ( ret < last ) -+ return last; -+ } while ( unlikely(cmpxchg(&last_value, last, ret) != last) ); -+ -+ return ret; -+} -+ -+static struct platform_timesource __initdata plt_xen_timer = -+{ -+ .id = "xen", -+ .name = "XEN PV CLOCK", -+ .read_counter = read_xen_timer, -+ .init = init_xen_timer, -+ .counter_bits = 63, -+}; -+#endif -+ - /************************************************************ - * GENERIC PLATFORM TIMER INFRASTRUCTURE - */ -@@ -672,6 +758,9 @@ static s64 __init try_platform_timer(struct platform_timesource *pts) - static u64 __init init_platform_timer(void) - { - static struct platform_timesource * __initdata plt_timers[] = { -+#ifdef CONFIG_XEN_GUEST -+ &plt_xen_timer, -+#endif - &plt_hpet, &plt_pmtimer, &plt_pit - }; - --- -2.14.3 - - -From 949eb11d5813466f1456a6229ff01e294fb1cdeb Mon Sep 17 00:00:00 2001 -From: Wei Liu -Date: Fri, 17 Nov 2017 12:46:41 +0000 -Subject: [PATCH 44/77] x86: APIC timer calibration when running as a guest - -The timer calibration currently depends on PIT. 
Introduce a variant -to wait for a tick's worth of time to elapse when running as a PVH -guest. - -Signed-off-by: Wei Liu -Reviewed-by: Jan Beulich ---- - xen/arch/x86/apic.c | 38 ++++++++++++++++++++++++++++++-------- - 1 file changed, 30 insertions(+), 8 deletions(-) - -diff --git a/xen/arch/x86/apic.c b/xen/arch/x86/apic.c -index ed59440c45..5039173827 100644 ---- a/xen/arch/x86/apic.c -+++ b/xen/arch/x86/apic.c -@@ -36,6 +36,8 @@ - #include - #include - #include -+#include -+#include - - static bool __read_mostly tdt_enabled; - static bool __initdata tdt_enable = true; -@@ -1091,6 +1093,20 @@ static void setup_APIC_timer(void) - local_irq_restore(flags); - } - -+static void wait_tick_pvh(void) -+{ -+ u64 lapse_ns = 1000000000ULL / HZ; -+ s_time_t start, curr_time; -+ -+ start = NOW(); -+ -+ /* Won't wrap around */ -+ do { -+ cpu_relax(); -+ curr_time = NOW(); -+ } while ( curr_time - start < lapse_ns ); -+} -+ - /* - * In this function we calibrate APIC bus clocks to the external - * timer. Unfortunately we cannot use jiffies and the timer irq -@@ -1123,12 +1139,15 @@ static int __init calibrate_APIC_clock(void) - */ - __setup_APIC_LVTT(1000000000); - -- /* -- * The timer chip counts down to zero. Let's wait -- * for a wraparound to start exact measurement: -- * (the current tick might have been already half done) -- */ -- wait_8254_wraparound(); -+ if ( !xen_guest ) -+ /* -+ * The timer chip counts down to zero. Let's wait -+ * for a wraparound to start exact measurement: -+ * (the current tick might have been already half done) -+ */ -+ wait_8254_wraparound(); -+ else -+ wait_tick_pvh(); - - /* - * We wrapped around just now. Let's start: -@@ -1137,10 +1156,13 @@ static int __init calibrate_APIC_clock(void) - tt1 = apic_read(APIC_TMCCT); - - /* -- * Let's wait LOOPS wraprounds: -+ * Let's wait LOOPS ticks: - */ - for (i = 0; i < LOOPS; i++) -- wait_8254_wraparound(); -+ if ( !xen_guest ) -+ wait_8254_wraparound(); -+ else -+ wait_tick_pvh(); - - tt2 = apic_read(APIC_TMCCT); - t2 = rdtsc_ordered(); --- -2.14.3 - - -From 5a543c6f397c9e4f8068e83246967ca7bd92605c Mon Sep 17 00:00:00 2001 -From: Wei Liu -Date: Fri, 17 Nov 2017 15:19:09 +0000 -Subject: [PATCH 45/77] x86: read wallclock from Xen when running in pvh mode - -Signed-off-by: Wei Liu -Reviewed-by: Jan Beulich ---- - xen/arch/x86/time.c | 32 ++++++++++++++++++++++++++++---- - 1 file changed, 28 insertions(+), 4 deletions(-) - -diff --git a/xen/arch/x86/time.c b/xen/arch/x86/time.c -index ab866ad68d..2dbf1c7d75 100644 ---- a/xen/arch/x86/time.c -+++ b/xen/arch/x86/time.c -@@ -964,6 +964,30 @@ static unsigned long get_cmos_time(void) - return mktime(rtc.year, rtc.mon, rtc.day, rtc.hour, rtc.min, rtc.sec); - } - -+static unsigned long get_wallclock_time(void) -+{ -+#ifdef CONFIG_XEN_GUEST -+ if ( xen_guest ) -+ { -+ struct shared_info *sh_info = XEN_shared_info; -+ uint32_t wc_version; -+ uint64_t wc_sec; -+ -+ do { -+ wc_version = sh_info->wc_version & ~1; -+ smp_rmb(); -+ -+ wc_sec = sh_info->wc_sec; -+ smp_rmb(); -+ } while ( wc_version != sh_info->wc_version ); -+ -+ return wc_sec + read_xen_timer() / 1000000000; -+ } -+#endif -+ -+ return get_cmos_time(); -+} -+ - /*************************************************************************** - * System Time - ***************************************************************************/ -@@ -1759,8 +1783,8 @@ int __init init_xen_time(void) - - open_softirq(TIME_CALIBRATE_SOFTIRQ, local_time_calibration); - -- /* NB. get_cmos_time() can take over one second to execute. 
*/ -- do_settime(get_cmos_time(), 0, NOW()); -+ /* NB. get_wallclock_time() can take over one second to execute. */ -+ do_settime(get_wallclock_time(), 0, NOW()); - - /* Finish platform timer initialization. */ - try_platform_timer_tail(false); -@@ -1870,7 +1894,7 @@ int time_suspend(void) - { - if ( smp_processor_id() == 0 ) - { -- cmos_utc_offset = -get_cmos_time(); -+ cmos_utc_offset = -get_wallclock_time(); - cmos_utc_offset += get_sec(); - kill_timer(&calibration_timer); - -@@ -1897,7 +1921,7 @@ int time_resume(void) - - set_timer(&calibration_timer, NOW() + EPOCH); - -- do_settime(get_cmos_time() + cmos_utc_offset, 0, NOW()); -+ do_settime(get_wallclock_time() + cmos_utc_offset, 0, NOW()); - - update_vcpu_system_time(current); - --- -2.14.3 - - -From 3b058a3eabf24b4b31521a49a600438b6a511739 Mon Sep 17 00:00:00 2001 -From: Wei Liu -Date: Thu, 11 Jan 2018 13:45:48 +0000 -Subject: [PATCH 46/77] x86: don't swallow the first command line item in guest - mode - -Signed-off-by: Wei Liu ---- - xen/arch/x86/setup.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c -index b9b97d68f5..c1f4184e06 100644 ---- a/xen/arch/x86/setup.c -+++ b/xen/arch/x86/setup.c -@@ -632,8 +632,8 @@ static char * __init cmdline_cook(char *p, const char *loader_name) - while ( *p == ' ' ) - p++; - -- /* GRUB2 does not include image name as first item on command line. */ -- if ( loader_is_grub2(loader_name) ) -+ /* GRUB2 and PVH don't not include image name as first item on command line. */ -+ if ( xen_guest || loader_is_grub2(loader_name) ) - return p; - - /* Strip image name plus whitespace. */ --- -2.14.3 - - -From cb5dc94ba74f06c574390b58695dd2b4d4971571 Mon Sep 17 00:00:00 2001 -From: Roger Pau Monne -Date: Tue, 9 Jan 2018 12:51:37 +0000 -Subject: [PATCH 47/77] x86/guest: setup event channel upcall vector -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -And a dummy event channel upcall handler. - -Note that with the current code the underlying Xen (L0) must support -HVMOP_set_evtchn_upcall_vector or else event channel setup is going to -fail. This limitation can be lifted by implementing more event channel -interrupt injection methods as a backup. - -Register callback_irq to trick toolstack to think the domain is -enlightened. 
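(Illustrative aside.) Both the PV clock source and the wallclock read above scale a raw TSC delta with the tsc_to_system_mul / tsc_shift pair from vcpu_time_info: the delta is first shifted by tsc_shift and then multiplied by the 32.32 fixed-point multiplier. A compact sketch of that arithmetic, using GCC's unsigned __int128 in place of the scale_delta() helper:

    /* Sketch of the pvclock scaling; example_tsc_to_ns() is not real code. */
    static uint64_t example_tsc_to_ns(const struct vcpu_time_info *info,
                                      uint64_t tsc)
    {
        uint64_t delta = tsc - info->tsc_timestamp;

        if ( info->tsc_shift < 0 )
            delta >>= -info->tsc_shift;
        else
            delta <<= info->tsc_shift;

        /* (delta * mul_frac) >> 32, i.e. nanoseconds since tsc_timestamp. */
        return ((unsigned __int128)delta * info->tsc_to_system_mul) >> 32;
    }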
- -Signed-off-by: Sergey Dyasli -Signed-off-by: Andrew Cooper -Signed-off-by: Roger Pau Monné -Signed-off-by: Wei Liu ---- - xen/arch/x86/guest/xen.c | 41 +++++++++++++++++++++++++++++++++++ - xen/include/asm-x86/guest/hypercall.h | 17 +++++++++++++++ - 2 files changed, 58 insertions(+) - -diff --git a/xen/arch/x86/guest/xen.c b/xen/arch/x86/guest/xen.c -index 60626ec21c..59871170c8 100644 ---- a/xen/arch/x86/guest/xen.c -+++ b/xen/arch/x86/guest/xen.c -@@ -24,6 +24,7 @@ - #include - #include - -+#include - #include - #include - #include -@@ -186,6 +187,43 @@ static void __init init_memmap(void) - } - } - -+static void xen_evtchn_upcall(struct cpu_user_regs *regs) -+{ -+ struct vcpu_info *vcpu_info = this_cpu(vcpu_info); -+ -+ vcpu_info->evtchn_upcall_pending = 0; -+ write_atomic(&vcpu_info->evtchn_pending_sel, 0); -+ -+ ack_APIC_irq(); -+} -+ -+static void init_evtchn(void) -+{ -+ static uint8_t evtchn_upcall_vector; -+ int rc; -+ -+ if ( !evtchn_upcall_vector ) -+ alloc_direct_apic_vector(&evtchn_upcall_vector, xen_evtchn_upcall); -+ -+ ASSERT(evtchn_upcall_vector); -+ -+ rc = xen_hypercall_set_evtchn_upcall_vector(this_cpu(vcpu_id), -+ evtchn_upcall_vector); -+ if ( rc ) -+ panic("Unable to set evtchn upcall vector: %d", rc); -+ -+ /* Trick toolstack to think we are enlightened */ -+ { -+ struct xen_hvm_param a = { -+ .domid = DOMID_SELF, -+ .index = HVM_PARAM_CALLBACK_IRQ, -+ .value = 1, -+ }; -+ -+ BUG_ON(xen_hypercall_hvm_op(HVMOP_set_param, &a)); -+ } -+} -+ - void __init hypervisor_setup(void) - { - init_memmap(); -@@ -210,12 +248,15 @@ void __init hypervisor_setup(void) - "unable to map vCPU info, limiting vCPUs to: %u\n", - XEN_LEGACY_MAX_VCPUS); - } -+ -+ init_evtchn(); - } - - void hypervisor_ap_setup(void) - { - set_vcpu_id(); - map_vcpuinfo(); -+ init_evtchn(); - } - - int hypervisor_alloc_unused_page(mfn_t *mfn) -diff --git a/xen/include/asm-x86/guest/hypercall.h b/xen/include/asm-x86/guest/hypercall.h -index dbc57a566e..b36a1cc189 100644 ---- a/xen/include/asm-x86/guest/hypercall.h -+++ b/xen/include/asm-x86/guest/hypercall.h -@@ -25,6 +25,7 @@ - - #include - #include -+#include - - #include - -@@ -104,6 +105,11 @@ static inline int xen_hypercall_vcpu_op(unsigned int cmd, unsigned int vcpu, - return _hypercall64_3(long, __HYPERVISOR_vcpu_op, cmd, vcpu, arg); - } - -+static inline long xen_hypercall_hvm_op(unsigned int op, void *arg) -+{ -+ return _hypercall64_2(long, __HYPERVISOR_hvm_op, op, arg); -+} -+ - /* - * Higher level hypercall helpers - */ -@@ -120,6 +126,17 @@ static inline long xen_hypercall_shutdown(unsigned int reason) - return xen_hypercall_sched_op(SCHEDOP_shutdown, &s); - } - -+static inline long xen_hypercall_set_evtchn_upcall_vector( -+ unsigned int cpu, unsigned int vector) -+{ -+ struct xen_hvm_evtchn_upcall_vector a = { -+ .vcpu = cpu, -+ .vector = vector, -+ }; -+ -+ return xen_hypercall_hvm_op(HVMOP_set_evtchn_upcall_vector, &a); -+} -+ - #else /* CONFIG_XEN_GUEST */ - - #include --- -2.14.3 - - -From 7477359b9a462d066a4819cefb6d6e60bc4defc5 Mon Sep 17 00:00:00 2001 -From: Sergey Dyasli -Date: Fri, 24 Nov 2017 11:07:32 +0000 -Subject: [PATCH 48/77] x86/guest: add PV console code - -Signed-off-by: Sergey Dyasli -Signed-off-by: Andrew Cooper -Signed-off-by: Wei Liu ---- - xen/drivers/char/Makefile | 1 + - xen/drivers/char/xen_pv_console.c | 205 ++++++++++++++++++++++++++++++++++ - xen/include/asm-x86/fixmap.h | 1 + - xen/include/asm-x86/guest/hypercall.h | 33 ++++++ - xen/include/xen/pv_console.h | 32 ++++++ - 5 files changed, 272 insertions(+) - create 
mode 100644 xen/drivers/char/xen_pv_console.c - create mode 100644 xen/include/xen/pv_console.h - -diff --git a/xen/drivers/char/Makefile b/xen/drivers/char/Makefile -index aa169d7961..9d48d0f2dc 100644 ---- a/xen/drivers/char/Makefile -+++ b/xen/drivers/char/Makefile -@@ -8,3 +8,4 @@ obj-$(CONFIG_HAS_SCIF) += scif-uart.o - obj-$(CONFIG_HAS_EHCI) += ehci-dbgp.o - obj-$(CONFIG_ARM) += arm-uart.o - obj-y += serial.o -+obj-$(CONFIG_XEN_GUEST) += xen_pv_console.o -diff --git a/xen/drivers/char/xen_pv_console.c b/xen/drivers/char/xen_pv_console.c -new file mode 100644 -index 0000000000..f5aca4c69e ---- /dev/null -+++ b/xen/drivers/char/xen_pv_console.c -@@ -0,0 +1,205 @@ -+/****************************************************************************** -+ * drivers/char/xen_pv_console.c -+ * -+ * A frontend driver for Xen's PV console. -+ * Can be used when Xen is running on top of Xen in pv-in-pvh mode. -+ * (Linux's name for this is hvc console) -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; If not, see . -+ * -+ * Copyright (c) 2017 Citrix Systems Ltd. -+ */ -+ -+#include -+#include -+#include -+ -+#include -+#include -+ -+#include -+ -+static struct xencons_interface *cons_ring; -+static evtchn_port_t cons_evtchn; -+static serial_rx_fn cons_rx_handler; -+static DEFINE_SPINLOCK(tx_lock); -+ -+void __init pv_console_init(void) -+{ -+ long r; -+ uint64_t raw_pfn = 0, raw_evtchn = 0; -+ -+ if ( !xen_guest ) -+ { -+ printk("PV console init failed: xen_guest mode is not active!\n"); -+ return; -+ } -+ -+ r = xen_hypercall_hvm_get_param(HVM_PARAM_CONSOLE_PFN, &raw_pfn); -+ if ( r < 0 ) -+ goto error; -+ -+ r = xen_hypercall_hvm_get_param(HVM_PARAM_CONSOLE_EVTCHN, &raw_evtchn); -+ if ( r < 0 ) -+ goto error; -+ -+ set_fixmap(FIX_PV_CONSOLE, raw_pfn << PAGE_SHIFT); -+ cons_ring = (struct xencons_interface *)fix_to_virt(FIX_PV_CONSOLE); -+ cons_evtchn = raw_evtchn; -+ -+ printk("Initialised PV console at 0x%p with pfn %#lx and evtchn %#x\n", -+ cons_ring, raw_pfn, cons_evtchn); -+ return; -+ -+ error: -+ printk("Couldn't initialise PV console\n"); -+} -+ -+void __init pv_console_set_rx_handler(serial_rx_fn fn) -+{ -+ cons_rx_handler = fn; -+} -+ -+void __init pv_console_init_postirq(void) -+{ -+ if ( !cons_ring ) -+ return; -+ -+ xen_hypercall_evtchn_unmask(cons_evtchn); -+} -+ -+static void notify_daemon(void) -+{ -+ xen_hypercall_evtchn_send(cons_evtchn); -+} -+ -+size_t pv_console_rx(struct cpu_user_regs *regs) -+{ -+ char c; -+ XENCONS_RING_IDX cons, prod; -+ size_t recv = 0; -+ -+ if ( !cons_ring ) -+ return 0; -+ -+ /* TODO: move this somewhere */ -+ if ( !test_bit(cons_evtchn, XEN_shared_info->evtchn_pending) ) -+ return 0; -+ -+ prod = ACCESS_ONCE(cons_ring->in_prod); -+ cons = cons_ring->in_cons; -+ -+ /* -+ * Latch pointers before accessing the ring. Included compiler barrier also -+ * ensures that pointers are really read only once into local variables. 
-+ */ -+ smp_rmb(); -+ -+ ASSERT((prod - cons) <= sizeof(cons_ring->in)); -+ -+ while ( cons != prod ) -+ { -+ c = cons_ring->in[MASK_XENCONS_IDX(cons++, cons_ring->in)]; -+ if ( cons_rx_handler ) -+ cons_rx_handler(c, regs); -+ recv++; -+ } -+ -+ /* No need for a mem barrier because every character was already consumed */ -+ barrier(); -+ ACCESS_ONCE(cons_ring->in_cons) = cons; -+ notify_daemon(); -+ -+ clear_bit(cons_evtchn, XEN_shared_info->evtchn_pending); -+ -+ return recv; -+} -+ -+static size_t pv_ring_puts(const char *buf) -+{ -+ XENCONS_RING_IDX cons, prod; -+ size_t sent = 0, avail; -+ bool put_r = false; -+ -+ while ( buf[sent] != '\0' || put_r ) -+ { -+ cons = ACCESS_ONCE(cons_ring->out_cons); -+ prod = cons_ring->out_prod; -+ -+ /* -+ * Latch pointers before accessing the ring. Included compiler barrier -+ * ensures that pointers are really read only once into local variables. -+ */ -+ smp_rmb(); -+ -+ ASSERT((prod - cons) <= sizeof(cons_ring->out)); -+ avail = sizeof(cons_ring->out) - (prod - cons); -+ -+ if ( avail == 0 ) -+ { -+ /* Wait for xenconsoled to consume our output */ -+ xen_hypercall_sched_op(SCHEDOP_yield, NULL); -+ continue; -+ } -+ -+ while ( avail && (buf[sent] != '\0' || put_r) ) -+ { -+ if ( put_r ) -+ { -+ cons_ring->out[MASK_XENCONS_IDX(prod++, cons_ring->out)] = '\r'; -+ put_r = false; -+ } -+ else -+ { -+ cons_ring->out[MASK_XENCONS_IDX(prod++, cons_ring->out)] = -+ buf[sent]; -+ -+ /* Send '\r' for every '\n' */ -+ if ( buf[sent] == '\n' ) -+ put_r = true; -+ sent++; -+ } -+ avail--; -+ } -+ -+ /* Write to the ring before updating the pointer */ -+ smp_wmb(); -+ ACCESS_ONCE(cons_ring->out_prod) = prod; -+ notify_daemon(); -+ } -+ -+ return sent; -+} -+ -+void pv_console_puts(const char *buf) -+{ -+ unsigned long flags; -+ -+ if ( !cons_ring ) -+ return; -+ -+ spin_lock_irqsave(&tx_lock, flags); -+ pv_ring_puts(buf); -+ spin_unlock_irqrestore(&tx_lock, flags); -+} -+ -+/* -+ * Local variables: -+ * mode: C -+ * c-file-style: "BSD" -+ * c-basic-offset: 4 -+ * tab-width: 4 -+ * indent-tabs-mode: nil -+ * End: -+ */ -diff --git a/xen/include/asm-x86/fixmap.h b/xen/include/asm-x86/fixmap.h -index ded4ddf21b..16ccaa2c77 100644 ---- a/xen/include/asm-x86/fixmap.h -+++ b/xen/include/asm-x86/fixmap.h -@@ -46,6 +46,7 @@ enum fixed_addresses { - FIX_COM_END, - FIX_EHCI_DBGP, - #ifdef CONFIG_XEN_GUEST -+ FIX_PV_CONSOLE, - FIX_XEN_SHARED_INFO, - #endif /* CONFIG_XEN_GUEST */ - /* Everything else should go further down. 
*/ -diff --git a/xen/include/asm-x86/guest/hypercall.h b/xen/include/asm-x86/guest/hypercall.h -index b36a1cc189..81a955d479 100644 ---- a/xen/include/asm-x86/guest/hypercall.h -+++ b/xen/include/asm-x86/guest/hypercall.h -@@ -105,6 +105,11 @@ static inline int xen_hypercall_vcpu_op(unsigned int cmd, unsigned int vcpu, - return _hypercall64_3(long, __HYPERVISOR_vcpu_op, cmd, vcpu, arg); - } - -+static inline long xen_hypercall_event_channel_op(unsigned int cmd, void *arg) -+{ -+ return _hypercall64_2(long, __HYPERVISOR_event_channel_op, cmd, arg); -+} -+ - static inline long xen_hypercall_hvm_op(unsigned int op, void *arg) - { - return _hypercall64_2(long, __HYPERVISOR_hvm_op, op, arg); -@@ -126,6 +131,34 @@ static inline long xen_hypercall_shutdown(unsigned int reason) - return xen_hypercall_sched_op(SCHEDOP_shutdown, &s); - } - -+static inline long xen_hypercall_evtchn_send(evtchn_port_t port) -+{ -+ struct evtchn_send send = { .port = port }; -+ -+ return xen_hypercall_event_channel_op(EVTCHNOP_send, &send); -+} -+ -+static inline long xen_hypercall_evtchn_unmask(evtchn_port_t port) -+{ -+ struct evtchn_unmask unmask = { .port = port }; -+ -+ return xen_hypercall_event_channel_op(EVTCHNOP_unmask, &unmask); -+} -+ -+static inline long xen_hypercall_hvm_get_param(uint32_t index, uint64_t *value) -+{ -+ struct xen_hvm_param xhv = { -+ .domid = DOMID_SELF, -+ .index = index, -+ }; -+ long ret = xen_hypercall_hvm_op(HVMOP_get_param, &xhv); -+ -+ if ( ret == 0 ) -+ *value = xhv.value; -+ -+ return ret; -+} -+ - static inline long xen_hypercall_set_evtchn_upcall_vector( - unsigned int cpu, unsigned int vector) - { -diff --git a/xen/include/xen/pv_console.h b/xen/include/xen/pv_console.h -new file mode 100644 -index 0000000000..e578b56620 ---- /dev/null -+++ b/xen/include/xen/pv_console.h -@@ -0,0 +1,32 @@ -+#ifndef __XEN_PV_CONSOLE_H__ -+#define __XEN_PV_CONSOLE_H__ -+ -+#include -+ -+#ifdef CONFIG_XEN_GUEST -+ -+void pv_console_init(void); -+void pv_console_set_rx_handler(serial_rx_fn fn); -+void pv_console_init_postirq(void); -+void pv_console_puts(const char *buf); -+size_t pv_console_rx(struct cpu_user_regs *regs); -+ -+#else -+ -+static inline void pv_console_init(void) {} -+static inline void pv_console_set_rx_handler(serial_rx_fn fn) { } -+static inline void pv_console_init_postirq(void) { } -+static inline void pv_console_puts(const char *buf) { } -+static inline size_t pv_console_rx(struct cpu_user_regs *regs) { return 0; } -+ -+#endif /* !CONFIG_XEN_GUEST */ -+#endif /* __XEN_PV_CONSOLE_H__ */ -+/* -+ * Local variables: -+ * mode: C -+ * c-file-style: "BSD" -+ * c-basic-offset: 4 -+ * tab-width: 4 -+ * indent-tabs-mode: nil -+ * End: -+ */ --- -2.14.3 - - -From aa96a59dc2290fc3084525659282a59b29eff1d5 Mon Sep 17 00:00:00 2001 -From: Sergey Dyasli -Date: Fri, 24 Nov 2017 11:21:17 +0000 -Subject: [PATCH 49/77] x86/guest: use PV console for Xen/Dom0 I/O - -Signed-off-by: Sergey Dyasli -Signed-off-by: Wei Liu ---- - docs/misc/xen-command-line.markdown | 5 ++++- - xen/arch/x86/guest/xen.c | 3 +++ - xen/drivers/char/console.c | 16 ++++++++++++++++ - 3 files changed, 23 insertions(+), 1 deletion(-) - -diff --git a/docs/misc/xen-command-line.markdown b/docs/misc/xen-command-line.markdown -index e5979bceee..da006dd4f7 100644 ---- a/docs/misc/xen-command-line.markdown -+++ b/docs/misc/xen-command-line.markdown -@@ -365,7 +365,7 @@ The following are examples of correct specifications: - Specify the size of the console ring buffer. 
- - ### console --> `= List of [ vga | com1[H,L] | com2[H,L] | dbgp | none ]` -+> `= List of [ vga | com1[H,L] | com2[H,L] | pv | dbgp | none ]` - - > Default: `console=com1,vga` - -@@ -381,6 +381,9 @@ the converse; transmitted and received characters will have their MSB - cleared. This allows a single port to be shared by two subsystems - (e.g. console and debugger). - -+`pv` indicates that Xen should use Xen's PV console. This option is -+only available when used together with `pv-in-pvh`. -+ - `dbgp` indicates that Xen should use a USB debug port. - - `none` indicates that Xen should not use a console. This option only -diff --git a/xen/arch/x86/guest/xen.c b/xen/arch/x86/guest/xen.c -index 59871170c8..d4968b47aa 100644 ---- a/xen/arch/x86/guest/xen.c -+++ b/xen/arch/x86/guest/xen.c -@@ -23,6 +23,7 @@ - #include - #include - #include -+#include - - #include - #include -@@ -194,6 +195,8 @@ static void xen_evtchn_upcall(struct cpu_user_regs *regs) - vcpu_info->evtchn_upcall_pending = 0; - write_atomic(&vcpu_info->evtchn_pending_sel, 0); - -+ pv_console_rx(regs); -+ - ack_APIC_irq(); - } - -diff --git a/xen/drivers/char/console.c b/xen/drivers/char/console.c -index d05ebf9f70..8acd358395 100644 ---- a/xen/drivers/char/console.c -+++ b/xen/drivers/char/console.c -@@ -32,6 +32,7 @@ - #include - - #ifdef CONFIG_X86 -+#include - #include - #endif - -@@ -344,6 +345,11 @@ static void sercon_puts(const char *s) - (*serial_steal_fn)(s); - else - serial_puts(sercon_handle, s); -+ -+#ifdef CONFIG_X86 -+ /* Copy all serial output into PV console */ -+ pv_console_puts(s); -+#endif - } - - static void dump_console_ring_key(unsigned char key) -@@ -805,6 +811,8 @@ void __init console_init_preirq(void) - if ( !strncmp(p, "vga", 3) ) - video_init(); - #ifdef CONFIG_X86 -+ else if ( !strncmp(p, "pv", 2) ) -+ pv_console_init(); - else if ( !strncmp(p, "xen", 3) ) - opt_console_xen = true; - #endif -@@ -828,6 +836,10 @@ void __init console_init_preirq(void) - - serial_set_rx_handler(sercon_handle, serial_rx); - -+#ifdef CONFIG_X86 -+ pv_console_set_rx_handler(serial_rx); -+#endif -+ - /* HELLO WORLD --- start-of-day banner text. */ - spin_lock(&console_lock); - __putstr(xen_banner()); -@@ -880,6 +892,10 @@ void __init console_init_postirq(void) - { - serial_init_postirq(); - -+#ifdef CONFIG_X86 -+ pv_console_init_postirq(); -+#endif -+ - if ( conring != _conring ) - return; - --- -2.14.3 - - -From b5ead1fad3930a3e1034f64f9af416ae211e27da Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Fri, 10 Nov 2017 16:35:26 +0000 -Subject: [PATCH 50/77] x86/shim: Kconfig and command line options - -Signed-off-by: Andrew Cooper -Signed-off-by: Wei Liu ---- - docs/misc/xen-command-line.markdown | 11 ++++++++++ - xen/arch/x86/Kconfig | 22 +++++++++++++++++++ - xen/arch/x86/pv/Makefile | 1 + - xen/arch/x86/pv/shim.c | 39 ++++++++++++++++++++++++++++++++++ - xen/include/asm-x86/guest.h | 1 + - xen/include/asm-x86/pv/shim.h | 42 +++++++++++++++++++++++++++++++++++++ - 6 files changed, 116 insertions(+) - create mode 100644 xen/arch/x86/pv/shim.c - create mode 100644 xen/include/asm-x86/pv/shim.h - -diff --git a/docs/misc/xen-command-line.markdown b/docs/misc/xen-command-line.markdown -index da006dd4f7..3a1a9c1fba 100644 ---- a/docs/misc/xen-command-line.markdown -+++ b/docs/misc/xen-command-line.markdown -@@ -1445,6 +1445,17 @@ do; there may be other custom operating systems which do. If you're - certain you don't plan on having PV guests which use this feature, - turning it off can reduce the attack surface. 
- -+### pv-shim (x86) -+> `= ` -+ -+> Default: `false` -+ -+This option is intended for use by a toolstack, when choosing to run a PV -+guest compatibly inside an HVM container. -+ -+In this mode, the kernel and initrd passed as modules to the hypervisor are -+constructed into a plain unprivileged PV domain. -+ - ### rcu-idle-timer-period-ms - > `= ` - -diff --git a/xen/arch/x86/Kconfig b/xen/arch/x86/Kconfig -index c0b0bcdcb3..4953533f16 100644 ---- a/xen/arch/x86/Kconfig -+++ b/xen/arch/x86/Kconfig -@@ -133,6 +133,28 @@ config PVH_GUEST - ---help--- - Support booting using the PVH ABI. - -+ If unsure, say N. -+ -+config PV_SHIM -+ def_bool n -+ prompt "PV Shim" -+ depends on PV && XEN_GUEST -+ ---help--- -+ Build Xen with a mode which acts as a shim to allow PV guest to run -+ in an HVM/PVH container. This mode can only be enabled with command -+ line option. -+ -+ If unsure, say N. -+ -+config PV_SHIM_EXCLUSIVE -+ def_bool n -+ prompt "PV Shim Exclusive" -+ depends on PV_SHIM -+ ---help--- -+ Build Xen in a way which unconditionally assumes PV_SHIM mode. This -+ option is only intended for use when building a dedicated PV Shim -+ firmware, and will not function correctly in other scenarios. -+ - If unsure, say N. - endmenu - -diff --git a/xen/arch/x86/pv/Makefile b/xen/arch/x86/pv/Makefile -index bac2792aa2..65bca04175 100644 ---- a/xen/arch/x86/pv/Makefile -+++ b/xen/arch/x86/pv/Makefile -@@ -11,6 +11,7 @@ obj-y += iret.o - obj-y += misc-hypercalls.o - obj-y += mm.o - obj-y += ro-page-fault.o -+obj-$(CONFIG_PV_SHIM) += shim.o - obj-y += traps.o - - obj-bin-y += dom0_build.init.o -diff --git a/xen/arch/x86/pv/shim.c b/xen/arch/x86/pv/shim.c -new file mode 100644 -index 0000000000..4d037355db ---- /dev/null -+++ b/xen/arch/x86/pv/shim.c -@@ -0,0 +1,39 @@ -+/****************************************************************************** -+ * arch/x86/pv/shim.c -+ * -+ * Functionaltiy for PV Shim mode -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; If not, see . -+ * -+ * Copyright (c) 2017 Citrix Systems Ltd. 
-+ */ -+#include -+#include -+ -+#include -+ -+#ifndef CONFIG_PV_SHIM_EXCLUSIVE -+bool pv_shim; -+boolean_param("pv-shim", pv_shim); -+#endif -+ -+/* -+ * Local variables: -+ * mode: C -+ * c-file-style: "BSD" -+ * c-basic-offset: 4 -+ * tab-width: 4 -+ * indent-tabs-mode: nil -+ * End: -+ */ -diff --git a/xen/include/asm-x86/guest.h b/xen/include/asm-x86/guest.h -index 5abdb8c433..a38c6b5b3f 100644 ---- a/xen/include/asm-x86/guest.h -+++ b/xen/include/asm-x86/guest.h -@@ -22,6 +22,7 @@ - #include - #include - #include -+#include - - #endif /* __X86_GUEST_H__ */ - -diff --git a/xen/include/asm-x86/pv/shim.h b/xen/include/asm-x86/pv/shim.h -new file mode 100644 -index 0000000000..1468cfd498 ---- /dev/null -+++ b/xen/include/asm-x86/pv/shim.h -@@ -0,0 +1,42 @@ -+/****************************************************************************** -+ * asm-x86/guest/shim.h -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms and conditions of the GNU General Public -+ * License, version 2, as published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public -+ * License along with this program; If not, see . -+ * -+ * Copyright (c) 2017 Citrix Systems Ltd. -+ */ -+ -+#ifndef __X86_PV_SHIM_H__ -+#define __X86_PV_SHIM_H__ -+ -+#include -+ -+#if defined(CONFIG_PV_SHIM_EXCLUSIVE) -+# define pv_shim 1 -+#elif defined(CONFIG_PV_SHIM) -+extern bool pv_shim; -+#else -+# define pv_shim 0 -+#endif /* CONFIG_PV_SHIM{,_EXCLUSIVE} */ -+ -+#endif /* __X86_PV_SHIM_H__ */ -+ -+/* -+ * Local variables: -+ * mode: C -+ * c-file-style: "BSD" -+ * c-basic-offset: 4 -+ * tab-width: 4 -+ * indent-tabs-mode: nil -+ * End: -+ */ --- -2.14.3 - - -From 378425686619e5fae65988cfedd23d5883206c2b Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Wed, 22 Nov 2017 13:31:26 +0000 -Subject: [PATCH 51/77] tools/firmware: Build and install xen-shim -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Link a minimum set of files to build the shim. The linkfarm rune can -handle creation and deletion of files. Introduce build-shim and -install-shim targets in xen/Makefile. - -We can do better by properly generate the dependency from the list of -files but that's an improvement for later. - -Signed-off-by: Andrew Cooper -Signed-off-by: Wei Liu -[change default scheduler to credit] -Signed-off-by: Roger Pau Monné ---- -v2: Introduce a top-level build-shim target. Split the xen-shim build - with normal build. 
---- - .gitignore | 4 ++ - tools/firmware/Makefile | 9 ++++ - tools/firmware/xen-dir/Makefile | 59 ++++++++++++++++++++++++++ - tools/firmware/xen-dir/shim.config | 87 ++++++++++++++++++++++++++++++++++++++ - xen/Makefile | 16 +++++-- - 5 files changed, 172 insertions(+), 3 deletions(-) - create mode 100644 tools/firmware/xen-dir/Makefile - create mode 100644 tools/firmware/xen-dir/shim.config - -diff --git a/.gitignore b/.gitignore -index 8da67daf31..f6cc61a701 100644 ---- a/.gitignore -+++ b/.gitignore -@@ -155,6 +155,10 @@ tools/firmware/rombios/rombios[^/]*.s - tools/firmware/rombios/32bit/32bitbios_flat.h - tools/firmware/vgabios/vbetables-gen - tools/firmware/vgabios/vbetables.h -+tools/firmware/xen-dir/*.old -+tools/firmware/xen-dir/linkfarm.stamp* -+tools/firmware/xen-dir/xen-root -+tools/firmware/xen-dir/xen-shim - tools/flask/utils/flask-getenforce - tools/flask/utils/flask-get-bool - tools/flask/utils/flask-loadpolicy -diff --git a/tools/firmware/Makefile b/tools/firmware/Makefile -index 868b506920..9387cc0878 100644 ---- a/tools/firmware/Makefile -+++ b/tools/firmware/Makefile -@@ -1,6 +1,8 @@ - XEN_ROOT = $(CURDIR)/../.. - include $(XEN_ROOT)/tools/Rules.mk - -+CONFIG_PV_SHIM := y -+ - # hvmloader is a 32-bit protected mode binary. - TARGET := hvmloader/hvmloader - INST_DIR := $(DESTDIR)$(XENFIRMWAREDIR) -@@ -11,6 +13,7 @@ SUBDIRS-$(CONFIG_SEABIOS) += seabios-dir - SUBDIRS-$(CONFIG_ROMBIOS) += rombios - SUBDIRS-$(CONFIG_ROMBIOS) += vgabios - SUBDIRS-$(CONFIG_ROMBIOS) += etherboot -+SUBDIRS-$(CONFIG_PV_SHIM) += xen-dir - SUBDIRS-y += hvmloader - - LD32BIT-$(CONFIG_FreeBSD) := LD32BIT_FLAG=-melf_i386_fbsd -@@ -48,6 +51,9 @@ endif - ifeq ($(CONFIG_OVMF),y) - $(INSTALL_DATA) ovmf-dir/ovmf.bin $(INST_DIR)/ovmf.bin - endif -+ifeq ($(CONFIG_PV_SHIM),y) -+ $(INSTALL_DATA) xen-dir/xen-shim $(INST_DIR)/xen-shim -+endif - - .PHONY: uninstall - uninstall: -@@ -58,6 +64,9 @@ endif - ifeq ($(CONFIG_OVMF),y) - rm -f $(INST_DIR)/ovmf.bin - endif -+ifeq ($(CONFIG_PV_SHIM),y) -+ rm -f $(INST_DIR)/xen-shim -+endif - - .PHONY: clean - clean: subdirs-clean -diff --git a/tools/firmware/xen-dir/Makefile b/tools/firmware/xen-dir/Makefile -new file mode 100644 -index 0000000000..adf6c31e8d ---- /dev/null -+++ b/tools/firmware/xen-dir/Makefile -@@ -0,0 +1,59 @@ -+XEN_ROOT = $(CURDIR)/../../.. -+ -+all: xen-shim -+ -+.PHONY: FORCE -+FORCE: -+ -+D=xen-root -+ -+# Minimun set of files / directories go get Xen to build -+LINK_DIRS=config xen -+LINK_FILES=Config.mk -+ -+DEP_DIRS=$(foreach i, $(LINK_DIRS), $(XEN_ROOT)/$(i)) -+DEP_FILES=$(foreach i, $(LINK_FILES), $(XEN_ROOT)/$(i)) -+ -+linkfarm.stamp: $(DEP_DIRS) $(DEP_FILES) FORCE -+ mkdir -p $(D) -+ set -e -+ rm -f linkfarm.stamp.tmp -+ $(foreach d, $(LINK_DIRS), \ -+ (mkdir -p $(D)/$(d); \ -+ cd $(D)/$(d); \ -+ find $(XEN_ROOT)/$(d)/ -type d -printf "./%P\n" | xargs mkdir -p);) -+ $(foreach d, $(LINK_DIRS), \ -+ (cd $(XEN_ROOT); \ -+ find $(d) ! -type l -type f \ -+ $(addprefix ! 
-path , '*.[oda1]' '*.d[12]')) \ -+ >> linkfarm.stamp.tmp ; ) -+ $(foreach f, $(LINK_FILES), \ -+ echo $(f) >> linkfarm.stamp.tmp ;) -+ cmp -s linkfarm.stamp.tmp linkfarm.stamp && \ -+ rm linkfarm.stamp.tmp || { \ -+ mv linkfarm.stamp.tmp linkfarm.stamp; \ -+ cat linkfarm.stamp | while read f; \ -+ do rm -f "$(D)/$$f"; ln -s "$(XEN_ROOT)/$$f" "$(D)/$$f"; done \ -+ } -+ -+# Copy enough of the tree to build the shim hypervisor -+$(D): linkfarm.stamp -+ $(MAKE) -C $(D)/xen distclean -+ -+.PHONY: shim-%config -+shim-%config: $(D) FORCE -+ $(MAKE) -C $(D)/xen $*config \ -+ XEN_CONFIG_EXPERT=y \ -+ KCONFIG_CONFIG=$(CURDIR)/shim.config -+ -+xen-shim: $(D) shim-olddefconfig -+ $(MAKE) -C $(D)/xen install-shim \ -+ XEN_CONFIG_EXPERT=y \ -+ KCONFIG_CONFIG=$(CURDIR)/shim.config \ -+ DESTDIR=$(CURDIR) -+ -+.PHONY: distclean clean -+distclean clean: -+ rm -f xen-shim *.old -+ rm -rf $(D) -+ rm -f linkfarm.stamp* -diff --git a/tools/firmware/xen-dir/shim.config b/tools/firmware/xen-dir/shim.config -new file mode 100644 -index 0000000000..151a8b41e5 ---- /dev/null -+++ b/tools/firmware/xen-dir/shim.config -@@ -0,0 +1,87 @@ -+# -+# Automatically generated file; DO NOT EDIT. -+# Xen/x86 4.11-unstable Configuration -+# -+CONFIG_X86_64=y -+CONFIG_X86=y -+CONFIG_ARCH_DEFCONFIG="arch/x86/configs/x86_64_defconfig" -+ -+# -+# Architecture Features -+# -+CONFIG_NR_CPUS=32 -+CONFIG_PV=y -+CONFIG_PV_LINEAR_PT=y -+CONFIG_HVM=y -+# CONFIG_SHADOW_PAGING is not set -+# CONFIG_BIGMEM is not set -+# CONFIG_HVM_FEP is not set -+# CONFIG_TBOOT is not set -+CONFIG_XEN_GUEST=y -+CONFIG_PVH_GUEST=y -+CONFIG_PV_SHIM=y -+CONFIG_PV_SHIM_EXCLUSIVE=y -+ -+# -+# Common Features -+# -+CONFIG_COMPAT=y -+CONFIG_CORE_PARKING=y -+CONFIG_HAS_ALTERNATIVE=y -+CONFIG_HAS_EX_TABLE=y -+CONFIG_HAS_MEM_ACCESS=y -+CONFIG_HAS_MEM_PAGING=y -+CONFIG_HAS_MEM_SHARING=y -+CONFIG_HAS_PDX=y -+CONFIG_HAS_UBSAN=y -+CONFIG_HAS_KEXEC=y -+CONFIG_HAS_GDBSX=y -+CONFIG_HAS_IOPORTS=y -+# CONFIG_KEXEC is not set -+# CONFIG_TMEM is not set -+# CONFIG_XENOPROF is not set -+# CONFIG_XSM is not set -+ -+# -+# Schedulers -+# -+CONFIG_SCHED_CREDIT=y -+# CONFIG_SCHED_CREDIT2 is not set -+# CONFIG_SCHED_RTDS is not set -+# CONFIG_SCHED_ARINC653 is not set -+# CONFIG_SCHED_NULL is not set -+# CONFIG_SCHED_CREDIT_DEFAULT is not set -+CONFIG_SCHED_CREDIT_DEFAULT=y -+CONFIG_SCHED_DEFAULT="credit" -+# CONFIG_LIVEPATCH is not set -+# CONFIG_SUPPRESS_DUPLICATE_SYMBOL_WARNINGS is not set -+CONFIG_CMDLINE="" -+ -+# -+# Device Drivers -+# -+CONFIG_ACPI=y -+CONFIG_ACPI_LEGACY_TABLES_LOOKUP=y -+CONFIG_NUMA=y -+CONFIG_HAS_NS16550=y -+CONFIG_HAS_EHCI=y -+CONFIG_HAS_CPUFREQ=y -+CONFIG_HAS_PASSTHROUGH=y -+CONFIG_HAS_PCI=y -+CONFIG_VIDEO=y -+CONFIG_VGA=y -+CONFIG_DEFCONFIG_LIST="$ARCH_DEFCONFIG" -+CONFIG_ARCH_SUPPORTS_INT128=y -+ -+# -+# Debugging Options -+# -+# CONFIG_DEBUG is not set -+# CONFIG_CRASH_DEBUG is not set -+# CONFIG_FRAME_POINTER is not set -+# CONFIG_GCOV is not set -+# CONFIG_LOCK_PROFILE is not set -+# CONFIG_PERF_COUNTERS is not set -+# CONFIG_VERBOSE_DEBUG is not set -+# CONFIG_SCRUB_DEBUG is not set -+# CONFIG_UBSAN is not set -diff --git a/xen/Makefile b/xen/Makefile -index 58a1f97d7d..623f889082 100644 ---- a/xen/Makefile -+++ b/xen/Makefile -@@ -37,10 +37,10 @@ default: build - .PHONY: dist - dist: install - --build install:: include/config/auto.conf -+build install build-shim:: include/config/auto.conf - --.PHONY: build install uninstall clean distclean cscope TAGS tags MAP gtags tests --build install uninstall debug clean distclean cscope TAGS tags MAP gtags 
tests:: -+.PHONY: build install uninstall clean distclean cscope TAGS tags MAP gtags tests install-shim build-shim -+build install uninstall debug clean distclean cscope TAGS tags MAP gtags tests install-shim build-shim:: - ifneq ($(XEN_TARGET_ARCH),x86_32) - $(MAKE) -f Rules.mk _$@ - else -@@ -80,6 +80,13 @@ _install: $(TARGET)$(CONFIG_XEN_INSTALL_SUFFIX) - fi; \ - fi - -+.PHONY: _build-shim -+_build-shim: $(TARGET)-shim -+ -+.PHONY: _install-shim -+_install-shim: build-shim -+ $(INSTALL_DATA) $(TARGET)-shim $(DESTDIR) -+ - .PHONY: _tests - _tests: - $(MAKE) -f $(BASEDIR)/Rules.mk -C test tests -@@ -144,6 +151,9 @@ $(TARGET): delete-unfresh-files - $(MAKE) -f $(BASEDIR)/Rules.mk include/asm-$(TARGET_ARCH)/asm-offsets.h - $(MAKE) -f $(BASEDIR)/Rules.mk -C arch/$(TARGET_ARCH) $(TARGET) - -+$(TARGET)-shim: $(TARGET) -+ $(MAKE) -f $(BASEDIR)/Rules.mk -C arch/$(TARGET_ARCH) $(TARGET)-shim -+ - # drivers/char/console.o contains static banner/compile info. Blow it away. - # Don't refresh these files during e.g., 'sudo make install' - .PHONY: delete-unfresh-files --- -2.14.3 - - -From 2b8a95a2961ba4a5e54b45b49cb6528068a3c0b3 Mon Sep 17 00:00:00 2001 -From: Roger Pau Monne -Date: Tue, 28 Nov 2017 09:54:17 +0000 -Subject: [PATCH 52/77] xen/x86: make VGA support selectable -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Through a Kconfig option. Enable it by default, and disable it for the -PV-in-PVH shim. - -Signed-off-by: Roger Pau Monné -Reviewed-by: Jan Beulich ---- -Changes since v1: - - Make the VGA option dependent on the shim one. ---- - tools/firmware/xen-dir/shim.config | 3 +-- - xen/arch/x86/Kconfig | 1 - - xen/arch/x86/boot/build32.mk | 1 + - xen/arch/x86/boot/cmdline.c | 5 ++++- - xen/arch/x86/boot/trampoline.S | 7 +++++++ - xen/arch/x86/efi/efi-boot.h | 4 ++++ - xen/arch/x86/platform_hypercall.c | 2 ++ - xen/arch/x86/pv/dom0_build.c | 2 ++ - xen/arch/x86/setup.c | 6 ++++++ - xen/drivers/video/Kconfig | 8 +++++++- - xen/include/asm-x86/setup.h | 6 ++++++ - 11 files changed, 40 insertions(+), 5 deletions(-) - -diff --git a/tools/firmware/xen-dir/shim.config b/tools/firmware/xen-dir/shim.config -index 151a8b41e5..d22c2fd2f4 100644 ---- a/tools/firmware/xen-dir/shim.config -+++ b/tools/firmware/xen-dir/shim.config -@@ -68,8 +68,7 @@ CONFIG_HAS_EHCI=y - CONFIG_HAS_CPUFREQ=y - CONFIG_HAS_PASSTHROUGH=y - CONFIG_HAS_PCI=y --CONFIG_VIDEO=y --CONFIG_VGA=y -+# CONFIG_VGA is not set - CONFIG_DEFCONFIG_LIST="$ARCH_DEFCONFIG" - CONFIG_ARCH_SUPPORTS_INT128=y - -diff --git a/xen/arch/x86/Kconfig b/xen/arch/x86/Kconfig -index 4953533f16..f621e799ed 100644 ---- a/xen/arch/x86/Kconfig -+++ b/xen/arch/x86/Kconfig -@@ -24,7 +24,6 @@ config X86 - select HAS_PDX - select HAS_UBSAN - select NUMA -- select VGA - - config ARCH_DEFCONFIG - string -diff --git a/xen/arch/x86/boot/build32.mk b/xen/arch/x86/boot/build32.mk -index f7e8ebe67d..48c7407c00 100644 ---- a/xen/arch/x86/boot/build32.mk -+++ b/xen/arch/x86/boot/build32.mk -@@ -5,6 +5,7 @@ include $(XEN_ROOT)/Config.mk - $(call cc-options-add,CFLAGS,CC,$(EMBEDDED_EXTRA_CFLAGS)) - - CFLAGS += -Werror -fno-asynchronous-unwind-tables -fno-builtin -g0 -msoft-float -+CFLAGS += -I$(XEN_ROOT)/xen/include - CFLAGS := $(filter-out -flto,$(CFLAGS)) - - # NB. 
awk invocation is a portable alternative to 'head -n -1' -diff --git a/xen/arch/x86/boot/cmdline.c b/xen/arch/x86/boot/cmdline.c -index 06aa064e72..51b0659a04 100644 ---- a/xen/arch/x86/boot/cmdline.c -+++ b/xen/arch/x86/boot/cmdline.c -@@ -30,6 +30,7 @@ asm ( - " jmp cmdline_parse_early \n" - ); - -+#include - #include "defs.h" - #include "video.h" - -@@ -336,5 +337,7 @@ void __stdcall cmdline_parse_early(const char *cmdline, early_boot_opts_t *ebo) - ebo->skip_realmode = skip_realmode(cmdline); - ebo->opt_edd = edd_parse(cmdline); - ebo->opt_edid = edid_parse(cmdline); -- vga_parse(cmdline, ebo); -+ -+ if ( IS_ENABLED(CONFIG_VIDEO) ) -+ vga_parse(cmdline, ebo); - } -diff --git a/xen/arch/x86/boot/trampoline.S b/xen/arch/x86/boot/trampoline.S -index 4d640f3fcd..a17a90df5e 100644 ---- a/xen/arch/x86/boot/trampoline.S -+++ b/xen/arch/x86/boot/trampoline.S -@@ -219,7 +219,9 @@ trampoline_boot_cpu_entry: - */ - call get_memory_map - call get_edd -+#ifdef CONFIG_VIDEO - call video -+#endif - - mov $0x0200,%ax - int $0x16 -@@ -267,10 +269,13 @@ opt_edid: - .byte 0 /* EDID parsing option (force/no/default). */ - /* Padding. */ - .byte 0 -+ -+#ifdef CONFIG_VIDEO - GLOBAL(boot_vid_mode) - .word VIDEO_80x25 /* If we don't run at all, assume basic video mode 3 at 80x25. */ - vesa_size: - .word 0,0,0 /* width x depth x height */ -+#endif - - GLOBAL(kbd_shift_flags) - .byte 0 -@@ -279,4 +284,6 @@ rm_idt: .word 256*4-1, 0, 0 - - #include "mem.S" - #include "edd.S" -+#ifdef CONFIG_VIDEO - #include "video.S" -+#endif -diff --git a/xen/arch/x86/efi/efi-boot.h b/xen/arch/x86/efi/efi-boot.h -index d30f688a5a..5789d2cb70 100644 ---- a/xen/arch/x86/efi/efi-boot.h -+++ b/xen/arch/x86/efi/efi-boot.h -@@ -479,16 +479,19 @@ static void __init efi_arch_edd(void) - - static void __init efi_arch_console_init(UINTN cols, UINTN rows) - { -+#ifdef CONFIG_VIDEO - vga_console_info.video_type = XEN_VGATYPE_TEXT_MODE_3; - vga_console_info.u.text_mode_3.columns = cols; - vga_console_info.u.text_mode_3.rows = rows; - vga_console_info.u.text_mode_3.font_height = 16; -+#endif - } - - static void __init efi_arch_video_init(EFI_GRAPHICS_OUTPUT_PROTOCOL *gop, - UINTN info_size, - EFI_GRAPHICS_OUTPUT_MODE_INFORMATION *mode_info) - { -+#ifdef CONFIG_VIDEO - int bpp = 0; - - switch ( mode_info->PixelFormat ) -@@ -550,6 +553,7 @@ static void __init efi_arch_video_init(EFI_GRAPHICS_OUTPUT_PROTOCOL *gop, - vga_console_info.u.vesa_lfb.lfb_size = - (gop->Mode->FrameBufferSize + 0xffff) >> 16; - } -+#endif - } - - static void __init efi_arch_memory_setup(void) -diff --git a/xen/arch/x86/platform_hypercall.c b/xen/arch/x86/platform_hypercall.c -index ebc2f394ee..ea18c3215a 100644 ---- a/xen/arch/x86/platform_hypercall.c -+++ b/xen/arch/x86/platform_hypercall.c -@@ -388,6 +388,7 @@ ret_t do_platform_op(XEN_GUEST_HANDLE_PARAM(xen_platform_op_t) u_xenpf_op) - } - case XEN_FW_VBEDDC_INFO: - ret = -ESRCH; -+#ifdef CONFIG_VIDEO - if ( op->u.firmware_info.index != 0 ) - break; - if ( *(u32 *)bootsym(boot_edid_info) == 0x13131313 ) -@@ -406,6 +407,7 @@ ret_t do_platform_op(XEN_GUEST_HANDLE_PARAM(xen_platform_op_t) u_xenpf_op) - copy_to_compat(op->u.firmware_info.u.vbeddc_info.edid, - bootsym(boot_edid_info), 128) ) - ret = -EFAULT; -+#endif - break; - case XEN_FW_EFI_INFO: - ret = efi_get_info(op->u.firmware_info.index, -diff --git a/xen/arch/x86/pv/dom0_build.c b/xen/arch/x86/pv/dom0_build.c -index a13412efb9..a3be335b0b 100644 ---- a/xen/arch/x86/pv/dom0_build.c -+++ b/xen/arch/x86/pv/dom0_build.c -@@ -832,11 +832,13 @@ int __init 
dom0_construct_pv(struct domain *d, - if ( cmdline != NULL ) - strlcpy((char *)si->cmd_line, cmdline, sizeof(si->cmd_line)); - -+#ifdef CONFIG_VIDEO - if ( fill_console_start_info((void *)(si + 1)) ) - { - si->console.dom0.info_off = sizeof(struct start_info); - si->console.dom0.info_size = sizeof(struct dom0_vga_console_info); - } -+#endif - - if ( is_pv_32bit_domain(d) ) - xlat_start_info(si, XLAT_start_info_console_dom0); -diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c -index c1f4184e06..2279014f74 100644 ---- a/xen/arch/x86/setup.c -+++ b/xen/arch/x86/setup.c -@@ -456,6 +456,7 @@ static void __init setup_max_pdx(unsigned long top_page) - /* A temporary copy of the e820 map that we can mess with during bootstrap. */ - static struct e820map __initdata boot_e820; - -+#ifdef CONFIG_VIDEO - struct boot_video_info { - u8 orig_x; /* 0x00 */ - u8 orig_y; /* 0x01 */ -@@ -486,9 +487,11 @@ struct boot_video_info { - u16 vesa_attrib; /* 0x28 */ - }; - extern struct boot_video_info boot_vid_info; -+#endif - - static void __init parse_video_info(void) - { -+#ifdef CONFIG_VIDEO - struct boot_video_info *bvi = &bootsym(boot_vid_info); - - /* vga_console_info is filled directly on EFI platform. */ -@@ -524,6 +527,7 @@ static void __init parse_video_info(void) - vga_console_info.u.vesa_lfb.gbl_caps = bvi->capabilities; - vga_console_info.u.vesa_lfb.mode_attrs = bvi->vesa_attrib; - } -+#endif - } - - static void __init kexec_reserve_area(struct e820map *e820) -@@ -741,6 +745,7 @@ void __init noreturn __start_xen(unsigned long mbi_p) - - printk("Xen image load base address: %#lx\n", xen_phys_start); - -+#ifdef CONFIG_VIDEO - printk("Video information:\n"); - - /* Print VGA display mode information. */ -@@ -784,6 +789,7 @@ void __init noreturn __start_xen(unsigned long mbi_p) - printk("of reasons unknown\n"); - } - } -+#endif - - printk("Disc information:\n"); - printk(" Found %d MBR signatures\n", -diff --git a/xen/drivers/video/Kconfig b/xen/drivers/video/Kconfig -index 0ffbbd9a88..52e8ce6c15 100644 ---- a/xen/drivers/video/Kconfig -+++ b/xen/drivers/video/Kconfig -@@ -3,8 +3,14 @@ config VIDEO - bool - - config VGA -- bool -+ bool "VGA support" if !PV_SHIM_EXCLUSIVE - select VIDEO -+ depends on X86 -+ default y if !PV_SHIM_EXCLUSIVE -+ ---help--- -+ Enable VGA output for the Xen hypervisor. -+ -+ If unsure, say Y. - - config HAS_ARM_HDLCD - bool -diff --git a/xen/include/asm-x86/setup.h b/xen/include/asm-x86/setup.h -index c5b3d4ef18..b68ec9de4d 100644 ---- a/xen/include/asm-x86/setup.h -+++ b/xen/include/asm-x86/setup.h -@@ -31,8 +31,14 @@ void arch_init_memory(void); - void subarch_init_memory(void); - - void init_IRQ(void); -+ -+#ifdef CONFIG_VIDEO - void vesa_init(void); - void vesa_mtrr_init(void); -+#else -+static inline void vesa_init(void) {}; -+static inline void vesa_mtrr_init(void) {}; -+#endif - - int construct_dom0( - struct domain *d, --- -2.14.3 - - -From 4ba6447e7ddbee91c3781c2630ca1d28e080857c Mon Sep 17 00:00:00 2001 -From: Roger Pau Monne -Date: Thu, 11 Jan 2018 11:41:18 +0000 -Subject: [PATCH 53/77] xen/pvh: do not mark the low 1MB as IO mem -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -On PVH there's nothing special on the low 1MB. - -This is an optional patch that doesn't affect the functionality of the -shim. 
- -Signed-off-by: Roger Pau Monné -Signed-off-by: Andrew Cooper ---- - xen/arch/x86/mm.c | 9 +++++++-- - 1 file changed, 7 insertions(+), 2 deletions(-) - -diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c -index 0569342200..371c764027 100644 ---- a/xen/arch/x86/mm.c -+++ b/xen/arch/x86/mm.c -@@ -122,6 +122,7 @@ - #include - #include - #include -+#include - - #include - #include -@@ -288,8 +289,12 @@ void __init arch_init_memory(void) - dom_cow = domain_create(DOMID_COW, DOMCRF_dummy, 0, NULL); - BUG_ON(IS_ERR(dom_cow)); - -- /* First 1MB of RAM is historically marked as I/O. */ -- for ( i = 0; i < 0x100; i++ ) -+ /* -+ * First 1MB of RAM is historically marked as I/O. If we booted PVH, -+ * reclaim the space. Irrespective, leave MFN 0 as special for the sake -+ * of 0 being a very common default value. -+ */ -+ for ( i = 0; i < (pvh_boot ? 1 : 0x100); i++ ) - share_xen_page_with_guest(mfn_to_page(_mfn(i)), - dom_io, XENSHARE_writable); - --- -2.14.3 - - -From 0ba5d8c27509ba2011591cfab2715e8ca6b7b402 Mon Sep 17 00:00:00 2001 -From: Roger Pau Monne -Date: Thu, 11 Jan 2018 11:41:18 +0000 -Subject: [PATCH 54/77] xen/pvshim: skip Dom0-only domain builder parts -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Do not allow access to any iomem or ioport by the shim, and also -remove the check for Dom0 kernel support. - -Signed-off-by: Roger Pau Monné -Acked-by: Jan Beulich ---- - xen/arch/x86/dom0_build.c | 4 ++++ - xen/arch/x86/pv/dom0_build.c | 3 ++- - 2 files changed, 6 insertions(+), 1 deletion(-) - -diff --git a/xen/arch/x86/dom0_build.c b/xen/arch/x86/dom0_build.c -index bf992fef6d..e2bf81b4e7 100644 ---- a/xen/arch/x86/dom0_build.c -+++ b/xen/arch/x86/dom0_build.c -@@ -13,6 +13,7 @@ - #include - - #include -+#include - #include - #include - #include -@@ -385,6 +386,9 @@ int __init dom0_setup_permissions(struct domain *d) - unsigned int i; - int rc; - -+ if ( pv_shim ) -+ return 0; -+ - /* The hardware domain is initially permitted full I/O capabilities. */ - rc = ioports_permit_access(d, 0, 0xFFFF); - rc |= iomem_permit_access(d, 0UL, (1UL << (paddr_bits - PAGE_SHIFT)) - 1); -diff --git a/xen/arch/x86/pv/dom0_build.c b/xen/arch/x86/pv/dom0_build.c -index a3be335b0b..852d00a8be 100644 ---- a/xen/arch/x86/pv/dom0_build.c -+++ b/xen/arch/x86/pv/dom0_build.c -@@ -17,6 +17,7 @@ - - #include - #include -+#include - #include - #include - #include -@@ -373,7 +374,7 @@ int __init dom0_construct_pv(struct domain *d, - - if ( parms.elf_notes[XEN_ELFNOTE_SUPPORTED_FEATURES].type != XEN_ENT_NONE ) - { -- if ( !test_bit(XENFEAT_dom0, parms.f_supported) ) -+ if ( !pv_shim && !test_bit(XENFEAT_dom0, parms.f_supported) ) - { - printk("Kernel does not support Dom0 operation\n"); - rc = -EINVAL; --- -2.14.3 - - -From 60dd95357cca09c5ed3c4f3d57c11b732ea8befd Mon Sep 17 00:00:00 2001 -From: Roger Pau Monne -Date: Thu, 11 Jan 2018 11:41:18 +0000 -Subject: [PATCH 55/77] xen: mark xenstore/console pages as RAM -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -This si required so that later they can be shared with the guest if -Xen is running in shim mode. - -Also prevent them from being used by Xen by marking them as bad pages -in init_boot_pages. - -Signed-off-by: Roger Pau Monné -Signed-off-by: Wei Liu ---- -Changes since v1: - - Remove adding the pages to dom_io, there's no need since they are - already marked as bad pages. 
- - Use a static global array to store the memory address of this - special pages, so Xen avoids having to call - xen_hypercall_hvm_get_param twice. ---- - xen/arch/x86/e820.c | 4 ++++ - xen/arch/x86/guest/xen.c | 43 +++++++++++++++++++++++++++++++++++++++ - xen/common/page_alloc.c | 15 ++++++++++++++ - xen/drivers/char/xen_pv_console.c | 4 ++++ - xen/include/asm-x86/guest/xen.h | 14 +++++++++++++ - 5 files changed, 80 insertions(+) - -diff --git a/xen/arch/x86/e820.c b/xen/arch/x86/e820.c -index b422a684ee..590ea985ef 100644 ---- a/xen/arch/x86/e820.c -+++ b/xen/arch/x86/e820.c -@@ -9,6 +9,7 @@ - #include - #include - #include -+#include - - /* - * opt_mem: Limit maximum address of physical RAM. -@@ -699,6 +700,9 @@ unsigned long __init init_e820(const char *str, struct e820map *raw) - - machine_specific_memory_setup(raw); - -+ if ( xen_guest ) -+ hypervisor_fixup_e820(&e820); -+ - printk("%s RAM map:\n", str); - print_e820_memory_map(e820.map, e820.nr_map); - -diff --git a/xen/arch/x86/guest/xen.c b/xen/arch/x86/guest/xen.c -index d4968b47aa..27a6c47753 100644 ---- a/xen/arch/x86/guest/xen.c -+++ b/xen/arch/x86/guest/xen.c -@@ -32,12 +32,14 @@ - #include - - #include -+#include - - bool __read_mostly xen_guest; - - static __read_mostly uint32_t xen_cpuid_base; - extern char hypercall_page[]; - static struct rangeset *mem; -+static unsigned long __initdata reserved_pages[2]; - - DEFINE_PER_CPU(unsigned int, vcpu_id); - -@@ -279,6 +281,47 @@ int hypervisor_free_unused_page(mfn_t mfn) - return rangeset_remove_range(mem, mfn_x(mfn), mfn_x(mfn)); - } - -+static void __init mark_pfn_as_ram(struct e820map *e820, uint64_t pfn) -+{ -+ if ( !e820_add_range(e820, pfn << PAGE_SHIFT, -+ (pfn << PAGE_SHIFT) + PAGE_SIZE, E820_RAM) ) -+ if ( !e820_change_range_type(e820, pfn << PAGE_SHIFT, -+ (pfn << PAGE_SHIFT) + PAGE_SIZE, -+ E820_RESERVED, E820_RAM) ) -+ panic("Unable to add/change memory type of pfn %#lx to RAM", pfn); -+} -+ -+void __init hypervisor_fixup_e820(struct e820map *e820) -+{ -+ uint64_t pfn = 0; -+ unsigned int i = 0; -+ long rc; -+ -+ ASSERT(xen_guest); -+ -+#define MARK_PARAM_RAM(p) ({ \ -+ rc = xen_hypercall_hvm_get_param(p, &pfn); \ -+ if ( rc ) \ -+ panic("Unable to get " #p); \ -+ mark_pfn_as_ram(e820, pfn); \ -+ ASSERT(i < ARRAY_SIZE(reserved_pages)); \ -+ reserved_pages[i++] = pfn << PAGE_SHIFT; \ -+}) -+ MARK_PARAM_RAM(HVM_PARAM_STORE_PFN); -+ if ( !pv_console ) -+ MARK_PARAM_RAM(HVM_PARAM_CONSOLE_PFN); -+#undef MARK_PARAM_RAM -+} -+ -+const unsigned long *__init hypervisor_reserved_pages(unsigned int *size) -+{ -+ ASSERT(xen_guest); -+ -+ *size = ARRAY_SIZE(reserved_pages); -+ -+ return reserved_pages; -+} -+ - /* - * Local variables: - * mode: C -diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c -index 5616a82263..49b2a91751 100644 ---- a/xen/common/page_alloc.c -+++ b/xen/common/page_alloc.c -@@ -143,6 +143,7 @@ - #include - #include - #ifdef CONFIG_X86 -+#include - #include - #include /* for highmem_start only */ - #else -@@ -303,6 +304,20 @@ void __init init_boot_pages(paddr_t ps, paddr_t pe) - badpage++; - } - } -+ -+ if ( xen_guest ) -+ { -+ badpage = hypervisor_reserved_pages(&array_size); -+ if ( badpage ) -+ { -+ for ( i = 0; i < array_size; i++ ) -+ { -+ bootmem_region_zap(*badpage >> PAGE_SHIFT, -+ (*badpage >> PAGE_SHIFT) + 1); -+ badpage++; -+ } -+ } -+ } - #endif - - /* Check new pages against the bad-page list. 
*/ -diff --git a/xen/drivers/char/xen_pv_console.c b/xen/drivers/char/xen_pv_console.c -index f5aca4c69e..d4f0532101 100644 ---- a/xen/drivers/char/xen_pv_console.c -+++ b/xen/drivers/char/xen_pv_console.c -@@ -35,6 +35,8 @@ static evtchn_port_t cons_evtchn; - static serial_rx_fn cons_rx_handler; - static DEFINE_SPINLOCK(tx_lock); - -+bool pv_console; -+ - void __init pv_console_init(void) - { - long r; -@@ -60,6 +62,8 @@ void __init pv_console_init(void) - - printk("Initialised PV console at 0x%p with pfn %#lx and evtchn %#x\n", - cons_ring, raw_pfn, cons_evtchn); -+ pv_console = true; -+ - return; - - error: -diff --git a/xen/include/asm-x86/guest/xen.h b/xen/include/asm-x86/guest/xen.h -index b3e684f756..62255fda8b 100644 ---- a/xen/include/asm-x86/guest/xen.h -+++ b/xen/include/asm-x86/guest/xen.h -@@ -29,12 +29,15 @@ - #ifdef CONFIG_XEN_GUEST - - extern bool xen_guest; -+extern bool pv_console; - - void probe_hypervisor(void); - void hypervisor_setup(void); - void hypervisor_ap_setup(void); - int hypervisor_alloc_unused_page(mfn_t *mfn); - int hypervisor_free_unused_page(mfn_t mfn); -+void hypervisor_fixup_e820(struct e820map *e820); -+const unsigned long *hypervisor_reserved_pages(unsigned int *size); - - DECLARE_PER_CPU(unsigned int, vcpu_id); - DECLARE_PER_CPU(struct vcpu_info *, vcpu_info); -@@ -42,6 +45,7 @@ DECLARE_PER_CPU(struct vcpu_info *, vcpu_info); - #else - - #define xen_guest 0 -+#define pv_console 0 - - static inline void probe_hypervisor(void) {}; - static inline void hypervisor_setup(void) -@@ -53,6 +57,16 @@ static inline void hypervisor_ap_setup(void) - ASSERT_UNREACHABLE(); - } - -+static inline void hypervisor_fixup_e820(struct e820map *e820) -+{ -+ ASSERT_UNREACHABLE(); -+} -+static inline const unsigned long *hypervisor_reserved_pages(unsigned int *size) -+{ -+ ASSERT_UNREACHABLE(); -+ return NULL; -+}; -+ - #endif /* CONFIG_XEN_GUEST */ - #endif /* __X86_GUEST_XEN_H__ */ - --- -2.14.3 - - -From 1cd703979f73778403d0b0cf5c77c87534c544db Mon Sep 17 00:00:00 2001 -From: Roger Pau Monne -Date: Thu, 11 Jan 2018 11:41:18 +0000 -Subject: [PATCH 56/77] xen/pvshim: modify Dom0 builder in order to build a - DomU -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -According to the PV ABI the initial virtual memory regions should -contain the xenstore and console pages after the start_info. Also set -the correct values in the start_info for DomU operation. - -Signed-off-by: Roger Pau Monné ---- -Changes since v1: - - Modify the position of the __init attribute in dom0_update_physmap. - - Move the addition of sizeof(struct dom0_vga_console_info) to - vstartinfo_end with an existing if branch. - - Add a TODO item for fill_console_start_info in the !CONFIG_VIDEO - case. - - s/replace_va/replace_va_mapping/. - - Remove call to free_domheap_pages in replace_va_mapping. - put_page_and_type should already take care of freeing the page. - - Use PFN_DOWN in SET_AND_MAP_PARAM macro. - - Parenthesize va in SET_AND_MAP_PARAM macro when required. 
---- - xen/arch/x86/pv/dom0_build.c | 48 +++++++++++++++++++++++------- - xen/arch/x86/pv/shim.c | 63 ++++++++++++++++++++++++++++++++++++++++ - xen/include/asm-x86/dom0_build.h | 4 +++ - xen/include/asm-x86/pv/shim.h | 21 ++++++++++++++ - 4 files changed, 126 insertions(+), 10 deletions(-) - -diff --git a/xen/arch/x86/pv/dom0_build.c b/xen/arch/x86/pv/dom0_build.c -index 852d00a8be..72752b8656 100644 ---- a/xen/arch/x86/pv/dom0_build.c -+++ b/xen/arch/x86/pv/dom0_build.c -@@ -31,9 +31,8 @@ - #define L3_PROT (BASE_PROT|_PAGE_DIRTY) - #define L4_PROT (BASE_PROT|_PAGE_DIRTY) - --static __init void dom0_update_physmap(struct domain *d, unsigned long pfn, -- unsigned long mfn, -- unsigned long vphysmap_s) -+void __init dom0_update_physmap(struct domain *d, unsigned long pfn, -+ unsigned long mfn, unsigned long vphysmap_s) - { - if ( !is_pv_32bit_domain(d) ) - ((unsigned long *)vphysmap_s)[pfn] = mfn; -@@ -316,6 +315,10 @@ int __init dom0_construct_pv(struct domain *d, - unsigned long vphysmap_end; - unsigned long vstartinfo_start; - unsigned long vstartinfo_end; -+ unsigned long vxenstore_start = 0; -+ unsigned long vxenstore_end = 0; -+ unsigned long vconsole_start = 0; -+ unsigned long vconsole_end = 0; - unsigned long vstack_start; - unsigned long vstack_end; - unsigned long vpt_start; -@@ -441,11 +444,22 @@ int __init dom0_construct_pv(struct domain *d, - if ( parms.p2m_base != UNSET_ADDR ) - vphysmap_end = vphysmap_start; - vstartinfo_start = round_pgup(vphysmap_end); -- vstartinfo_end = (vstartinfo_start + -- sizeof(struct start_info) + -- sizeof(struct dom0_vga_console_info)); -+ vstartinfo_end = vstartinfo_start + sizeof(struct start_info); -+ -+ if ( pv_shim ) -+ { -+ vxenstore_start = round_pgup(vstartinfo_end); -+ vxenstore_end = vxenstore_start + PAGE_SIZE; -+ vconsole_start = vxenstore_end; -+ vconsole_end = vconsole_start + PAGE_SIZE; -+ vpt_start = vconsole_end; -+ } -+ else -+ { -+ vpt_start = round_pgup(vstartinfo_end); -+ vstartinfo_end += sizeof(struct dom0_vga_console_info); -+ } - -- vpt_start = round_pgup(vstartinfo_end); - for ( nr_pt_pages = 2; ; nr_pt_pages++ ) - { - vpt_end = vpt_start + (nr_pt_pages * PAGE_SIZE); -@@ -538,6 +552,8 @@ int __init dom0_construct_pv(struct domain *d, - " Init. 
ramdisk: %p->%p\n" - " Phys-Mach map: %p->%p\n" - " Start info: %p->%p\n" -+ " Xenstore ring: %p->%p\n" -+ " Console ring: %p->%p\n" - " Page tables: %p->%p\n" - " Boot stack: %p->%p\n" - " TOTAL: %p->%p\n", -@@ -545,6 +561,8 @@ int __init dom0_construct_pv(struct domain *d, - _p(vinitrd_start), _p(vinitrd_end), - _p(vphysmap_start), _p(vphysmap_end), - _p(vstartinfo_start), _p(vstartinfo_end), -+ _p(vxenstore_start), _p(vxenstore_end), -+ _p(vconsole_start), _p(vconsole_end), - _p(vpt_start), _p(vpt_end), - _p(vstack_start), _p(vstack_end), - _p(v_start), _p(v_end)); -@@ -742,7 +760,8 @@ int __init dom0_construct_pv(struct domain *d, - - si->shared_info = virt_to_maddr(d->shared_info); - -- si->flags = SIF_PRIVILEGED | SIF_INITDOMAIN; -+ if ( !pv_shim ) -+ si->flags = SIF_PRIVILEGED | SIF_INITDOMAIN; - if ( !vinitrd_start && initrd_len ) - si->flags |= SIF_MOD_START_PFN; - si->flags |= (xen_processor_pmbits << 8) & SIF_PM_MASK; -@@ -834,15 +853,24 @@ int __init dom0_construct_pv(struct domain *d, - strlcpy((char *)si->cmd_line, cmdline, sizeof(si->cmd_line)); - - #ifdef CONFIG_VIDEO -- if ( fill_console_start_info((void *)(si + 1)) ) -+ if ( !pv_shim && fill_console_start_info((void *)(si + 1)) ) - { - si->console.dom0.info_off = sizeof(struct start_info); - si->console.dom0.info_size = sizeof(struct dom0_vga_console_info); - } - #endif - -+ /* -+ * TODO: provide an empty stub for fill_console_start_info in the -+ * !CONFIG_VIDEO case so the logic here can be simplified. -+ */ -+ if ( pv_shim ) -+ pv_shim_setup_dom(d, l4start, v_start, vxenstore_start, vconsole_start, -+ vphysmap_start, si); -+ - if ( is_pv_32bit_domain(d) ) -- xlat_start_info(si, XLAT_start_info_console_dom0); -+ xlat_start_info(si, pv_shim ? XLAT_start_info_console_domU -+ : XLAT_start_info_console_dom0); - - /* Return to idle domain's page tables. */ - mapcache_override_current(NULL); -diff --git a/xen/arch/x86/pv/shim.c b/xen/arch/x86/pv/shim.c -index 4d037355db..75365b0697 100644 ---- a/xen/arch/x86/pv/shim.c -+++ b/xen/arch/x86/pv/shim.c -@@ -18,16 +18,79 @@ - * - * Copyright (c) 2017 Citrix Systems Ltd. - */ -+#include - #include - #include - - #include -+#include -+#include -+#include - - #ifndef CONFIG_PV_SHIM_EXCLUSIVE - bool pv_shim; - boolean_param("pv-shim", pv_shim); - #endif - -+#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER| \ -+ _PAGE_GUEST_KERNEL) -+#define COMPAT_L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED) -+ -+static void __init replace_va_mapping(struct domain *d, l4_pgentry_t *l4start, -+ unsigned long va, unsigned long mfn) -+{ -+ struct page_info *page; -+ l4_pgentry_t *pl4e; -+ l3_pgentry_t *pl3e; -+ l2_pgentry_t *pl2e; -+ l1_pgentry_t *pl1e; -+ -+ pl4e = l4start + l4_table_offset(va); -+ pl3e = l4e_to_l3e(*pl4e); -+ pl3e += l3_table_offset(va); -+ pl2e = l3e_to_l2e(*pl3e); -+ pl2e += l2_table_offset(va); -+ pl1e = l2e_to_l1e(*pl2e); -+ pl1e += l1_table_offset(va); -+ -+ page = mfn_to_page(l1e_get_pfn(*pl1e)); -+ put_page_and_type(page); -+ -+ *pl1e = l1e_from_pfn(mfn, (!is_pv_32bit_domain(d) ? 
L1_PROT -+ : COMPAT_L1_PROT)); -+} -+ -+void __init pv_shim_setup_dom(struct domain *d, l4_pgentry_t *l4start, -+ unsigned long va_start, unsigned long store_va, -+ unsigned long console_va, unsigned long vphysmap, -+ start_info_t *si) -+{ -+ uint64_t param = 0; -+ long rc; -+ -+#define SET_AND_MAP_PARAM(p, si, va) ({ \ -+ rc = xen_hypercall_hvm_get_param(p, ¶m); \ -+ if ( rc ) \ -+ panic("Unable to get " #p "\n"); \ -+ (si) = param; \ -+ if ( va ) \ -+ { \ -+ share_xen_page_with_guest(mfn_to_page(param), d, XENSHARE_writable); \ -+ replace_va_mapping(d, l4start, va, param); \ -+ dom0_update_physmap(d, PFN_DOWN((va) - va_start), param, vphysmap); \ -+ } \ -+}) -+ SET_AND_MAP_PARAM(HVM_PARAM_STORE_PFN, si->store_mfn, store_va); -+ SET_AND_MAP_PARAM(HVM_PARAM_STORE_EVTCHN, si->store_evtchn, 0); -+ if ( !pv_console ) -+ { -+ SET_AND_MAP_PARAM(HVM_PARAM_CONSOLE_PFN, si->console.domU.mfn, -+ console_va); -+ SET_AND_MAP_PARAM(HVM_PARAM_CONSOLE_EVTCHN, si->console.domU.evtchn, 0); -+ } -+#undef SET_AND_MAP_PARAM -+} -+ - /* - * Local variables: - * mode: C -diff --git a/xen/include/asm-x86/dom0_build.h b/xen/include/asm-x86/dom0_build.h -index d83d2b4387..d985406503 100644 ---- a/xen/include/asm-x86/dom0_build.h -+++ b/xen/include/asm-x86/dom0_build.h -@@ -1,6 +1,7 @@ - #ifndef _DOM0_BUILD_H_ - #define _DOM0_BUILD_H_ - -+#include - #include - - #include -@@ -29,6 +30,9 @@ int dom0_construct_pvh(struct domain *d, const module_t *image, - unsigned long dom0_paging_pages(const struct domain *d, - unsigned long nr_pages); - -+void dom0_update_physmap(struct domain *d, unsigned long pfn, -+ unsigned long mfn, unsigned long vphysmap_s); -+ - #endif /* _DOM0_BUILD_H_ */ - - /* -diff --git a/xen/include/asm-x86/pv/shim.h b/xen/include/asm-x86/pv/shim.h -index 1468cfd498..b0c361cba1 100644 ---- a/xen/include/asm-x86/pv/shim.h -+++ b/xen/include/asm-x86/pv/shim.h -@@ -29,6 +29,27 @@ extern bool pv_shim; - # define pv_shim 0 - #endif /* CONFIG_PV_SHIM{,_EXCLUSIVE} */ - -+#ifdef CONFIG_PV_SHIM -+ -+void pv_shim_setup_dom(struct domain *d, l4_pgentry_t *l4start, -+ unsigned long va_start, unsigned long store_va, -+ unsigned long console_va, unsigned long vphysmap, -+ start_info_t *si); -+ -+#else -+ -+static inline void pv_shim_setup_dom(struct domain *d, l4_pgentry_t *l4start, -+ unsigned long va_start, -+ unsigned long store_va, -+ unsigned long console_va, -+ unsigned long vphysmap, -+ start_info_t *si) -+{ -+ ASSERT_UNREACHABLE(); -+} -+ -+#endif -+ - #endif /* __X86_PV_SHIM_H__ */ - - /* --- -2.14.3 - - -From da4518c5595c048a5c030225533e44e021fffaab Mon Sep 17 00:00:00 2001 -From: Roger Pau Monne -Date: Thu, 11 Jan 2018 11:41:19 +0000 -Subject: [PATCH 57/77] xen/pvshim: set correct domid value -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -If domid is not provided by L0 set domid to 1 by default. Note that L0 -not provinding the domid can cause trouble if the guest tries to use -it's domid instead of DOMID_SELF when performing hypercalls that are -forwarded to the L0 hypervisor. - -Since the domain created is no longer the hardware domain add a hook -to the domain shutdown path in order to forward shutdown operations to -the L0 hypervisor. - -Signed-off-by: Roger Pau Monné -Signed-off-by: Sergey Dyasli ---- -Changes since v1: - - s/get_dom0_domid/get_initial_domain_id/. - - Add a comment regarding why dom0 needs to be global. - - Fix compilation of xen/common/domain.c on ARM. 
---- - xen/arch/x86/dom0_build.c | 2 +- - xen/arch/x86/guest/xen.c | 5 +++++ - xen/arch/x86/pv/shim.c | 21 +++++++++++++++++++++ - xen/arch/x86/setup.c | 16 +++++++++++----- - xen/common/domain.c | 12 ++++++++++++ - xen/include/asm-x86/guest/xen.h | 6 ++++++ - xen/include/asm-x86/pv/shim.h | 10 ++++++++++ - 7 files changed, 66 insertions(+), 6 deletions(-) - -diff --git a/xen/arch/x86/dom0_build.c b/xen/arch/x86/dom0_build.c -index e2bf81b4e7..452298c624 100644 ---- a/xen/arch/x86/dom0_build.c -+++ b/xen/arch/x86/dom0_build.c -@@ -473,7 +473,7 @@ int __init construct_dom0(struct domain *d, const module_t *image, - int rc; - - /* Sanity! */ -- BUG_ON(d->domain_id != 0); -+ BUG_ON(!pv_shim && d->domain_id != 0); - BUG_ON(d->vcpu[0] == NULL); - BUG_ON(d->vcpu[0]->is_initialised); - -diff --git a/xen/arch/x86/guest/xen.c b/xen/arch/x86/guest/xen.c -index 27a6c47753..aff16a0e35 100644 ---- a/xen/arch/x86/guest/xen.c -+++ b/xen/arch/x86/guest/xen.c -@@ -322,6 +322,11 @@ const unsigned long *__init hypervisor_reserved_pages(unsigned int *size) - return reserved_pages; - } - -+uint32_t hypervisor_cpuid_base(void) -+{ -+ return xen_cpuid_base; -+} -+ - /* - * Local variables: - * mode: C -diff --git a/xen/arch/x86/pv/shim.c b/xen/arch/x86/pv/shim.c -index 75365b0697..78351c9ee0 100644 ---- a/xen/arch/x86/pv/shim.c -+++ b/xen/arch/x86/pv/shim.c -@@ -20,6 +20,7 @@ - */ - #include - #include -+#include - #include - - #include -@@ -27,6 +28,8 @@ - #include - #include - -+#include -+ - #ifndef CONFIG_PV_SHIM_EXCLUSIVE - bool pv_shim; - boolean_param("pv-shim", pv_shim); -@@ -91,6 +94,24 @@ void __init pv_shim_setup_dom(struct domain *d, l4_pgentry_t *l4start, - #undef SET_AND_MAP_PARAM - } - -+void pv_shim_shutdown(uint8_t reason) -+{ -+ /* XXX: handle suspend */ -+ xen_hypercall_shutdown(reason); -+} -+ -+domid_t get_initial_domain_id(void) -+{ -+ uint32_t eax, ebx, ecx, edx; -+ -+ if ( !pv_shim ) -+ return 0; -+ -+ cpuid(hypervisor_cpuid_base() + 4, &eax, &ebx, &ecx, &edx); -+ -+ return (eax & XEN_HVM_CPUID_DOMID_PRESENT) ? ecx : 1; -+} -+ - /* - * Local variables: - * mode: C -diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c -index 2279014f74..7091c38047 100644 ---- a/xen/arch/x86/setup.c -+++ b/xen/arch/x86/setup.c -@@ -104,6 +104,12 @@ unsigned long __read_mostly mmu_cr4_features = XEN_MINIMAL_CR4; - #define SMEP_HVM_ONLY (-1) - static s8 __initdata opt_smep = 1; - -+/* -+ * Initial domain place holder. Needs to be global so it can be created in -+ * __start_xen and unpaused in init_done. -+ */ -+static struct domain *__initdata dom0; -+ - static int __init parse_smep_param(const char *s) - { - if ( !*s ) -@@ -576,11 +582,11 @@ static void noinline init_done(void) - - system_state = SYS_STATE_active; - -+ domain_unpause_by_systemcontroller(dom0); -+ - /* MUST be done prior to removing .init data. */ - unregister_init_virtual_region(); - -- domain_unpause_by_systemcontroller(hardware_domain); -- - /* Zero the .init code and data. */ - for ( va = __init_begin; va < _p(__init_end); va += PAGE_SIZE ) - clear_page(va); -@@ -659,7 +665,6 @@ void __init noreturn __start_xen(unsigned long mbi_p) - unsigned long nr_pages, raw_max_page, modules_headroom, *module_map; - int i, j, e820_warn = 0, bytes = 0; - bool acpi_boot_table_init_done = false, relocated = false; -- struct domain *dom0; - struct ns16550_defaults ns16550 = { - .data_bits = 8, - .parity = 'n', -@@ -1617,11 +1622,12 @@ void __init noreturn __start_xen(unsigned long mbi_p) - } - - /* Create initial domain 0. 
*/ -- dom0 = domain_create(0, domcr_flags, 0, &config); -+ dom0 = domain_create(get_initial_domain_id(), domcr_flags, 0, &config); - if ( IS_ERR(dom0) || (alloc_dom0_vcpu0(dom0) == NULL) ) - panic("Error creating domain 0"); - -- dom0->is_privileged = 1; -+ if ( !pv_shim ) -+ dom0->is_privileged = 1; - dom0->target = NULL; - - /* Grab the DOM0 command line. */ -diff --git a/xen/common/domain.c b/xen/common/domain.c -index 7484693a87..1ba05fa3a1 100644 ---- a/xen/common/domain.c -+++ b/xen/common/domain.c -@@ -43,6 +43,10 @@ - #include - #include - -+#ifdef CONFIG_X86 -+#include -+#endif -+ - /* Linux config option: propageted to domain0 */ - /* xen_processor_pmbits: xen control Cx, Px, ... */ - unsigned int xen_processor_pmbits = XEN_PROCESSOR_PM_PX; -@@ -689,6 +693,14 @@ void domain_shutdown(struct domain *d, u8 reason) - { - struct vcpu *v; - -+#ifdef CONFIG_X86 -+ if ( pv_shim ) -+ { -+ pv_shim_shutdown(reason); -+ return; -+ } -+#endif -+ - spin_lock(&d->shutdown_lock); - - if ( d->shutdown_code == SHUTDOWN_CODE_INVALID ) -diff --git a/xen/include/asm-x86/guest/xen.h b/xen/include/asm-x86/guest/xen.h -index 62255fda8b..ac48dcbe44 100644 ---- a/xen/include/asm-x86/guest/xen.h -+++ b/xen/include/asm-x86/guest/xen.h -@@ -38,6 +38,7 @@ int hypervisor_alloc_unused_page(mfn_t *mfn); - int hypervisor_free_unused_page(mfn_t mfn); - void hypervisor_fixup_e820(struct e820map *e820); - const unsigned long *hypervisor_reserved_pages(unsigned int *size); -+uint32_t hypervisor_cpuid_base(void); - - DECLARE_PER_CPU(unsigned int, vcpu_id); - DECLARE_PER_CPU(struct vcpu_info *, vcpu_info); -@@ -66,6 +67,11 @@ static inline const unsigned long *hypervisor_reserved_pages(unsigned int *size) - ASSERT_UNREACHABLE(); - return NULL; - }; -+static inline uint32_t hypervisor_cpuid_base(void) -+{ -+ ASSERT_UNREACHABLE(); -+ return 0; -+}; - - #endif /* CONFIG_XEN_GUEST */ - #endif /* __X86_GUEST_XEN_H__ */ -diff --git a/xen/include/asm-x86/pv/shim.h b/xen/include/asm-x86/pv/shim.h -index b0c361cba1..ff7c050dc6 100644 ---- a/xen/include/asm-x86/pv/shim.h -+++ b/xen/include/asm-x86/pv/shim.h -@@ -35,6 +35,8 @@ void pv_shim_setup_dom(struct domain *d, l4_pgentry_t *l4start, - unsigned long va_start, unsigned long store_va, - unsigned long console_va, unsigned long vphysmap, - start_info_t *si); -+void pv_shim_shutdown(uint8_t reason); -+domid_t get_initial_domain_id(void); - - #else - -@@ -47,6 +49,14 @@ static inline void pv_shim_setup_dom(struct domain *d, l4_pgentry_t *l4start, - { - ASSERT_UNREACHABLE(); - } -+static inline void pv_shim_shutdown(uint8_t reason) -+{ -+ ASSERT_UNREACHABLE(); -+} -+static inline domid_t get_initial_domain_id(void) -+{ -+ return 0; -+} - - #endif - --- -2.14.3 - - -From bbad376ab1c1c57ba31059bd2269aa9f213579d6 Mon Sep 17 00:00:00 2001 -From: Roger Pau Monne -Date: Thu, 11 Jan 2018 11:41:19 +0000 -Subject: [PATCH 58/77] xen/pvshim: forward evtchn ops between L0 Xen and L2 - DomU -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Note that the unmask and the virq operations are handled by the shim -itself, and that FIFO event channels are not exposed to the guest. - -Signed-off-by: Roger Pau Monné -Signed-off-by: Anthony Liguori -Signed-off-by: Sergey Dyasli ---- -Changes since v1: - - Use find_first_set_bit instead of ffsl. - - Indent macro one more level. - - Have a single evtchn_close struct in pv_shim_event_channel_op. - - Add blank lines between switch cases. - - Use -EOPNOTSUPP in order to signal lack of FIFO or PIRQ support. 
- - Switch evtchn_bind_virq parameter to evtchn_port_t and use 0 signal - allocation needed. - - Switch evtchn helpers return type to int instead of long. - - Re-write event channel hypercall table handler instead of adding - hooks. - - Remove the pv_domain variable and instead use a static variable in - shim code. ---- - xen/arch/x86/compat.c | 4 +- - xen/arch/x86/guest/xen.c | 25 +++- - xen/arch/x86/pv/hypercall.c | 17 +++ - xen/arch/x86/pv/shim.c | 263 ++++++++++++++++++++++++++++++++++++++ - xen/common/event_channel.c | 99 ++++++++------ - xen/drivers/char/xen_pv_console.c | 11 +- - xen/include/asm-x86/hypercall.h | 3 + - xen/include/asm-x86/pv/shim.h | 5 + - xen/include/xen/event.h | 15 +++ - xen/include/xen/pv_console.h | 6 + - 10 files changed, 402 insertions(+), 46 deletions(-) - -diff --git a/xen/arch/x86/compat.c b/xen/arch/x86/compat.c -index f417cd5034..9d376a4589 100644 ---- a/xen/arch/x86/compat.c -+++ b/xen/arch/x86/compat.c -@@ -69,8 +69,8 @@ long do_event_channel_op_compat(XEN_GUEST_HANDLE_PARAM(evtchn_op_t) uop) - case EVTCHNOP_bind_ipi: - case EVTCHNOP_bind_vcpu: - case EVTCHNOP_unmask: -- return do_event_channel_op(op.cmd, -- guest_handle_from_ptr(&uop.p->u, void)); -+ return pv_get_hypercall_handler(__HYPERVISOR_event_channel_op, false) -+ (op.cmd, (unsigned long)&uop.p->u, 0, 0, 0, 0); - - default: - return -ENOSYS; -diff --git a/xen/arch/x86/guest/xen.c b/xen/arch/x86/guest/xen.c -index aff16a0e35..57b297ad47 100644 ---- a/xen/arch/x86/guest/xen.c -+++ b/xen/arch/x86/guest/xen.c -@@ -18,6 +18,7 @@ - * - * Copyright (c) 2017 Citrix Systems Ltd. - */ -+#include - #include - #include - #include -@@ -193,11 +194,31 @@ static void __init init_memmap(void) - static void xen_evtchn_upcall(struct cpu_user_regs *regs) - { - struct vcpu_info *vcpu_info = this_cpu(vcpu_info); -+ unsigned long pending; - - vcpu_info->evtchn_upcall_pending = 0; -- write_atomic(&vcpu_info->evtchn_pending_sel, 0); -+ pending = xchg(&vcpu_info->evtchn_pending_sel, 0); - -- pv_console_rx(regs); -+ while ( pending ) -+ { -+ unsigned int l1 = find_first_set_bit(pending); -+ unsigned long evtchn = xchg(&XEN_shared_info->evtchn_pending[l1], 0); -+ -+ __clear_bit(l1, &pending); -+ evtchn &= ~XEN_shared_info->evtchn_mask[l1]; -+ while ( evtchn ) -+ { -+ unsigned int port = find_first_set_bit(evtchn); -+ -+ __clear_bit(port, &evtchn); -+ port += l1 * BITS_PER_LONG; -+ -+ if ( pv_console && port == pv_console_evtchn() ) -+ pv_console_rx(regs); -+ else if ( pv_shim ) -+ pv_shim_inject_evtchn(port); -+ } -+ } - - ack_APIC_irq(); - } -diff --git a/xen/arch/x86/pv/hypercall.c b/xen/arch/x86/pv/hypercall.c -index f79f7eef62..3b72d6a44d 100644 ---- a/xen/arch/x86/pv/hypercall.c -+++ b/xen/arch/x86/pv/hypercall.c -@@ -320,6 +320,23 @@ void hypercall_page_initialise_ring1_kernel(void *hypercall_page) - *(u16 *)(p+ 6) = (HYPERCALL_VECTOR << 8) | 0xcd; /* int $xx */ - } - -+void __init pv_hypercall_table_replace(unsigned int hypercall, -+ hypercall_fn_t * native, -+ hypercall_fn_t *compat) -+{ -+#define HANDLER_POINTER(f) \ -+ ((unsigned long *)__va(__pa(&pv_hypercall_table[hypercall].f))) -+ write_atomic(HANDLER_POINTER(native), (unsigned long)native); -+ write_atomic(HANDLER_POINTER(compat), (unsigned long)compat); -+#undef HANDLER_POINTER -+} -+ -+hypercall_fn_t *pv_get_hypercall_handler(unsigned int hypercall, bool compat) -+{ -+ return compat ? 
pv_hypercall_table[hypercall].compat -+ : pv_hypercall_table[hypercall].native; -+} -+ - /* - * Local variables: - * mode: C -diff --git a/xen/arch/x86/pv/shim.c b/xen/arch/x86/pv/shim.c -index 78351c9ee0..36f3a366d3 100644 ---- a/xen/arch/x86/pv/shim.c -+++ b/xen/arch/x86/pv/shim.c -@@ -18,6 +18,8 @@ - * - * Copyright (c) 2017 Citrix Systems Ltd. - */ -+#include -+#include - #include - #include - #include -@@ -35,6 +37,10 @@ bool pv_shim; - boolean_param("pv-shim", pv_shim); - #endif - -+static struct domain *guest; -+ -+static long pv_shim_event_channel_op(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg); -+ - #define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER| \ - _PAGE_GUEST_KERNEL) - #define COMPAT_L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED) -@@ -63,6 +69,27 @@ static void __init replace_va_mapping(struct domain *d, l4_pgentry_t *l4start, - : COMPAT_L1_PROT)); - } - -+static void evtchn_reserve(struct domain *d, unsigned int port) -+{ -+ ASSERT(port_is_valid(d, port)); -+ evtchn_from_port(d, port)->state = ECS_RESERVED; -+ BUG_ON(xen_hypercall_evtchn_unmask(port)); -+} -+ -+static bool evtchn_handled(struct domain *d, unsigned int port) -+{ -+ ASSERT(port_is_valid(d, port)); -+ /* The shim manages VIRQs, the rest is forwarded to L0. */ -+ return evtchn_from_port(d, port)->state == ECS_VIRQ; -+} -+ -+static void evtchn_assign_vcpu(struct domain *d, unsigned int port, -+ unsigned int vcpu) -+{ -+ ASSERT(port_is_valid(d, port)); -+ evtchn_from_port(d, port)->notify_vcpu_id = vcpu; -+} -+ - void __init pv_shim_setup_dom(struct domain *d, l4_pgentry_t *l4start, - unsigned long va_start, unsigned long store_va, - unsigned long console_va, unsigned long vphysmap, -@@ -82,6 +109,11 @@ void __init pv_shim_setup_dom(struct domain *d, l4_pgentry_t *l4start, - replace_va_mapping(d, l4start, va, param); \ - dom0_update_physmap(d, PFN_DOWN((va) - va_start), param, vphysmap); \ - } \ -+ else \ -+ { \ -+ BUG_ON(evtchn_allocate_port(d, param)); \ -+ evtchn_reserve(d, param); \ -+ } \ - }) - SET_AND_MAP_PARAM(HVM_PARAM_STORE_PFN, si->store_mfn, store_va); - SET_AND_MAP_PARAM(HVM_PARAM_STORE_EVTCHN, si->store_evtchn, 0); -@@ -92,6 +124,10 @@ void __init pv_shim_setup_dom(struct domain *d, l4_pgentry_t *l4start, - SET_AND_MAP_PARAM(HVM_PARAM_CONSOLE_EVTCHN, si->console.domU.evtchn, 0); - } - #undef SET_AND_MAP_PARAM -+ pv_hypercall_table_replace(__HYPERVISOR_event_channel_op, -+ (hypercall_fn_t *)pv_shim_event_channel_op, -+ (hypercall_fn_t *)pv_shim_event_channel_op); -+ guest = d; - } - - void pv_shim_shutdown(uint8_t reason) -@@ -100,6 +136,233 @@ void pv_shim_shutdown(uint8_t reason) - xen_hypercall_shutdown(reason); - } - -+static long pv_shim_event_channel_op(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg) -+{ -+ struct domain *d = current->domain; -+ struct evtchn_close close; -+ long rc; -+ -+ switch ( cmd ) -+ { -+#define EVTCHN_FORWARD(cmd, port_field) \ -+ case EVTCHNOP_##cmd: { \ -+ struct evtchn_##cmd op; \ -+ \ -+ if ( copy_from_guest(&op, arg, 1) != 0 ) \ -+ return -EFAULT; \ -+ \ -+ rc = xen_hypercall_event_channel_op(EVTCHNOP_##cmd, &op); \ -+ if ( rc ) \ -+ break; \ -+ \ -+ spin_lock(&d->event_lock); \ -+ rc = evtchn_allocate_port(d, op.port_field); \ -+ if ( rc ) \ -+ { \ -+ close.port = op.port_field; \ -+ BUG_ON(xen_hypercall_event_channel_op(EVTCHNOP_close, &close)); \ -+ } \ -+ else \ -+ evtchn_reserve(d, op.port_field); \ -+ spin_unlock(&d->event_lock); \ -+ \ -+ if ( !rc && __copy_to_guest(arg, &op, 1) ) \ -+ rc = -EFAULT; \ -+ \ -+ break; \ -+ } -+ -+ 
EVTCHN_FORWARD(alloc_unbound, port) -+ EVTCHN_FORWARD(bind_interdomain, local_port) -+#undef EVTCHN_FORWARD -+ -+ case EVTCHNOP_bind_virq: { -+ struct evtchn_bind_virq virq; -+ struct evtchn_alloc_unbound alloc = { -+ .dom = DOMID_SELF, -+ .remote_dom = DOMID_SELF, -+ }; -+ -+ if ( copy_from_guest(&virq, arg, 1) != 0 ) -+ return -EFAULT; -+ /* -+ * The event channel space is actually controlled by L0 Xen, so -+ * allocate a port from L0 and then force the VIRQ to be bound to that -+ * specific port. -+ * -+ * This is only required for VIRQ because the rest of the event channel -+ * operations are handled directly by L0. -+ */ -+ rc = xen_hypercall_event_channel_op(EVTCHNOP_alloc_unbound, &alloc); -+ if ( rc ) -+ break; -+ -+ /* Force L1 to use the event channel port allocated on L0. */ -+ rc = evtchn_bind_virq(&virq, alloc.port); -+ if ( rc ) -+ { -+ close.port = alloc.port; -+ BUG_ON(xen_hypercall_event_channel_op(EVTCHNOP_close, &close)); -+ } -+ -+ if ( !rc && __copy_to_guest(arg, &virq, 1) ) -+ rc = -EFAULT; -+ -+ break; -+ } -+ -+ case EVTCHNOP_status: { -+ struct evtchn_status status; -+ -+ if ( copy_from_guest(&status, arg, 1) != 0 ) -+ return -EFAULT; -+ -+ /* -+ * NB: if the event channel is not handled by the shim, just forward -+ * the status request to L0, even if the port is not valid. -+ */ -+ if ( port_is_valid(d, status.port) && evtchn_handled(d, status.port) ) -+ rc = evtchn_status(&status); -+ else -+ rc = xen_hypercall_event_channel_op(EVTCHNOP_status, &status); -+ -+ break; -+ } -+ -+ case EVTCHNOP_bind_vcpu: { -+ struct evtchn_bind_vcpu vcpu; -+ -+ if ( copy_from_guest(&vcpu, arg, 1) != 0 ) -+ return -EFAULT; -+ -+ if ( !port_is_valid(d, vcpu.port) ) -+ return -EINVAL; -+ -+ if ( evtchn_handled(d, vcpu.port) ) -+ rc = evtchn_bind_vcpu(vcpu.port, vcpu.vcpu); -+ else -+ { -+ rc = xen_hypercall_event_channel_op(EVTCHNOP_bind_vcpu, &vcpu); -+ if ( !rc ) -+ evtchn_assign_vcpu(d, vcpu.port, vcpu.vcpu); -+ } -+ -+ break; -+ } -+ -+ case EVTCHNOP_close: { -+ if ( copy_from_guest(&close, arg, 1) != 0 ) -+ return -EFAULT; -+ -+ if ( !port_is_valid(d, close.port) ) -+ return -EINVAL; -+ -+ set_bit(close.port, XEN_shared_info->evtchn_mask); -+ -+ if ( evtchn_handled(d, close.port) ) -+ { -+ rc = evtchn_close(d, close.port, true); -+ if ( rc ) -+ break; -+ } -+ else -+ evtchn_free(d, evtchn_from_port(d, close.port)); -+ -+ rc = xen_hypercall_event_channel_op(EVTCHNOP_close, &close); -+ if ( rc ) -+ /* -+ * If the port cannot be closed on the L0 mark it as reserved -+ * in the shim to avoid re-using it. 
-+ */ -+ evtchn_reserve(d, close.port); -+ -+ break; -+ } -+ -+ case EVTCHNOP_bind_ipi: { -+ struct evtchn_bind_ipi ipi; -+ -+ if ( copy_from_guest(&ipi, arg, 1) != 0 ) -+ return -EFAULT; -+ -+ rc = xen_hypercall_event_channel_op(EVTCHNOP_bind_ipi, &ipi); -+ if ( rc ) -+ break; -+ -+ spin_lock(&d->event_lock); -+ rc = evtchn_allocate_port(d, ipi.port); -+ if ( rc ) -+ { -+ spin_unlock(&d->event_lock); -+ -+ close.port = ipi.port; -+ BUG_ON(xen_hypercall_event_channel_op(EVTCHNOP_close, &close)); -+ break; -+ } -+ -+ evtchn_assign_vcpu(d, ipi.port, ipi.vcpu); -+ evtchn_reserve(d, ipi.port); -+ spin_unlock(&d->event_lock); -+ -+ if ( __copy_to_guest(arg, &ipi, 1) ) -+ rc = -EFAULT; -+ -+ break; -+ } -+ -+ case EVTCHNOP_unmask: { -+ struct evtchn_unmask unmask; -+ -+ if ( copy_from_guest(&unmask, arg, 1) != 0 ) -+ return -EFAULT; -+ -+ /* Unmask is handled in L1 */ -+ rc = evtchn_unmask(unmask.port); -+ -+ break; -+ } -+ -+ case EVTCHNOP_send: { -+ struct evtchn_send send; -+ -+ if ( copy_from_guest(&send, arg, 1) != 0 ) -+ return -EFAULT; -+ -+ rc = xen_hypercall_event_channel_op(EVTCHNOP_send, &send); -+ -+ break; -+ } -+ -+ case EVTCHNOP_reset: { -+ struct evtchn_reset reset; -+ -+ if ( copy_from_guest(&reset, arg, 1) != 0 ) -+ return -EFAULT; -+ -+ rc = xen_hypercall_event_channel_op(EVTCHNOP_reset, &reset); -+ -+ break; -+ } -+ -+ default: -+ /* No FIFO or PIRQ support for now */ -+ rc = -EOPNOTSUPP; -+ break; -+ } -+ -+ return rc; -+} -+ -+void pv_shim_inject_evtchn(unsigned int port) -+{ -+ if ( port_is_valid(guest, port) ) -+ { -+ struct evtchn *chn = evtchn_from_port(guest, port); -+ -+ evtchn_port_set_pending(guest, chn->notify_vcpu_id, chn); -+ } -+} -+ - domid_t get_initial_domain_id(void) - { - uint32_t eax, ebx, ecx, edx; -diff --git a/xen/common/event_channel.c b/xen/common/event_channel.c -index c69f9db6db..be834c5c78 100644 ---- a/xen/common/event_channel.c -+++ b/xen/common/event_channel.c -@@ -156,46 +156,62 @@ static void free_evtchn_bucket(struct domain *d, struct evtchn *bucket) - xfree(bucket); - } - -+int evtchn_allocate_port(struct domain *d, evtchn_port_t port) -+{ -+ if ( port > d->max_evtchn_port || port >= d->max_evtchns ) -+ return -ENOSPC; -+ -+ if ( port_is_valid(d, port) ) -+ { -+ if ( evtchn_from_port(d, port)->state != ECS_FREE || -+ evtchn_port_is_busy(d, port) ) -+ return -EBUSY; -+ } -+ else -+ { -+ struct evtchn *chn; -+ struct evtchn **grp; -+ -+ if ( !group_from_port(d, port) ) -+ { -+ grp = xzalloc_array(struct evtchn *, BUCKETS_PER_GROUP); -+ if ( !grp ) -+ return -ENOMEM; -+ group_from_port(d, port) = grp; -+ } -+ -+ chn = alloc_evtchn_bucket(d, port); -+ if ( !chn ) -+ return -ENOMEM; -+ bucket_from_port(d, port) = chn; -+ -+ write_atomic(&d->valid_evtchns, d->valid_evtchns + EVTCHNS_PER_BUCKET); -+ } -+ -+ return 0; -+} -+ - static int get_free_port(struct domain *d) - { -- struct evtchn *chn; -- struct evtchn **grp; - int port; - - if ( d->is_dying ) - return -EINVAL; - -- for ( port = 0; port_is_valid(d, port); port++ ) -+ for ( port = 0; port <= d->max_evtchn_port; port++ ) - { -- if ( port > d->max_evtchn_port ) -- return -ENOSPC; -- if ( evtchn_from_port(d, port)->state == ECS_FREE -- && !evtchn_port_is_busy(d, port) ) -- return port; -- } -+ int rc = evtchn_allocate_port(d, port); - -- if ( port == d->max_evtchns || port > d->max_evtchn_port ) -- return -ENOSPC; -+ if ( rc == -EBUSY ) -+ continue; - -- if ( !group_from_port(d, port) ) -- { -- grp = xzalloc_array(struct evtchn *, BUCKETS_PER_GROUP); -- if ( !grp ) -- return -ENOMEM; -- 
group_from_port(d, port) = grp; -+ return port; - } - -- chn = alloc_evtchn_bucket(d, port); -- if ( !chn ) -- return -ENOMEM; -- bucket_from_port(d, port) = chn; -- -- write_atomic(&d->valid_evtchns, d->valid_evtchns + EVTCHNS_PER_BUCKET); -- -- return port; -+ return -ENOSPC; - } - --static void free_evtchn(struct domain *d, struct evtchn *chn) -+void evtchn_free(struct domain *d, struct evtchn *chn) - { - /* Clear pending event to avoid unexpected behavior on re-bind. */ - evtchn_port_clear_pending(d, chn); -@@ -345,13 +361,13 @@ static long evtchn_bind_interdomain(evtchn_bind_interdomain_t *bind) - } - - --static long evtchn_bind_virq(evtchn_bind_virq_t *bind) -+int evtchn_bind_virq(evtchn_bind_virq_t *bind, evtchn_port_t port) - { - struct evtchn *chn; - struct vcpu *v; - struct domain *d = current->domain; -- int port, virq = bind->virq, vcpu = bind->vcpu; -- long rc = 0; -+ int virq = bind->virq, vcpu = bind->vcpu; -+ int rc = 0; - - if ( (virq < 0) || (virq >= ARRAY_SIZE(v->virq_to_evtchn)) ) - return -EINVAL; -@@ -368,8 +384,19 @@ static long evtchn_bind_virq(evtchn_bind_virq_t *bind) - if ( v->virq_to_evtchn[virq] != 0 ) - ERROR_EXIT(-EEXIST); - -- if ( (port = get_free_port(d)) < 0 ) -- ERROR_EXIT(port); -+ if ( port != 0 ) -+ { -+ if ( (rc = evtchn_allocate_port(d, port)) != 0 ) -+ ERROR_EXIT(rc); -+ } -+ else -+ { -+ int alloc_port = get_free_port(d); -+ -+ if ( alloc_port < 0 ) -+ ERROR_EXIT(alloc_port); -+ port = alloc_port; -+ } - - chn = evtchn_from_port(d, port); - -@@ -511,7 +538,7 @@ static long evtchn_bind_pirq(evtchn_bind_pirq_t *bind) - } - - --static long evtchn_close(struct domain *d1, int port1, bool_t guest) -+int evtchn_close(struct domain *d1, int port1, bool guest) - { - struct domain *d2 = NULL; - struct vcpu *v; -@@ -619,7 +646,7 @@ static long evtchn_close(struct domain *d1, int port1, bool_t guest) - - double_evtchn_lock(chn1, chn2); - -- free_evtchn(d1, chn1); -+ evtchn_free(d1, chn1); - - chn2->state = ECS_UNBOUND; - chn2->u.unbound.remote_domid = d1->domain_id; -@@ -633,7 +660,7 @@ static long evtchn_close(struct domain *d1, int port1, bool_t guest) - } - - spin_lock(&chn1->lock); -- free_evtchn(d1, chn1); -+ evtchn_free(d1, chn1); - spin_unlock(&chn1->lock); - - out: -@@ -839,7 +866,7 @@ static void clear_global_virq_handlers(struct domain *d) - } - } - --static long evtchn_status(evtchn_status_t *status) -+int evtchn_status(evtchn_status_t *status) - { - struct domain *d; - domid_t dom = status->dom; -@@ -1056,7 +1083,7 @@ long do_event_channel_op(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg) - struct evtchn_bind_virq bind_virq; - if ( copy_from_guest(&bind_virq, arg, 1) != 0 ) - return -EFAULT; -- rc = evtchn_bind_virq(&bind_virq); -+ rc = evtchn_bind_virq(&bind_virq, 0); - if ( !rc && __copy_to_guest(arg, &bind_virq, 1) ) - rc = -EFAULT; /* Cleaning up here would be a mess! 
*/ - break; -diff --git a/xen/drivers/char/xen_pv_console.c b/xen/drivers/char/xen_pv_console.c -index d4f0532101..948343303e 100644 ---- a/xen/drivers/char/xen_pv_console.c -+++ b/xen/drivers/char/xen_pv_console.c -@@ -88,6 +88,11 @@ static void notify_daemon(void) - xen_hypercall_evtchn_send(cons_evtchn); - } - -+evtchn_port_t pv_console_evtchn(void) -+{ -+ return cons_evtchn; -+} -+ - size_t pv_console_rx(struct cpu_user_regs *regs) - { - char c; -@@ -97,10 +102,6 @@ size_t pv_console_rx(struct cpu_user_regs *regs) - if ( !cons_ring ) - return 0; - -- /* TODO: move this somewhere */ -- if ( !test_bit(cons_evtchn, XEN_shared_info->evtchn_pending) ) -- return 0; -- - prod = ACCESS_ONCE(cons_ring->in_prod); - cons = cons_ring->in_cons; - -@@ -125,8 +126,6 @@ size_t pv_console_rx(struct cpu_user_regs *regs) - ACCESS_ONCE(cons_ring->in_cons) = cons; - notify_daemon(); - -- clear_bit(cons_evtchn, XEN_shared_info->evtchn_pending); -- - return recv; - } - -diff --git a/xen/include/asm-x86/hypercall.h b/xen/include/asm-x86/hypercall.h -index 3eb4a8db89..b9f3ecf9a3 100644 ---- a/xen/include/asm-x86/hypercall.h -+++ b/xen/include/asm-x86/hypercall.h -@@ -28,6 +28,9 @@ extern const hypercall_args_t hypercall_args_table[NR_hypercalls]; - void pv_hypercall(struct cpu_user_regs *regs); - void hypercall_page_initialise_ring3_kernel(void *hypercall_page); - void hypercall_page_initialise_ring1_kernel(void *hypercall_page); -+void pv_hypercall_table_replace(unsigned int hypercall, hypercall_fn_t * native, -+ hypercall_fn_t *compat); -+hypercall_fn_t *pv_get_hypercall_handler(unsigned int hypercall, bool compat); - - /* - * Both do_mmuext_op() and do_mmu_update(): -diff --git a/xen/include/asm-x86/pv/shim.h b/xen/include/asm-x86/pv/shim.h -index ff7c050dc6..ab656fd854 100644 ---- a/xen/include/asm-x86/pv/shim.h -+++ b/xen/include/asm-x86/pv/shim.h -@@ -36,6 +36,7 @@ void pv_shim_setup_dom(struct domain *d, l4_pgentry_t *l4start, - unsigned long console_va, unsigned long vphysmap, - start_info_t *si); - void pv_shim_shutdown(uint8_t reason); -+void pv_shim_inject_evtchn(unsigned int port); - domid_t get_initial_domain_id(void); - - #else -@@ -53,6 +54,10 @@ static inline void pv_shim_shutdown(uint8_t reason) - { - ASSERT_UNREACHABLE(); - } -+static inline void pv_shim_inject_evtchn(unsigned int port) -+{ -+ ASSERT_UNREACHABLE(); -+} - static inline domid_t get_initial_domain_id(void) - { - return 0; -diff --git a/xen/include/xen/event.h b/xen/include/xen/event.h -index 87915ead69..ebb879e88d 100644 ---- a/xen/include/xen/event.h -+++ b/xen/include/xen/event.h -@@ -48,6 +48,21 @@ int evtchn_send(struct domain *d, unsigned int lport); - /* Bind a local event-channel port to the specified VCPU. */ - long evtchn_bind_vcpu(unsigned int port, unsigned int vcpu_id); - -+/* Bind a VIRQ. */ -+int evtchn_bind_virq(evtchn_bind_virq_t *bind, evtchn_port_t port); -+ -+/* Get the status of an event channel port. */ -+int evtchn_status(evtchn_status_t *status); -+ -+/* Close an event channel. */ -+int evtchn_close(struct domain *d1, int port1, bool guest); -+ -+/* Free an event channel. */ -+void evtchn_free(struct domain *d, struct evtchn *chn); -+ -+/* Allocate a specific event channel port. */ -+int evtchn_allocate_port(struct domain *d, unsigned int port); -+ - /* Unmask a local event-channel port. 
*/ - int evtchn_unmask(unsigned int port); - -diff --git a/xen/include/xen/pv_console.h b/xen/include/xen/pv_console.h -index e578b56620..cb92539666 100644 ---- a/xen/include/xen/pv_console.h -+++ b/xen/include/xen/pv_console.h -@@ -10,6 +10,7 @@ void pv_console_set_rx_handler(serial_rx_fn fn); - void pv_console_init_postirq(void); - void pv_console_puts(const char *buf); - size_t pv_console_rx(struct cpu_user_regs *regs); -+evtchn_port_t pv_console_evtchn(void); - - #else - -@@ -18,6 +19,11 @@ static inline void pv_console_set_rx_handler(serial_rx_fn fn) { } - static inline void pv_console_init_postirq(void) { } - static inline void pv_console_puts(const char *buf) { } - static inline size_t pv_console_rx(struct cpu_user_regs *regs) { return 0; } -+evtchn_port_t pv_console_evtchn(void) -+{ -+ ASSERT_UNREACHABLE(); -+ return 0; -+} - - #endif /* !CONFIG_XEN_GUEST */ - #endif /* __XEN_PV_CONSOLE_H__ */ --- -2.14.3 - - -From 7f5eb7d04ef2616051b82437d3c9595208a7dec1 Mon Sep 17 00:00:00 2001 -From: Roger Pau Monne -Date: Thu, 11 Jan 2018 11:41:19 +0000 -Subject: [PATCH 59/77] xen/pvshim: add grant table operations -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Signed-off-by: Roger Pau Monné -Signed-off-by: Anthony Liguori -Signed-off-by: Andrew Cooper ---- -Changes since v1: - - Use __ of copy_to_guest. - - Return EOPNOTSUPP for not implemented grant table hypercalls. - - Forward user provided buffer in GNTTABOP_query_size. - - Rewrite grant table hypercall handler. ---- - xen/arch/x86/pv/shim.c | 164 ++++++++++++++++++++++++++++++++++ - xen/include/asm-x86/guest/hypercall.h | 6 ++ - 2 files changed, 170 insertions(+) - -diff --git a/xen/arch/x86/pv/shim.c b/xen/arch/x86/pv/shim.c -index 36f3a366d3..eb8b146785 100644 ---- a/xen/arch/x86/pv/shim.c -+++ b/xen/arch/x86/pv/shim.c -@@ -22,6 +22,7 @@ - #include - #include - #include -+#include - #include - #include - -@@ -32,6 +33,8 @@ - - #include - -+#include -+ - #ifndef CONFIG_PV_SHIM_EXCLUSIVE - bool pv_shim; - boolean_param("pv-shim", pv_shim); -@@ -39,7 +42,14 @@ boolean_param("pv-shim", pv_shim); - - static struct domain *guest; - -+static unsigned int nr_grant_list; -+static unsigned long *grant_frames; -+static DEFINE_SPINLOCK(grant_lock); -+ - static long pv_shim_event_channel_op(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg); -+static long pv_shim_grant_table_op(unsigned int cmd, -+ XEN_GUEST_HANDLE_PARAM(void) uop, -+ unsigned int count); - - #define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER| \ - _PAGE_GUEST_KERNEL) -@@ -127,6 +137,9 @@ void __init pv_shim_setup_dom(struct domain *d, l4_pgentry_t *l4start, - pv_hypercall_table_replace(__HYPERVISOR_event_channel_op, - (hypercall_fn_t *)pv_shim_event_channel_op, - (hypercall_fn_t *)pv_shim_event_channel_op); -+ pv_hypercall_table_replace(__HYPERVISOR_grant_table_op, -+ (hypercall_fn_t *)pv_shim_grant_table_op, -+ (hypercall_fn_t *)pv_shim_grant_table_op); - guest = d; - } - -@@ -363,6 +376,157 @@ void pv_shim_inject_evtchn(unsigned int port) - } - } - -+static long pv_shim_grant_table_op(unsigned int cmd, -+ XEN_GUEST_HANDLE_PARAM(void) uop, -+ unsigned int count) -+{ -+ struct domain *d = current->domain; -+ long rc = 0; -+ -+ if ( count != 1 ) -+ return -EINVAL; -+ -+ switch ( cmd ) -+ { -+ case GNTTABOP_setup_table: -+ { -+ bool compat = is_pv_32bit_domain(d); -+ struct gnttab_setup_table nat; -+ struct compat_gnttab_setup_table cmp; -+ unsigned int i; -+ -+ if ( unlikely(compat ? 
copy_from_guest(&cmp, uop, 1) -+ : copy_from_guest(&nat, uop, 1)) || -+ unlikely(compat ? !compat_handle_okay(cmp.frame_list, -+ cmp.nr_frames) -+ : !guest_handle_okay(nat.frame_list, -+ nat.nr_frames)) ) -+ { -+ rc = -EFAULT; -+ break; -+ } -+ if ( compat ) -+#define XLAT_gnttab_setup_table_HNDL_frame_list(d, s) -+ XLAT_gnttab_setup_table(&nat, &cmp); -+#undef XLAT_gnttab_setup_table_HNDL_frame_list -+ -+ nat.status = GNTST_okay; -+ -+ spin_lock(&grant_lock); -+ if ( !nr_grant_list ) -+ { -+ struct gnttab_query_size query_size = { -+ .dom = DOMID_SELF, -+ }; -+ -+ rc = xen_hypercall_grant_table_op(GNTTABOP_query_size, -+ &query_size, 1); -+ if ( rc ) -+ { -+ spin_unlock(&grant_lock); -+ break; -+ } -+ -+ ASSERT(!grant_frames); -+ grant_frames = xzalloc_array(unsigned long, -+ query_size.max_nr_frames); -+ if ( !grant_frames ) -+ { -+ spin_unlock(&grant_lock); -+ rc = -ENOMEM; -+ break; -+ } -+ -+ nr_grant_list = query_size.max_nr_frames; -+ } -+ -+ if ( nat.nr_frames > nr_grant_list ) -+ { -+ spin_unlock(&grant_lock); -+ rc = -EINVAL; -+ break; -+ } -+ -+ for ( i = 0; i < nat.nr_frames; i++ ) -+ { -+ if ( !grant_frames[i] ) -+ { -+ struct xen_add_to_physmap xatp = { -+ .domid = DOMID_SELF, -+ .idx = i, -+ .space = XENMAPSPACE_grant_table, -+ }; -+ mfn_t mfn; -+ -+ rc = hypervisor_alloc_unused_page(&mfn); -+ if ( rc ) -+ { -+ gprintk(XENLOG_ERR, -+ "unable to get memory for grant table\n"); -+ break; -+ } -+ -+ xatp.gpfn = mfn_x(mfn); -+ rc = xen_hypercall_memory_op(XENMEM_add_to_physmap, &xatp); -+ if ( rc ) -+ { -+ hypervisor_free_unused_page(mfn); -+ break; -+ } -+ -+ BUG_ON(iomem_permit_access(d, mfn_x(mfn), mfn_x(mfn))); -+ grant_frames[i] = mfn_x(mfn); -+ } -+ -+ ASSERT(grant_frames[i]); -+ if ( compat ) -+ { -+ compat_pfn_t pfn = grant_frames[i]; -+ -+ if ( __copy_to_compat_offset(cmp.frame_list, i, &pfn, 1) ) -+ { -+ nat.status = GNTST_bad_virt_addr; -+ rc = -EFAULT; -+ break; -+ } -+ } -+ else if ( __copy_to_guest_offset(nat.frame_list, i, -+ &grant_frames[i], 1) ) -+ { -+ nat.status = GNTST_bad_virt_addr; -+ rc = -EFAULT; -+ break; -+ } -+ } -+ spin_unlock(&grant_lock); -+ -+ if ( compat ) -+#define XLAT_gnttab_setup_table_HNDL_frame_list(d, s) -+ XLAT_gnttab_setup_table(&cmp, &nat); -+#undef XLAT_gnttab_setup_table_HNDL_frame_list -+ -+ if ( unlikely(compat ? 
__copy_to_guest(uop, &cmp, 1) -+ : __copy_to_guest(uop, &nat, 1)) ) -+ { -+ rc = -EFAULT; -+ break; -+ } -+ -+ break; -+ } -+ -+ case GNTTABOP_query_size: -+ rc = xen_hypercall_grant_table_op(GNTTABOP_query_size, uop.p, count); -+ break; -+ -+ default: -+ rc = -EOPNOTSUPP; -+ break; -+ } -+ -+ return rc; -+} -+ - domid_t get_initial_domain_id(void) - { - uint32_t eax, ebx, ecx, edx; -diff --git a/xen/include/asm-x86/guest/hypercall.h b/xen/include/asm-x86/guest/hypercall.h -index 81a955d479..e9e626b474 100644 ---- a/xen/include/asm-x86/guest/hypercall.h -+++ b/xen/include/asm-x86/guest/hypercall.h -@@ -110,6 +110,12 @@ static inline long xen_hypercall_event_channel_op(unsigned int cmd, void *arg) - return _hypercall64_2(long, __HYPERVISOR_event_channel_op, cmd, arg); - } - -+static inline long xen_hypercall_grant_table_op(unsigned int cmd, void *arg, -+ unsigned int count) -+{ -+ return _hypercall64_3(long, __HYPERVISOR_grant_table_op, cmd, arg, count); -+} -+ - static inline long xen_hypercall_hvm_op(unsigned int op, void *arg) - { - return _hypercall64_2(long, __HYPERVISOR_hvm_op, op, arg); --- -2.14.3 - - -From cc7d96b98cf02540edf6f387286100a50d6f3d04 Mon Sep 17 00:00:00 2001 -From: Sergey Dyasli -Date: Thu, 11 Jan 2018 11:45:23 +0000 -Subject: [PATCH 60/77] x86/pv-shim: shadow PV console's page for L2 DomU -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Signed-off-by: Sergey Dyasli -Signed-off-by: Wei Liu -[remove notify_guest helper and directly use pv_shim_inject_evtchn] -Signed-off-by: Roger Pau Monné -Signed-off-by: Wei Liu ---- -Changes since v1: - - Use pv_shim_inject_evtchn. ---- - xen/arch/x86/pv/shim.c | 31 ++++++++-- - xen/drivers/char/Makefile | 1 + - xen/drivers/char/console.c | 6 ++ - xen/drivers/char/consoled.c | 148 ++++++++++++++++++++++++++++++++++++++++++++ - xen/include/xen/consoled.h | 27 ++++++++ - 5 files changed, 209 insertions(+), 4 deletions(-) - create mode 100644 xen/drivers/char/consoled.c - create mode 100644 xen/include/xen/consoled.h - -diff --git a/xen/arch/x86/pv/shim.c b/xen/arch/x86/pv/shim.c -index eb8b146785..986f9da58a 100644 ---- a/xen/arch/x86/pv/shim.c -+++ b/xen/arch/x86/pv/shim.c -@@ -25,6 +25,8 @@ - #include - #include - #include -+#include -+#include - - #include - #include -@@ -127,13 +129,28 @@ void __init pv_shim_setup_dom(struct domain *d, l4_pgentry_t *l4start, - }) - SET_AND_MAP_PARAM(HVM_PARAM_STORE_PFN, si->store_mfn, store_va); - SET_AND_MAP_PARAM(HVM_PARAM_STORE_EVTCHN, si->store_evtchn, 0); -+ SET_AND_MAP_PARAM(HVM_PARAM_CONSOLE_EVTCHN, si->console.domU.evtchn, 0); - if ( !pv_console ) -- { - SET_AND_MAP_PARAM(HVM_PARAM_CONSOLE_PFN, si->console.domU.mfn, - console_va); -- SET_AND_MAP_PARAM(HVM_PARAM_CONSOLE_EVTCHN, si->console.domU.evtchn, 0); -- } - #undef SET_AND_MAP_PARAM -+ else -+ { -+ /* Allocate a new page for DomU's PV console */ -+ void *page = alloc_xenheap_pages(0, MEMF_bits(32)); -+ uint64_t console_mfn; -+ -+ ASSERT(page); -+ clear_page(page); -+ console_mfn = virt_to_mfn(page); -+ si->console.domU.mfn = console_mfn; -+ share_xen_page_with_guest(mfn_to_page(console_mfn), d, -+ XENSHARE_writable); -+ replace_va_mapping(d, l4start, console_va, console_mfn); -+ dom0_update_physmap(d, (console_va - va_start) >> PAGE_SHIFT, -+ console_mfn, vphysmap); -+ consoled_set_ring_addr(page); -+ } - pv_hypercall_table_replace(__HYPERVISOR_event_channel_op, - (hypercall_fn_t *)pv_shim_event_channel_op, - (hypercall_fn_t *)pv_shim_event_channel_op); -@@ -341,7 +358,13 @@ static long 
pv_shim_event_channel_op(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg) - if ( copy_from_guest(&send, arg, 1) != 0 ) - return -EFAULT; - -- rc = xen_hypercall_event_channel_op(EVTCHNOP_send, &send); -+ if ( pv_console && send.port == pv_console_evtchn() ) -+ { -+ consoled_guest_rx(); -+ rc = 0; -+ } -+ else -+ rc = xen_hypercall_event_channel_op(EVTCHNOP_send, &send); - - break; - } -diff --git a/xen/drivers/char/Makefile b/xen/drivers/char/Makefile -index 9d48d0f2dc..0d48b16e8d 100644 ---- a/xen/drivers/char/Makefile -+++ b/xen/drivers/char/Makefile -@@ -9,3 +9,4 @@ obj-$(CONFIG_HAS_EHCI) += ehci-dbgp.o - obj-$(CONFIG_ARM) += arm-uart.o - obj-y += serial.o - obj-$(CONFIG_XEN_GUEST) += xen_pv_console.o -+obj-$(CONFIG_PV_SHIM) += consoled.o -diff --git a/xen/drivers/char/console.c b/xen/drivers/char/console.c -index 8acd358395..18f5b7f7aa 100644 ---- a/xen/drivers/char/console.c -+++ b/xen/drivers/char/console.c -@@ -32,6 +32,7 @@ - #include - - #ifdef CONFIG_X86 -+#include - #include - #include - #endif -@@ -415,6 +416,11 @@ static void __serial_rx(char c, struct cpu_user_regs *regs) - serial_rx_ring[SERIAL_RX_MASK(serial_rx_prod++)] = c; - /* Always notify the guest: prevents receive path from getting stuck. */ - send_global_virq(VIRQ_CONSOLE); -+ -+#ifdef CONFIG_X86 -+ if ( pv_shim && pv_console ) -+ consoled_guest_tx(c); -+#endif - } - - static void serial_rx(char c, struct cpu_user_regs *regs) -diff --git a/xen/drivers/char/consoled.c b/xen/drivers/char/consoled.c -new file mode 100644 -index 0000000000..552abf5766 ---- /dev/null -+++ b/xen/drivers/char/consoled.c -@@ -0,0 +1,148 @@ -+/****************************************************************************** -+ * drivers/char/consoled.c -+ * -+ * A backend driver for Xen's PV console. -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; If not, see . -+ * -+ * Copyright (c) 2017 Citrix Systems Ltd. -+ */ -+ -+#include -+#include -+#include -+#include -+ -+#include -+ -+static struct xencons_interface *cons_ring; -+static DEFINE_SPINLOCK(rx_lock); -+ -+void consoled_set_ring_addr(struct xencons_interface *ring) -+{ -+ cons_ring = ring; -+} -+ -+struct xencons_interface *consoled_get_ring_addr(void) -+{ -+ return cons_ring; -+} -+ -+#define BUF_SZ 255 -+static char buf[BUF_SZ + 1]; -+ -+/* Receives characters from a domain's PV console */ -+size_t consoled_guest_rx(void) -+{ -+ size_t recv = 0, idx = 0; -+ XENCONS_RING_IDX cons, prod; -+ -+ if ( !cons_ring ) -+ return 0; -+ -+ spin_lock(&rx_lock); -+ -+ cons = cons_ring->out_cons; -+ prod = ACCESS_ONCE(cons_ring->out_prod); -+ -+ /* -+ * Latch pointers before accessing the ring. Included compiler barrier also -+ * ensures that pointers are really read only once into local variables. -+ */ -+ smp_rmb(); -+ -+ ASSERT((prod - cons) <= sizeof(cons_ring->out)); -+ -+ /* Is the ring empty? 
*/ -+ if ( cons == prod ) -+ goto out; -+ -+ while ( cons != prod ) -+ { -+ char c = cons_ring->out[MASK_XENCONS_IDX(cons++, cons_ring->out)]; -+ -+ buf[idx++] = c; -+ recv++; -+ -+ if ( idx >= BUF_SZ ) -+ { -+ pv_console_puts(buf); -+ idx = 0; -+ } -+ } -+ -+ if ( idx ) -+ { -+ buf[idx] = '\0'; -+ pv_console_puts(buf); -+ } -+ -+ /* No need for a mem barrier because every character was already consumed */ -+ barrier(); -+ ACCESS_ONCE(cons_ring->out_cons) = cons; -+ pv_shim_inject_evtchn(pv_console_evtchn()); -+ -+ out: -+ spin_unlock(&rx_lock); -+ -+ return recv; -+} -+ -+/* Sends a character into a domain's PV console */ -+size_t consoled_guest_tx(char c) -+{ -+ size_t sent = 0; -+ XENCONS_RING_IDX cons, prod; -+ -+ if ( !cons_ring ) -+ return 0; -+ -+ cons = ACCESS_ONCE(cons_ring->in_cons); -+ prod = cons_ring->in_prod; -+ -+ /* -+ * Latch pointers before accessing the ring. Included compiler barrier also -+ * ensures that pointers are really read only once into local variables. -+ */ -+ smp_rmb(); -+ -+ ASSERT((prod - cons) <= sizeof(cons_ring->in)); -+ -+ /* Is the ring out of space? */ -+ if ( sizeof(cons_ring->in) - (prod - cons) == 0 ) -+ goto notify; -+ -+ cons_ring->in[MASK_XENCONS_IDX(prod++, cons_ring->in)] = c; -+ sent++; -+ -+ /* Write to the ring before updating the pointer */ -+ smp_wmb(); -+ ACCESS_ONCE(cons_ring->in_prod) = prod; -+ -+ notify: -+ /* Always notify the guest: prevents receive path from getting stuck. */ -+ pv_shim_inject_evtchn(pv_console_evtchn()); -+ -+ return sent; -+} -+ -+/* -+ * Local variables: -+ * mode: C -+ * c-file-style: "BSD" -+ * c-basic-offset: 4 -+ * tab-width: 4 -+ * indent-tabs-mode: nil -+ * End: -+ */ -diff --git a/xen/include/xen/consoled.h b/xen/include/xen/consoled.h -new file mode 100644 -index 0000000000..fd5d220a8a ---- /dev/null -+++ b/xen/include/xen/consoled.h -@@ -0,0 +1,27 @@ -+#ifndef __XEN_CONSOLED_H__ -+#define __XEN_CONSOLED_H__ -+ -+#include -+ -+#ifdef CONFIG_PV_SHIM -+ -+void consoled_set_ring_addr(struct xencons_interface *ring); -+struct xencons_interface *consoled_get_ring_addr(void); -+size_t consoled_guest_rx(void); -+size_t consoled_guest_tx(char c); -+ -+#else -+ -+size_t consoled_guest_tx(char c) { return 0; } -+ -+#endif /* !CONFIG_PV_SHIM */ -+#endif /* __XEN_CONSOLED_H__ */ -+/* -+ * Local variables: -+ * mode: C -+ * c-file-style: "BSD" -+ * c-basic-offset: 4 -+ * tab-width: 4 -+ * indent-tabs-mode: nil -+ * End: -+ */ --- -2.14.3 - - -From 83c838c9f853712ac5d36c9dc001eb8903b1e1e2 Mon Sep 17 00:00:00 2001 -From: Roger Pau Monne -Date: Thu, 11 Jan 2018 11:41:19 +0000 -Subject: [PATCH 61/77] xen/pvshim: add migration support -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Signed-off-by: Roger Pau Monné ---- -Changes since v1: - - Use bitmap_zero instead of memset. - - Don't drop the __init attribute of unshare_xen_page_with_guest, - it's not needed for migration. - - Remove BUG_ON to check correct mapping, map_domain_page cannot - fail. - - Reduce indentation level of pv_shim_shutdown. 
---- - xen/arch/x86/guest/xen.c | 29 +++++++ - xen/arch/x86/pv/shim.c | 155 +++++++++++++++++++++++++++++++++++++- - xen/common/domain.c | 11 ++- - xen/common/schedule.c | 3 +- - xen/drivers/char/xen_pv_console.c | 2 +- - xen/include/asm-x86/guest/xen.h | 5 ++ - xen/include/asm-x86/pv/shim.h | 5 +- - xen/include/xen/sched.h | 2 +- - 8 files changed, 197 insertions(+), 15 deletions(-) - -diff --git a/xen/arch/x86/guest/xen.c b/xen/arch/x86/guest/xen.c -index 57b297ad47..2a5554ab26 100644 ---- a/xen/arch/x86/guest/xen.c -+++ b/xen/arch/x86/guest/xen.c -@@ -348,6 +348,35 @@ uint32_t hypervisor_cpuid_base(void) - return xen_cpuid_base; - } - -+static void ap_resume(void *unused) -+{ -+ map_vcpuinfo(); -+ init_evtchn(); -+} -+ -+void hypervisor_resume(void) -+{ -+ /* Reset shared info page. */ -+ map_shared_info(); -+ -+ /* -+ * Reset vcpu_info. Just clean the mapped bitmap and try to map the vcpu -+ * area again. On failure to map (when it was previously mapped) panic -+ * since it's impossible to safely shut down running guest vCPUs in order -+ * to meet the new XEN_LEGACY_MAX_VCPUS requirement. -+ */ -+ bitmap_zero(vcpu_info_mapped, NR_CPUS); -+ if ( map_vcpuinfo() && nr_cpu_ids > XEN_LEGACY_MAX_VCPUS ) -+ panic("unable to remap vCPU info and vCPUs > legacy limit"); -+ -+ /* Setup event channel upcall vector. */ -+ init_evtchn(); -+ smp_call_function(ap_resume, NULL, 1); -+ -+ if ( pv_console ) -+ pv_console_init(); -+} -+ - /* - * Local variables: - * mode: C -diff --git a/xen/arch/x86/pv/shim.c b/xen/arch/x86/pv/shim.c -index 986f9da58a..c53a4ca407 100644 ---- a/xen/arch/x86/pv/shim.c -+++ b/xen/arch/x86/pv/shim.c -@@ -160,10 +160,159 @@ void __init pv_shim_setup_dom(struct domain *d, l4_pgentry_t *l4start, - guest = d; - } - --void pv_shim_shutdown(uint8_t reason) -+static void write_start_info(struct domain *d) - { -- /* XXX: handle suspend */ -- xen_hypercall_shutdown(reason); -+ struct cpu_user_regs *regs = guest_cpu_user_regs(); -+ start_info_t *si = map_domain_page(_mfn(is_pv_32bit_domain(d) ? regs->edx -+ : regs->rdx)); -+ uint64_t param; -+ -+ snprintf(si->magic, sizeof(si->magic), "xen-3.0-x86_%s", -+ is_pv_32bit_domain(d) ? "32p" : "64"); -+ si->nr_pages = d->tot_pages; -+ si->shared_info = virt_to_maddr(d->shared_info); -+ si->flags = 0; -+ BUG_ON(xen_hypercall_hvm_get_param(HVM_PARAM_STORE_PFN, &si->store_mfn)); -+ BUG_ON(xen_hypercall_hvm_get_param(HVM_PARAM_STORE_EVTCHN, ¶m)); -+ si->store_evtchn = param; -+ BUG_ON(xen_hypercall_hvm_get_param(HVM_PARAM_CONSOLE_EVTCHN, ¶m)); -+ si->console.domU.evtchn = param; -+ if ( pv_console ) -+ si->console.domU.mfn = virt_to_mfn(consoled_get_ring_addr()); -+ else if ( xen_hypercall_hvm_get_param(HVM_PARAM_CONSOLE_PFN, -+ &si->console.domU.mfn) ) -+ BUG(); -+ -+ if ( is_pv_32bit_domain(d) ) -+ xlat_start_info(si, XLAT_start_info_console_domU); -+ -+ unmap_domain_page(si); -+} -+ -+int pv_shim_shutdown(uint8_t reason) -+{ -+ struct domain *d = current->domain; -+ struct vcpu *v; -+ unsigned int i; -+ uint64_t old_store_pfn, old_console_pfn = 0, store_pfn, console_pfn; -+ uint64_t store_evtchn, console_evtchn; -+ long rc; -+ -+ if ( reason != SHUTDOWN_suspend ) -+ /* Forward to L0. */ -+ return xen_hypercall_shutdown(reason); -+ -+ BUG_ON(current->vcpu_id != 0); -+ -+ BUG_ON(xen_hypercall_hvm_get_param(HVM_PARAM_STORE_PFN, &old_store_pfn)); -+ if ( !pv_console ) -+ BUG_ON(xen_hypercall_hvm_get_param(HVM_PARAM_CONSOLE_PFN, -+ &old_console_pfn)); -+ -+ /* Pause the other vcpus before starting the migration. 
*/ -+ for_each_vcpu(d, v) -+ if ( v != current ) -+ vcpu_pause_by_systemcontroller(v); -+ -+ rc = xen_hypercall_shutdown(SHUTDOWN_suspend); -+ if ( rc ) -+ { -+ for_each_vcpu(d, v) -+ if ( v != current ) -+ vcpu_unpause_by_systemcontroller(v); -+ -+ return rc; -+ } -+ -+ /* Resume the shim itself first. */ -+ hypervisor_resume(); -+ -+ /* -+ * ATM there's nothing Xen can do if the console/store pfn changes, -+ * because Xen won't have a page_info struct for it. -+ */ -+ BUG_ON(xen_hypercall_hvm_get_param(HVM_PARAM_STORE_PFN, &store_pfn)); -+ BUG_ON(old_store_pfn != store_pfn); -+ if ( !pv_console ) -+ { -+ BUG_ON(xen_hypercall_hvm_get_param(HVM_PARAM_CONSOLE_PFN, -+ &console_pfn)); -+ BUG_ON(old_console_pfn != console_pfn); -+ } -+ -+ /* Update domain id. */ -+ d->domain_id = get_initial_domain_id(); -+ -+ /* Clean the iomem range. */ -+ BUG_ON(iomem_deny_access(d, 0, ~0UL)); -+ -+ /* Clean grant frames. */ -+ xfree(grant_frames); -+ grant_frames = NULL; -+ nr_grant_list = 0; -+ -+ /* Clean event channels. */ -+ for ( i = 0; i < EVTCHN_2L_NR_CHANNELS; i++ ) -+ { -+ if ( !port_is_valid(d, i) ) -+ continue; -+ -+ if ( evtchn_handled(d, i) ) -+ evtchn_close(d, i, false); -+ else -+ evtchn_free(d, evtchn_from_port(d, i)); -+ } -+ -+ /* Reserve store/console event channel. */ -+ BUG_ON(xen_hypercall_hvm_get_param(HVM_PARAM_STORE_EVTCHN, &store_evtchn)); -+ BUG_ON(evtchn_allocate_port(d, store_evtchn)); -+ evtchn_reserve(d, store_evtchn); -+ BUG_ON(xen_hypercall_hvm_get_param(HVM_PARAM_CONSOLE_EVTCHN, -+ &console_evtchn)); -+ BUG_ON(evtchn_allocate_port(d, console_evtchn)); -+ evtchn_reserve(d, console_evtchn); -+ -+ /* Clean watchdogs. */ -+ watchdog_domain_destroy(d); -+ watchdog_domain_init(d); -+ -+ /* Clean the PIRQ EOI page. */ -+ if ( d->arch.pirq_eoi_map != NULL ) -+ { -+ unmap_domain_page_global(d->arch.pirq_eoi_map); -+ put_page_and_type(mfn_to_page(d->arch.pirq_eoi_map_mfn)); -+ d->arch.pirq_eoi_map = NULL; -+ d->arch.pirq_eoi_map_mfn = 0; -+ d->arch.auto_unmask = 0; -+ } -+ -+ /* -+ * NB: there's no need to fixup the p2m, since the mfns assigned -+ * to the PV guest have not changed at all. Just re-write the -+ * start_info fields with the appropriate value. -+ */ -+ write_start_info(d); -+ -+ for_each_vcpu(d, v) -+ { -+ /* Unmap guest vcpu_info pages. */ -+ unmap_vcpu_info(v); -+ -+ /* Reset the periodic timer to the default value. */ -+ v->periodic_period = MILLISECS(10); -+ /* Stop the singleshot timer. 
*/ -+ stop_timer(&v->singleshot_timer); -+ -+ if ( test_bit(_VPF_down, &v->pause_flags) ) -+ BUG_ON(vcpu_reset(v)); -+ -+ if ( v != current ) -+ vcpu_unpause_by_systemcontroller(v); -+ else -+ vcpu_force_reschedule(v); -+ } -+ -+ return 0; - } - - static long pv_shim_event_channel_op(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg) -diff --git a/xen/common/domain.c b/xen/common/domain.c -index 1ba05fa3a1..9a703734eb 100644 ---- a/xen/common/domain.c -+++ b/xen/common/domain.c -@@ -689,16 +689,13 @@ void __domain_crash_synchronous(void) - } - - --void domain_shutdown(struct domain *d, u8 reason) -+int domain_shutdown(struct domain *d, u8 reason) - { - struct vcpu *v; - - #ifdef CONFIG_X86 - if ( pv_shim ) -- { -- pv_shim_shutdown(reason); -- return; -- } -+ return pv_shim_shutdown(reason); - #endif - - spin_lock(&d->shutdown_lock); -@@ -713,7 +710,7 @@ void domain_shutdown(struct domain *d, u8 reason) - if ( d->is_shutting_down ) - { - spin_unlock(&d->shutdown_lock); -- return; -+ return 0; - } - - d->is_shutting_down = 1; -@@ -735,6 +732,8 @@ void domain_shutdown(struct domain *d, u8 reason) - __domain_finalise_shutdown(d); - - spin_unlock(&d->shutdown_lock); -+ -+ return 0; - } - - void domain_resume(struct domain *d) -diff --git a/xen/common/schedule.c b/xen/common/schedule.c -index 88279213e8..b7884263f2 100644 ---- a/xen/common/schedule.c -+++ b/xen/common/schedule.c -@@ -1149,11 +1149,10 @@ ret_t do_sched_op(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg) - if ( copy_from_guest(&sched_shutdown, arg, 1) ) - break; - -- ret = 0; - TRACE_3D(TRC_SCHED_SHUTDOWN, - current->domain->domain_id, current->vcpu_id, - sched_shutdown.reason); -- domain_shutdown(current->domain, (u8)sched_shutdown.reason); -+ ret = domain_shutdown(current->domain, (u8)sched_shutdown.reason); - - break; - } -diff --git a/xen/drivers/char/xen_pv_console.c b/xen/drivers/char/xen_pv_console.c -index 948343303e..cc1c1d743f 100644 ---- a/xen/drivers/char/xen_pv_console.c -+++ b/xen/drivers/char/xen_pv_console.c -@@ -37,7 +37,7 @@ static DEFINE_SPINLOCK(tx_lock); - - bool pv_console; - --void __init pv_console_init(void) -+void pv_console_init(void) - { - long r; - uint64_t raw_pfn = 0, raw_evtchn = 0; -diff --git a/xen/include/asm-x86/guest/xen.h b/xen/include/asm-x86/guest/xen.h -index ac48dcbe44..11243fe60d 100644 ---- a/xen/include/asm-x86/guest/xen.h -+++ b/xen/include/asm-x86/guest/xen.h -@@ -39,6 +39,7 @@ int hypervisor_free_unused_page(mfn_t mfn); - void hypervisor_fixup_e820(struct e820map *e820); - const unsigned long *hypervisor_reserved_pages(unsigned int *size); - uint32_t hypervisor_cpuid_base(void); -+void hypervisor_resume(void); - - DECLARE_PER_CPU(unsigned int, vcpu_id); - DECLARE_PER_CPU(struct vcpu_info *, vcpu_info); -@@ -72,6 +73,10 @@ static inline uint32_t hypervisor_cpuid_base(void) - ASSERT_UNREACHABLE(); - return 0; - }; -+static inline void hypervisor_resume(void) -+{ -+ ASSERT_UNREACHABLE(); -+}; - - #endif /* CONFIG_XEN_GUEST */ - #endif /* __X86_GUEST_XEN_H__ */ -diff --git a/xen/include/asm-x86/pv/shim.h b/xen/include/asm-x86/pv/shim.h -index ab656fd854..4d5f0b43fc 100644 ---- a/xen/include/asm-x86/pv/shim.h -+++ b/xen/include/asm-x86/pv/shim.h -@@ -35,7 +35,7 @@ void pv_shim_setup_dom(struct domain *d, l4_pgentry_t *l4start, - unsigned long va_start, unsigned long store_va, - unsigned long console_va, unsigned long vphysmap, - start_info_t *si); --void pv_shim_shutdown(uint8_t reason); -+int pv_shim_shutdown(uint8_t reason); - void pv_shim_inject_evtchn(unsigned int port); - domid_t 
get_initial_domain_id(void); - -@@ -50,9 +50,10 @@ static inline void pv_shim_setup_dom(struct domain *d, l4_pgentry_t *l4start, - { - ASSERT_UNREACHABLE(); - } --static inline void pv_shim_shutdown(uint8_t reason) -+static inline int pv_shim_shutdown(uint8_t reason) - { - ASSERT_UNREACHABLE(); -+ return 0; - } - static inline void pv_shim_inject_evtchn(unsigned int port) - { -diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h -index 64abc1df6c..2541ecb04f 100644 ---- a/xen/include/xen/sched.h -+++ b/xen/include/xen/sched.h -@@ -603,7 +603,7 @@ static inline struct domain *rcu_lock_current_domain(void) - struct domain *get_domain_by_id(domid_t dom); - void domain_destroy(struct domain *d); - int domain_kill(struct domain *d); --void domain_shutdown(struct domain *d, u8 reason); -+int domain_shutdown(struct domain *d, u8 reason); - void domain_resume(struct domain *d); - void domain_pause_for_debugger(void); - --- -2.14.3 - - -From 7dcc20e0c8cf6fa30f483b0c91c8566a97a61031 Mon Sep 17 00:00:00 2001 -From: Sergey Dyasli -Date: Thu, 11 Jan 2018 11:41:20 +0000 -Subject: [PATCH 62/77] xen/pvshim: add shim_mem cmdline parameter - -Signed-off-by: Sergey Dyasli ---- - docs/misc/xen-command-line.markdown | 16 +++++++++++++ - xen/arch/x86/dom0_build.c | 18 ++++++++++++++- - xen/arch/x86/pv/shim.c | 46 +++++++++++++++++++++++++++++++++++++ - xen/include/asm-x86/pv/shim.h | 6 +++++ - 4 files changed, 85 insertions(+), 1 deletion(-) - -diff --git a/docs/misc/xen-command-line.markdown b/docs/misc/xen-command-line.markdown -index 3a1a9c1fba..9f51710a46 100644 ---- a/docs/misc/xen-command-line.markdown -+++ b/docs/misc/xen-command-line.markdown -@@ -686,6 +686,8 @@ any dom0 autoballooning feature present in your toolstack. See the - _xl.conf(5)_ man page or [Xen Best - Practices](http://wiki.xen.org/wiki/Xen_Best_Practices#Xen_dom0_dedicated_memory_and_preventing_dom0_memory_ballooning). - -+This option doesn't have effect if pv-shim mode is enabled. -+ - ### dom0\_nodes - - > `= List of [ | relaxed | strict ]` -@@ -1456,6 +1458,20 @@ guest compatibly inside an HVM container. - In this mode, the kernel and initrd passed as modules to the hypervisor are - constructed into a plain unprivileged PV domain. - -+### shim\_mem (x86) -+> `= List of ( min: | max: | )` -+ -+Set the amount of memory that xen-shim reserves for itself. Only has effect -+if pv-shim mode is enabled. -+ -+* `min:` specifies the minimum amount of memory. Ignored if greater -+ than max. Default: 10M. -+* `max:` specifies the maximum amount of memory. Default: 128M. -+* `` specifies the exact amount of memory. Overrides both min and max. -+ -+By default, 1/16th of total HVM container's memory is reserved for xen-shim -+with minimum amount being 10MB and maximum amount 128MB. -+ - ### rcu-idle-timer-period-ms - > `= ` - -diff --git a/xen/arch/x86/dom0_build.c b/xen/arch/x86/dom0_build.c -index 452298c624..bc713fb2b5 100644 ---- a/xen/arch/x86/dom0_build.c -+++ b/xen/arch/x86/dom0_build.c -@@ -51,6 +51,13 @@ static long __init parse_amt(const char *s, const char **ps) - - static int __init parse_dom0_mem(const char *s) - { -+ /* xen-shim uses shim_mem parameter instead of dom0_mem */ -+ if ( pv_shim ) -+ { -+ printk("Ignoring dom0_mem param in pv-shim mode\n"); -+ return 0; -+ } -+ - do { - if ( !strncmp(s, "min:", 4) ) - dom0_min_nrpages = parse_amt(s+4, &s); -@@ -284,7 +291,16 @@ unsigned long __init dom0_compute_nr_pages( - * maximum of 128MB. 
- */ - if ( nr_pages == 0 ) -- nr_pages = -min(avail / 16, 128UL << (20 - PAGE_SHIFT)); -+ { -+ uint64_t rsvd = min(avail / 16, 128UL << (20 - PAGE_SHIFT)); -+ if ( pv_shim ) -+ { -+ rsvd = pv_shim_mem(avail); -+ printk("Reserved %lu pages for xen-shim\n", rsvd); -+ -+ } -+ nr_pages = -rsvd; -+ } - - /* Negative specification means "all memory - specified amount". */ - if ( (long)nr_pages < 0 ) nr_pages += avail; -diff --git a/xen/arch/x86/pv/shim.c b/xen/arch/x86/pv/shim.c -index c53a4ca407..6dc1ee45d7 100644 ---- a/xen/arch/x86/pv/shim.c -+++ b/xen/arch/x86/pv/shim.c -@@ -53,6 +53,52 @@ static long pv_shim_grant_table_op(unsigned int cmd, - XEN_GUEST_HANDLE_PARAM(void) uop, - unsigned int count); - -+/* -+ * By default, 1/16th of total HVM container's memory is reserved for xen-shim -+ * with minimum amount being 10MB and maximum amount 128MB. Some users may wish -+ * to tune this constants for better memory utilization. This can be achieved -+ * using the following xen-shim's command line option: -+ * -+ * shim_mem=[min:,][max:,][] -+ * -+ * : The minimum amount of memory that should be allocated for xen-shim -+ * (ignored if greater than max) -+ * : The maximum amount of memory that should be allocated for xen-shim -+ * : The precise amount of memory to allocate for xen-shim -+ * (overrides both min and max) -+ */ -+static uint64_t __initdata shim_nrpages; -+static uint64_t __initdata shim_min_nrpages = 10UL << (20 - PAGE_SHIFT); -+static uint64_t __initdata shim_max_nrpages = 128UL << (20 - PAGE_SHIFT); -+ -+static int __init parse_shim_mem(const char *s) -+{ -+ do { -+ if ( !strncmp(s, "min:", 4) ) -+ shim_min_nrpages = parse_size_and_unit(s+4, &s) >> PAGE_SHIFT; -+ else if ( !strncmp(s, "max:", 4) ) -+ shim_max_nrpages = parse_size_and_unit(s+4, &s) >> PAGE_SHIFT; -+ else -+ shim_nrpages = parse_size_and_unit(s, &s) >> PAGE_SHIFT; -+ } while ( *s++ == ',' ); -+ -+ return s[-1] ? -EINVAL : 0; -+} -+custom_param("shim_mem", parse_shim_mem); -+ -+uint64_t pv_shim_mem(uint64_t avail) -+{ -+ uint64_t rsvd = min(avail / 16, shim_max_nrpages); -+ -+ if ( shim_nrpages ) -+ return shim_nrpages; -+ -+ if ( shim_min_nrpages <= shim_max_nrpages ) -+ rsvd = max(rsvd, shim_min_nrpages); -+ -+ return rsvd; -+} -+ - #define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER| \ - _PAGE_GUEST_KERNEL) - #define COMPAT_L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED) -diff --git a/xen/include/asm-x86/pv/shim.h b/xen/include/asm-x86/pv/shim.h -index 4d5f0b43fc..0ef452158e 100644 ---- a/xen/include/asm-x86/pv/shim.h -+++ b/xen/include/asm-x86/pv/shim.h -@@ -38,6 +38,7 @@ void pv_shim_setup_dom(struct domain *d, l4_pgentry_t *l4start, - int pv_shim_shutdown(uint8_t reason); - void pv_shim_inject_evtchn(unsigned int port); - domid_t get_initial_domain_id(void); -+uint64_t pv_shim_mem(uint64_t avail); - - #else - -@@ -63,6 +64,11 @@ static inline domid_t get_initial_domain_id(void) - { - return 0; - } -+static inline uint64_t pv_shim_mem(uint64_t avail) -+{ -+ ASSERT_UNREACHABLE(); -+ return 0; -+} - - #endif - --- -2.14.3 - - -From 004646a1dd4ff2f768d942689545dd3b6e2135e2 Mon Sep 17 00:00:00 2001 -From: Roger Pau Monne -Date: Thu, 11 Jan 2018 11:41:20 +0000 -Subject: [PATCH 63/77] xen/pvshim: set max_pages to the value of tot_pages -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -So that the guest is not able to deplete the memory pool of the shim -itself by trying to balloon up. 
- -Signed-off-by: Roger Pau Monné -Acked-by: Jan Beulich ---- - xen/arch/x86/pv/shim.c | 6 ++++++ - 1 file changed, 6 insertions(+) - -diff --git a/xen/arch/x86/pv/shim.c b/xen/arch/x86/pv/shim.c -index 6dc1ee45d7..e3e101a5b1 100644 ---- a/xen/arch/x86/pv/shim.c -+++ b/xen/arch/x86/pv/shim.c -@@ -204,6 +204,12 @@ void __init pv_shim_setup_dom(struct domain *d, l4_pgentry_t *l4start, - (hypercall_fn_t *)pv_shim_grant_table_op, - (hypercall_fn_t *)pv_shim_grant_table_op); - guest = d; -+ -+ /* -+ * Set the max pages to the current number of pages to prevent the -+ * guest from depleting the shim memory pool. -+ */ -+ d->max_pages = d->tot_pages; - } - - static void write_start_info(struct domain *d) --- -2.14.3 - - -From 5b6c3ffa1d291724a329b57658783fc30b93b479 Mon Sep 17 00:00:00 2001 -From: Roger Pau Monne -Date: Thu, 11 Jan 2018 11:41:20 +0000 -Subject: [PATCH 64/77] xen/pvshim: support vCPU hotplug -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Signed-off-by: Roger Pau Monné ---- -Changes since v1: - - Fix hotplug so that v->initialized is checked before attempting to - bring up the physical CPU. - - Fix ARM compilation. ---- - xen/arch/x86/pv/shim.c | 63 +++++++++++++++++++++++++++++++++++++++++++ - xen/common/domain.c | 38 +++++++++++++++++--------- - xen/include/asm-x86/pv/shim.h | 12 +++++++++ - xen/include/xen/domain.h | 1 + - 4 files changed, 102 insertions(+), 12 deletions(-) - -diff --git a/xen/arch/x86/pv/shim.c b/xen/arch/x86/pv/shim.c -index e3e101a5b1..68ec7bed8e 100644 ---- a/xen/arch/x86/pv/shim.c -+++ b/xen/arch/x86/pv/shim.c -@@ -751,6 +751,69 @@ static long pv_shim_grant_table_op(unsigned int cmd, - return rc; - } - -+long pv_shim_cpu_up(void *data) -+{ -+ struct vcpu *v = data; -+ struct domain *d = v->domain; -+ bool wake; -+ -+ BUG_ON(smp_processor_id() != 0); -+ -+ domain_lock(d); -+ if ( !v->is_initialised ) -+ { -+ domain_unlock(d); -+ return -EINVAL; -+ } -+ -+ if ( !cpu_online(v->vcpu_id) ) -+ { -+ long rc = cpu_up_helper((void *)(unsigned long)v->vcpu_id); -+ -+ if ( rc ) -+ { -+ domain_unlock(d); -+ gprintk(XENLOG_ERR, "Failed to bring up CPU#%u: %ld\n", -+ v->vcpu_id, rc); -+ return rc; -+ } -+ } -+ -+ wake = test_and_clear_bit(_VPF_down, &v->pause_flags); -+ domain_unlock(d); -+ if ( wake ) -+ vcpu_wake(v); -+ -+ return 0; -+} -+ -+long pv_shim_cpu_down(void *data) -+{ -+ struct vcpu *v = data; -+ long rc; -+ -+ BUG_ON(smp_processor_id() != 0); -+ -+ if ( !test_and_set_bit(_VPF_down, &v->pause_flags) ) -+ vcpu_sleep_sync(v); -+ -+ if ( cpu_online(v->vcpu_id) ) -+ { -+ rc = cpu_down_helper((void *)(unsigned long)v->vcpu_id); -+ if ( rc ) -+ gprintk(XENLOG_ERR, "Failed to bring down CPU#%u: %ld\n", -+ v->vcpu_id, rc); -+ /* -+ * NB: do not propagate errors from cpu_down_helper failing. The shim -+ * is going to run with extra CPUs, but that's not going to prevent -+ * normal operation. OTOH most guests are not prepared to handle an -+ * error on VCPUOP_down failing, and will likely panic. 
-+ */ -+ } -+ -+ return 0; -+} -+ - domid_t get_initial_domain_id(void) - { - uint32_t eax, ebx, ecx, edx; -diff --git a/xen/common/domain.c b/xen/common/domain.c -index 9a703734eb..8fbd33d4c6 100644 ---- a/xen/common/domain.c -+++ b/xen/common/domain.c -@@ -1293,22 +1293,36 @@ long do_vcpu_op(int cmd, unsigned int vcpuid, XEN_GUEST_HANDLE_PARAM(void) arg) - - break; - -- case VCPUOP_up: { -- bool_t wake = 0; -- domain_lock(d); -- if ( !v->is_initialised ) -- rc = -EINVAL; -+ case VCPUOP_up: -+#ifdef CONFIG_X86 -+ if ( pv_shim ) -+ rc = continue_hypercall_on_cpu(0, pv_shim_cpu_up, v); - else -- wake = test_and_clear_bit(_VPF_down, &v->pause_flags); -- domain_unlock(d); -- if ( wake ) -- vcpu_wake(v); -+#endif -+ { -+ bool wake = false; -+ -+ domain_lock(d); -+ if ( !v->is_initialised ) -+ rc = -EINVAL; -+ else -+ wake = test_and_clear_bit(_VPF_down, &v->pause_flags); -+ domain_unlock(d); -+ if ( wake ) -+ vcpu_wake(v); -+ } -+ - break; -- } - - case VCPUOP_down: -- if ( !test_and_set_bit(_VPF_down, &v->pause_flags) ) -- vcpu_sleep_nosync(v); -+#ifdef CONFIG_X86 -+ if ( pv_shim ) -+ rc = continue_hypercall_on_cpu(0, pv_shim_cpu_down, v); -+ else -+#endif -+ if ( !test_and_set_bit(_VPF_down, &v->pause_flags) ) -+ vcpu_sleep_nosync(v); -+ - break; - - case VCPUOP_is_up: -diff --git a/xen/include/asm-x86/pv/shim.h b/xen/include/asm-x86/pv/shim.h -index 0ef452158e..eb59ddd38a 100644 ---- a/xen/include/asm-x86/pv/shim.h -+++ b/xen/include/asm-x86/pv/shim.h -@@ -37,6 +37,8 @@ void pv_shim_setup_dom(struct domain *d, l4_pgentry_t *l4start, - start_info_t *si); - int pv_shim_shutdown(uint8_t reason); - void pv_shim_inject_evtchn(unsigned int port); -+long pv_shim_cpu_up(void *data); -+long pv_shim_cpu_down(void *data); - domid_t get_initial_domain_id(void); - uint64_t pv_shim_mem(uint64_t avail); - -@@ -60,6 +62,16 @@ static inline void pv_shim_inject_evtchn(unsigned int port) - { - ASSERT_UNREACHABLE(); - } -+static inline long pv_shim_cpu_up(void *data) -+{ -+ ASSERT_UNREACHABLE(); -+ return 0; -+} -+static inline long pv_shim_cpu_down(void *data) -+{ -+ ASSERT_UNREACHABLE(); -+ return 0; -+} - static inline domid_t get_initial_domain_id(void) - { - return 0; -diff --git a/xen/include/xen/domain.h b/xen/include/xen/domain.h -index 347f264047..eb62f1dab1 100644 ---- a/xen/include/xen/domain.h -+++ b/xen/include/xen/domain.h -@@ -17,6 +17,7 @@ struct vcpu *alloc_vcpu( - struct domain *d, unsigned int vcpu_id, unsigned int cpu_id); - struct vcpu *alloc_dom0_vcpu0(struct domain *dom0); - int vcpu_reset(struct vcpu *); -+int vcpu_up(struct vcpu *v); - - struct xen_domctl_getdomaininfo; - void getdomaininfo(struct domain *d, struct xen_domctl_getdomaininfo *info); --- -2.14.3 - - -From 29dd3142bf7115d45836a6de7a72c17a4dac7cc8 Mon Sep 17 00:00:00 2001 -From: Roger Pau Monne -Date: Thu, 11 Jan 2018 11:41:20 +0000 -Subject: [PATCH 65/77] xen/pvshim: memory hotplug -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Signed-off-by: Roger Pau Monné ---- -Changes since v1: - - Add an order parameter to batch_memory_op. - - Add a TODO item regarding high order memory chunks to - pv_shim_online_memory. - - Use page_list_splice. - - Make sure the shim handlers are not called multiple times when - the hypercall is preempted. 
---- - xen/arch/x86/pv/shim.c | 112 ++++++++++++++++++++++++++++++++++++++++++ - xen/common/memory.c | 21 ++++++++ - xen/include/asm-x86/pv/shim.h | 10 ++++ - 3 files changed, 143 insertions(+) - -diff --git a/xen/arch/x86/pv/shim.c b/xen/arch/x86/pv/shim.c -index 68ec7bed8e..4120cc550e 100644 ---- a/xen/arch/x86/pv/shim.c -+++ b/xen/arch/x86/pv/shim.c -@@ -48,6 +48,9 @@ static unsigned int nr_grant_list; - static unsigned long *grant_frames; - static DEFINE_SPINLOCK(grant_lock); - -+static PAGE_LIST_HEAD(balloon); -+static DEFINE_SPINLOCK(balloon_lock); -+ - static long pv_shim_event_channel_op(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg); - static long pv_shim_grant_table_op(unsigned int cmd, - XEN_GUEST_HANDLE_PARAM(void) uop, -@@ -814,6 +817,115 @@ long pv_shim_cpu_down(void *data) - return 0; - } - -+static unsigned long batch_memory_op(unsigned int cmd, unsigned int order, -+ const struct page_list_head *list) -+{ -+ struct xen_memory_reservation xmr = { -+ .domid = DOMID_SELF, -+ .extent_order = order, -+ }; -+ unsigned long pfns[64]; -+ const struct page_info *pg; -+ unsigned long done = 0; -+ -+ set_xen_guest_handle(xmr.extent_start, pfns); -+ page_list_for_each ( pg, list ) -+ { -+ pfns[xmr.nr_extents++] = page_to_mfn(pg); -+ if ( xmr.nr_extents == ARRAY_SIZE(pfns) || !page_list_next(pg, list) ) -+ { -+ long nr = xen_hypercall_memory_op(cmd, &xmr); -+ -+ done += nr > 0 ? nr : 0; -+ if ( nr != xmr.nr_extents ) -+ break; -+ xmr.nr_extents = 0; -+ } -+ } -+ -+ return done; -+} -+ -+void pv_shim_online_memory(unsigned int nr, unsigned int order) -+{ -+ struct page_info *page, *tmp; -+ PAGE_LIST_HEAD(list); -+ -+ spin_lock(&balloon_lock); -+ page_list_for_each_safe ( page, tmp, &balloon ) -+ { -+ /* TODO: add support for splitting high order memory chunks. 
*/ -+ if ( page->v.free.order != order ) -+ continue; -+ -+ page_list_del(page, &balloon); -+ page_list_add_tail(page, &list); -+ if ( !--nr ) -+ break; -+ } -+ spin_unlock(&balloon_lock); -+ -+ if ( nr ) -+ gprintk(XENLOG_WARNING, -+ "failed to allocate %u extents of order %u for onlining\n", -+ nr, order); -+ -+ nr = batch_memory_op(XENMEM_populate_physmap, order, &list); -+ while ( nr-- ) -+ { -+ BUG_ON((page = page_list_remove_head(&list)) == NULL); -+ free_domheap_pages(page, order); -+ } -+ -+ if ( !page_list_empty(&list) ) -+ { -+ gprintk(XENLOG_WARNING, -+ "failed to online some of the memory regions\n"); -+ spin_lock(&balloon_lock); -+ page_list_splice(&list, &balloon); -+ spin_unlock(&balloon_lock); -+ } -+} -+ -+void pv_shim_offline_memory(unsigned int nr, unsigned int order) -+{ -+ struct page_info *page; -+ PAGE_LIST_HEAD(list); -+ -+ while ( nr-- ) -+ { -+ page = alloc_domheap_pages(NULL, order, 0); -+ if ( !page ) -+ break; -+ -+ page_list_add_tail(page, &list); -+ page->v.free.order = order; -+ } -+ -+ if ( nr + 1 ) -+ gprintk(XENLOG_WARNING, -+ "failed to reserve %u extents of order %u for offlining\n", -+ nr + 1, order); -+ -+ -+ nr = batch_memory_op(XENMEM_decrease_reservation, order, &list); -+ spin_lock(&balloon_lock); -+ while ( nr-- ) -+ { -+ BUG_ON((page = page_list_remove_head(&list)) == NULL); -+ page_list_add_tail(page, &balloon); -+ } -+ spin_unlock(&balloon_lock); -+ -+ if ( !page_list_empty(&list) ) -+ { -+ gprintk(XENLOG_WARNING, -+ "failed to offline some of the memory regions\n"); -+ while ( (page = page_list_remove_head(&list)) != NULL ) -+ free_domheap_pages(page, order); -+ } -+} -+ - domid_t get_initial_domain_id(void) - { - uint32_t eax, ebx, ecx, edx; -diff --git a/xen/common/memory.c b/xen/common/memory.c -index a6ba33fdcb..9eed96a9ce 100644 ---- a/xen/common/memory.c -+++ b/xen/common/memory.c -@@ -29,6 +29,10 @@ - #include - #include - -+#ifdef CONFIG_X86 -+#include -+#endif -+ - struct memop_args { - /* INPUT */ - struct domain *domain; /* Domain to be affected. */ -@@ -1019,6 +1023,12 @@ long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void) arg) - return start_extent; - } - -+#ifdef CONFIG_X86 -+ if ( pv_shim && op != XENMEM_decrease_reservation && !args.preempted ) -+ /* Avoid calling pv_shim_online_memory when preempted. */ -+ pv_shim_online_memory(args.nr_extents, args.extent_order); -+#endif -+ - switch ( op ) - { - case XENMEM_increase_reservation: -@@ -1041,6 +1051,17 @@ long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void) arg) - __HYPERVISOR_memory_op, "lh", - op | (rc << MEMOP_EXTENT_SHIFT), arg); - -+#ifdef CONFIG_X86 -+ if ( pv_shim && op == XENMEM_decrease_reservation ) -+ /* -+ * Only call pv_shim_offline_memory when the hypercall has -+ * finished. Note that nr_done is used to cope in case the -+ * hypercall has failed and only part of the extents where -+ * processed. 
-+ */ -+ pv_shim_offline_memory(args.nr_extents, args.nr_done); -+#endif -+ - break; - - case XENMEM_exchange: -diff --git a/xen/include/asm-x86/pv/shim.h b/xen/include/asm-x86/pv/shim.h -index eb59ddd38a..fb739772df 100644 ---- a/xen/include/asm-x86/pv/shim.h -+++ b/xen/include/asm-x86/pv/shim.h -@@ -39,6 +39,8 @@ int pv_shim_shutdown(uint8_t reason); - void pv_shim_inject_evtchn(unsigned int port); - long pv_shim_cpu_up(void *data); - long pv_shim_cpu_down(void *data); -+void pv_shim_online_memory(unsigned int nr, unsigned int order); -+void pv_shim_offline_memory(unsigned int nr, unsigned int order); - domid_t get_initial_domain_id(void); - uint64_t pv_shim_mem(uint64_t avail); - -@@ -72,6 +74,14 @@ static inline long pv_shim_cpu_down(void *data) - ASSERT_UNREACHABLE(); - return 0; - } -+static inline void pv_shim_online_memory(unsigned int nr, unsigned int order) -+{ -+ ASSERT_UNREACHABLE(); -+} -+static inline void pv_shim_offline_memory(unsigned int nr, unsigned int order) -+{ -+ ASSERT_UNREACHABLE(); -+} - static inline domid_t get_initial_domain_id(void) - { - return 0; --- -2.14.3 - - -From 9d60bc96bef01444e30a9653ebf06b24c5bc8be5 Mon Sep 17 00:00:00 2001 -From: Roger Pau Monne -Date: Thu, 11 Jan 2018 11:41:20 +0000 -Subject: [PATCH 66/77] xen/shim: modify shim_mem parameter behaviour -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -shim_mem will now account for both the memory used by the hypervisor -loaded in memory and the free memory slack given to the shim for -runtime usage. - -From experimental testing it seems like the total amount of MiB used -by the shim (giving it ~1MB of free memory for runtime) is: - -memory/113 + 20 - -Signed-off-by: Roger Pau Monné ---- - docs/misc/xen-command-line.markdown | 13 +++++++------ - xen/arch/x86/dom0_build.c | 14 +++----------- - xen/arch/x86/pv/shim.c | 30 +++++++++++++++++++----------- - 3 files changed, 29 insertions(+), 28 deletions(-) - -diff --git a/docs/misc/xen-command-line.markdown b/docs/misc/xen-command-line.markdown -index 9f51710a46..68ec52b5c2 100644 ---- a/docs/misc/xen-command-line.markdown -+++ b/docs/misc/xen-command-line.markdown -@@ -1461,16 +1461,17 @@ constructed into a plain unprivileged PV domain. - ### shim\_mem (x86) - > `= List of ( min: | max: | )` - --Set the amount of memory that xen-shim reserves for itself. Only has effect --if pv-shim mode is enabled. -+Set the amount of memory that xen-shim uses. Only has effect if pv-shim mode is -+enabled. Note that this value accounts for the memory used by the shim itself -+plus the free memory slack given to the shim for runtime allocations. - - * `min:` specifies the minimum amount of memory. Ignored if greater -- than max. Default: 10M. --* `max:` specifies the maximum amount of memory. Default: 128M. -+ than max. -+* `max:` specifies the maximum amount of memory. - * `` specifies the exact amount of memory. Overrides both min and max. - --By default, 1/16th of total HVM container's memory is reserved for xen-shim --with minimum amount being 10MB and maximum amount 128MB. -+By default, the amount of free memory slack given to the shim for runtime usage -+is 1MB. - - ### rcu-idle-timer-period-ms - > `= ` -diff --git a/xen/arch/x86/dom0_build.c b/xen/arch/x86/dom0_build.c -index bc713fb2b5..d77c6b40de 100644 ---- a/xen/arch/x86/dom0_build.c -+++ b/xen/arch/x86/dom0_build.c -@@ -290,17 +290,9 @@ unsigned long __init dom0_compute_nr_pages( - * for things like DMA buffers. 
This reservation is clamped to a - * maximum of 128MB. - */ -- if ( nr_pages == 0 ) -- { -- uint64_t rsvd = min(avail / 16, 128UL << (20 - PAGE_SHIFT)); -- if ( pv_shim ) -- { -- rsvd = pv_shim_mem(avail); -- printk("Reserved %lu pages for xen-shim\n", rsvd); -- -- } -- nr_pages = -rsvd; -- } -+ if ( !nr_pages ) -+ nr_pages = -(pv_shim ? pv_shim_mem(avail) -+ : min(avail / 16, 128UL << (20 - PAGE_SHIFT))); - - /* Negative specification means "all memory - specified amount". */ - if ( (long)nr_pages < 0 ) nr_pages += avail; -diff --git a/xen/arch/x86/pv/shim.c b/xen/arch/x86/pv/shim.c -index 4120cc550e..702249719e 100644 ---- a/xen/arch/x86/pv/shim.c -+++ b/xen/arch/x86/pv/shim.c -@@ -57,9 +57,8 @@ static long pv_shim_grant_table_op(unsigned int cmd, - unsigned int count); - - /* -- * By default, 1/16th of total HVM container's memory is reserved for xen-shim -- * with minimum amount being 10MB and maximum amount 128MB. Some users may wish -- * to tune this constants for better memory utilization. This can be achieved -+ * By default give the shim 1MB of free memory slack. Some users may wish to -+ * tune this constants for better memory utilization. This can be achieved - * using the following xen-shim's command line option: - * - * shim_mem=[min:,][max:,][] -@@ -71,8 +70,8 @@ static long pv_shim_grant_table_op(unsigned int cmd, - * (overrides both min and max) - */ - static uint64_t __initdata shim_nrpages; --static uint64_t __initdata shim_min_nrpages = 10UL << (20 - PAGE_SHIFT); --static uint64_t __initdata shim_max_nrpages = 128UL << (20 - PAGE_SHIFT); -+static uint64_t __initdata shim_min_nrpages; -+static uint64_t __initdata shim_max_nrpages; - - static int __init parse_shim_mem(const char *s) - { -@@ -91,15 +90,24 @@ custom_param("shim_mem", parse_shim_mem); - - uint64_t pv_shim_mem(uint64_t avail) - { -- uint64_t rsvd = min(avail / 16, shim_max_nrpages); -+ if ( !shim_nrpages ) -+ { -+ shim_nrpages = max(shim_min_nrpages, -+ total_pages - avail + (1UL << (20 - PAGE_SHIFT))); -+ if ( shim_max_nrpages ) -+ shim_max_nrpages = min(shim_nrpages, shim_max_nrpages); -+ } -+ -+ if ( total_pages - avail > shim_nrpages ) -+ panic("pages used by shim > shim_nrpages (%#lx > %#lx)", -+ total_pages - avail, shim_nrpages); - -- if ( shim_nrpages ) -- return shim_nrpages; -+ shim_nrpages -= total_pages - avail; - -- if ( shim_min_nrpages <= shim_max_nrpages ) -- rsvd = max(rsvd, shim_min_nrpages); -+ printk("shim used pages %#lx reserving %#lx free pages\n", -+ total_pages - avail, shim_nrpages); - -- return rsvd; -+ return shim_nrpages; - } - - #define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER| \ --- -2.14.3 - - -From b5be9c817d04b006886a0d7b87eacf7bd78f504d Mon Sep 17 00:00:00 2001 -From: Roger Pau Monne -Date: Thu, 11 Jan 2018 11:41:20 +0000 -Subject: [PATCH 67/77] xen/pvshim: use default position for the m2p mappings -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -When running a 32bit kernel as Dom0 on a 64bit hypervisor the -hypervisor will try to shrink the hypervisor hole to the minimum -needed, and thus requires the Dom0 to use XENMEM_machphys_mapping in -order to fetch the position of the start of the hypervisor virtual -mappings. - -Disable this feature when running as a PV shim, since some DomU -kernels don't implemented XENMEM_machphys_mapping and break if the m2p -doesn't begin at the default address. - -NB: support for the XENMEM_machphys_mapping was added in Linux by -commit 7e7750. 
- -Signed-off-by: Roger Pau Monné -Acked-by: Jan Beulich ---- - xen/arch/x86/pv/dom0_build.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/xen/arch/x86/pv/dom0_build.c b/xen/arch/x86/pv/dom0_build.c -index 72752b8656..ebcb47bf26 100644 ---- a/xen/arch/x86/pv/dom0_build.c -+++ b/xen/arch/x86/pv/dom0_build.c -@@ -398,7 +398,8 @@ int __init dom0_construct_pv(struct domain *d, - if ( parms.pae == XEN_PAE_EXTCR3 ) - set_bit(VMASST_TYPE_pae_extended_cr3, &d->vm_assist); - -- if ( (parms.virt_hv_start_low != UNSET_ADDR) && elf_32bit(&elf) ) -+ if ( !pv_shim && (parms.virt_hv_start_low != UNSET_ADDR) && -+ elf_32bit(&elf) ) - { - unsigned long mask = (1UL << L2_PAGETABLE_SHIFT) - 1; - value = (parms.virt_hv_start_low + mask) & ~mask; --- -2.14.3 - - -From c9083de0ae6b0f5b42e7f92f6d43edc3bd09d4f1 Mon Sep 17 00:00:00 2001 -From: Roger Pau Monne -Date: Thu, 11 Jan 2018 11:41:21 +0000 -Subject: [PATCH 68/77] xen/shim: crash instead of reboot in shim mode -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -All guest shutdown operations are forwarded to L0, so the only native -calls to machine_restart happen from crash related paths inside the -hypervisor, hence switch the reboot code to instead issue a crash -shutdown. - -Signed-off-by: Roger Pau Monné -Acked-by: Jan Beulich -[ wei: fix arm build ] -Signed-off-by: Wei Liu ---- -Changes since v1: - - Use the ternary operator. ---- - xen/arch/x86/shutdown.c | 7 ++++++- - xen/drivers/char/console.c | 4 ++++ - 2 files changed, 10 insertions(+), 1 deletion(-) - -diff --git a/xen/arch/x86/shutdown.c b/xen/arch/x86/shutdown.c -index 689f6f137d..a350714319 100644 ---- a/xen/arch/x86/shutdown.c -+++ b/xen/arch/x86/shutdown.c -@@ -642,7 +642,12 @@ void machine_restart(unsigned int delay_millisecs) - break; - - case BOOT_XEN: -- xen_hypercall_shutdown(SHUTDOWN_reboot); -+ /* -+ * When running in PV shim mode guest shutdown calls are -+ * forwarded to L0, hence the only way to get here is if a -+ * shim crash happens. -+ */ -+ xen_hypercall_shutdown(pv_shim ? SHUTDOWN_crash : SHUTDOWN_reboot); - break; - } - } -diff --git a/xen/drivers/char/console.c b/xen/drivers/char/console.c -index 18f5b7f7aa..121073c8ed 100644 ---- a/xen/drivers/char/console.c -+++ b/xen/drivers/char/console.c -@@ -1253,7 +1253,11 @@ void panic(const char *fmt, ...) - if ( opt_noreboot ) - printk("Manual reset required ('noreboot' specified)\n"); - else -+#ifdef CONFIG_X86 -+ printk("%s in five seconds...\n", pv_shim ? "Crash" : "Reboot"); -+#else - printk("Reboot in five seconds...\n"); -+#endif - - spin_unlock_irqrestore(&lock, flags); - --- -2.14.3 - - -From 321ef983a06bc14570b79da1ab60344e3feb2c2b Mon Sep 17 00:00:00 2001 -From: Roger Pau Monne -Date: Thu, 11 Jan 2018 11:41:21 +0000 -Subject: [PATCH 69/77] xen/shim: allow DomU to have as many vcpus as available -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Since the shim VCPUOP_{up/down} hypercall is wired to the plug/unplug -of CPUs to the shim itself, start the shim DomU with only the BSP -online, and let the guest bring up other CPUs as it needs them. - -Signed-off-by: Roger Pau Monné ---- -Changes since v1: - - Fix single line comment style. - - Print Dom%u d->domain_id. - - Change position of __start_xen comment. 
---- - xen/arch/x86/dom0_build.c | 30 +++++++++++++++++++++++++++--- - xen/arch/x86/pv/dom0_build.c | 2 +- - xen/arch/x86/setup.c | 28 ++++++++++++++++++---------- - 3 files changed, 46 insertions(+), 14 deletions(-) - -diff --git a/xen/arch/x86/dom0_build.c b/xen/arch/x86/dom0_build.c -index d77c6b40de..b4f4a4ac4a 100644 ---- a/xen/arch/x86/dom0_build.c -+++ b/xen/arch/x86/dom0_build.c -@@ -138,9 +138,18 @@ struct vcpu *__init dom0_setup_vcpu(struct domain *d, - - if ( v ) - { -- if ( !d->is_pinned && !dom0_affinity_relaxed ) -- cpumask_copy(v->cpu_hard_affinity, &dom0_cpus); -- cpumask_copy(v->cpu_soft_affinity, &dom0_cpus); -+ if ( pv_shim ) -+ { -+ -+ cpumask_setall(v->cpu_hard_affinity); -+ cpumask_setall(v->cpu_soft_affinity); -+ } -+ else -+ { -+ if ( !d->is_pinned && !dom0_affinity_relaxed ) -+ cpumask_copy(v->cpu_hard_affinity, &dom0_cpus); -+ cpumask_copy(v->cpu_soft_affinity, &dom0_cpus); -+ } - } - - return v; -@@ -153,6 +162,21 @@ unsigned int __init dom0_max_vcpus(void) - unsigned int i, max_vcpus, limit; - nodeid_t node; - -+ if ( pv_shim ) -+ { -+ nodes_setall(dom0_nodes); -+ -+ /* -+ * When booting in shim mode APs are not started until the guest brings -+ * other vCPUs up. -+ */ -+ cpumask_set_cpu(0, &dom0_cpus); -+ -+ /* On PV shim mode allow the guest to have as many CPUs as available. */ -+ return nr_cpu_ids; -+ } -+ -+ - for ( i = 0; i < dom0_nr_pxms; ++i ) - if ( (node = pxm_to_node(dom0_pxms[i])) != NUMA_NO_NODE ) - node_set(node, dom0_nodes); -diff --git a/xen/arch/x86/pv/dom0_build.c b/xen/arch/x86/pv/dom0_build.c -index ebcb47bf26..5d8909fa13 100644 ---- a/xen/arch/x86/pv/dom0_build.c -+++ b/xen/arch/x86/pv/dom0_build.c -@@ -701,7 +701,7 @@ int __init dom0_construct_pv(struct domain *d, - for ( i = 0; i < XEN_LEGACY_MAX_VCPUS; i++ ) - shared_info(d, vcpu_info[i].evtchn_upcall_mask) = 1; - -- printk("Dom0 has maximum %u VCPUs\n", d->max_vcpus); -+ printk("Dom%u has maximum %u VCPUs\n", d->domain_id, d->max_vcpus); - - cpu = v->processor; - for ( i = 1; i < d->max_vcpus; i++ ) -diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c -index 7091c38047..cf07e5045d 100644 ---- a/xen/arch/x86/setup.c -+++ b/xen/arch/x86/setup.c -@@ -1584,18 +1584,26 @@ void __init noreturn __start_xen(unsigned long mbi_p) - - do_presmp_initcalls(); - -- for_each_present_cpu ( i ) -+ /* -+ * NB: when running as a PV shim VCPUOP_up/down is wired to the shim -+ * physical cpu_add/remove functions, so launch the guest with only -+ * the BSP online and let it bring up the other CPUs as required. -+ */ -+ if ( !pv_shim ) - { -- /* Set up cpu_to_node[]. */ -- srat_detect_node(i); -- /* Set up node_to_cpumask based on cpu_to_node[]. */ -- numa_add_cpu(i); -- -- if ( (num_online_cpus() < max_cpus) && !cpu_online(i) ) -+ for_each_present_cpu ( i ) - { -- int ret = cpu_up(i); -- if ( ret != 0 ) -- printk("Failed to bring up CPU %u (error %d)\n", i, ret); -+ /* Set up cpu_to_node[]. */ -+ srat_detect_node(i); -+ /* Set up node_to_cpumask based on cpu_to_node[]. 
*/ -+ numa_add_cpu(i); -+ -+ if ( (num_online_cpus() < max_cpus) && !cpu_online(i) ) -+ { -+ int ret = cpu_up(i); -+ if ( ret != 0 ) -+ printk("Failed to bring up CPU %u (error %d)\n", i, ret); -+ } - } - } - --- -2.14.3 - - -From abdde49edc15cc4dc61356d7f3f8f52a2d14e2d8 Mon Sep 17 00:00:00 2001 -From: Ian Jackson -Date: Thu, 14 Dec 2017 16:16:20 +0000 -Subject: [PATCH 70/77] libxl: pvshim: Provide first-class config settings to - enable shim mode - -This is API-compatible because old callers are supposed to call -libxl_*_init to initialise the struct; and the updated function clears -these members. - -It is ABI-compatible because the new fields make this member of the -guest type union larger but only within the existing size of that -union. - -Unfortunately it is not easy to backport because it depends on the PVH -domain type. Attempts to avoid use of the PVH domain type involved -working with two views of the configuration: the "underlying" domain -type and the "visible" type (and corresponding config info). Also -there are different sets of config settings for PV and PVH, which -callers would have to know to set. - -And, unfortunately, it will not be possible, with this approach, to -enable the shim by default for all libxl callers. (Although it could -perhaps be done in xl.) - -For now, our config defaults are: - * if enabled, path is "xen-shim" in the xen firmware directory - * if enabled, cmdline is the one we are currently debugging with - -The debugging arguments will be rationalised in a moment. - -Signed-off-by: Ian Jackson -Signed-off-by: George Dunlap -Signed-off-by: Wei Liu ---- -v2: pvshim, not pvhshim - works with type "pvh", not type "pv" ---- - tools/libxl/libxl.h | 8 +++++++ - tools/libxl/libxl_create.c | 15 ++++++++++++ - tools/libxl/libxl_dom.c | 57 +++++++++++++++++++++++++++++++++++--------- - tools/libxl/libxl_internal.h | 4 ++++ - tools/libxl/libxl_types.idl | 5 +++- - 5 files changed, 77 insertions(+), 12 deletions(-) - -diff --git a/tools/libxl/libxl.h b/tools/libxl/libxl.h -index 5e9aed739d..9632fd6d2f 100644 ---- a/tools/libxl/libxl.h -+++ b/tools/libxl/libxl.h -@@ -1101,6 +1101,14 @@ void libxl_mac_copy(libxl_ctx *ctx, libxl_mac *dst, const libxl_mac *src); - */ - #define LIBXL_HAVE_SET_PARAMETERS 1 - -+/* -+ * LIBXL_HAVE_PV_SHIM -+ * -+ * If this is defined, libxl_domain_build_info's pvh type information -+ * contains members pvshim, pvshim_path, pvshim_cmdline. 
-+ */ -+#define LIBXL_HAVE_PV_SHIM 1 -+ - typedef char **libxl_string_list; - void libxl_string_list_dispose(libxl_string_list *sl); - int libxl_string_list_length(const libxl_string_list *sl); -diff --git a/tools/libxl/libxl_create.c b/tools/libxl/libxl_create.c -index f15fb215c2..576c61ffab 100644 ---- a/tools/libxl/libxl_create.c -+++ b/tools/libxl/libxl_create.c -@@ -389,6 +389,18 @@ int libxl__domain_build_info_setdefault(libxl__gc *gc, - } - break; - case LIBXL_DOMAIN_TYPE_PVH: -+ libxl_defbool_setdefault(&b_info->u.pvh.pvshim, false); -+ if (libxl_defbool_val(b_info->u.pvh.pvshim)) { -+ if (!b_info->u.pvh.pvshim_path) -+ b_info->u.pvh.pvshim_path = -+ libxl__sprintf(NOGC, "%s/%s", -+ libxl__xenfirmwaredir_path(), -+ PVSHIM_BASENAME); -+ if (!b_info->u.pvh.pvshim_cmdline) -+ b_info->u.pvh.pvshim_cmdline = -+ libxl__strdup(NOGC, PVSHIM_CMDLINE); -+ } -+ - break; - default: - LOG(ERROR, "invalid domain type %s in create info", -@@ -499,6 +511,9 @@ int libxl__domain_build(libxl__gc *gc, - - break; - case LIBXL_DOMAIN_TYPE_PVH: -+ state->shim_path = info->u.pvh.pvshim_path; -+ state->shim_cmdline = info->u.pvh.pvshim_cmdline; -+ - ret = libxl__build_hvm(gc, domid, d_config, state); - if (ret) - goto out; -diff --git a/tools/libxl/libxl_dom.c b/tools/libxl/libxl_dom.c -index fbbdb9ec2f..b03386409f 100644 ---- a/tools/libxl/libxl_dom.c -+++ b/tools/libxl/libxl_dom.c -@@ -1025,22 +1025,51 @@ static int libxl__domain_firmware(libxl__gc *gc, - - if (state->pv_kernel.path != NULL && - info->type == LIBXL_DOMAIN_TYPE_PVH) { -- /* Try to load a kernel instead of the firmware. */ -- if (state->pv_kernel.mapped) { -- rc = xc_dom_kernel_mem(dom, state->pv_kernel.data, -- state->pv_kernel.size); -+ -+ if (state->shim_path) { -+ rc = xc_dom_kernel_file(dom, state->shim_path); - if (rc) { -- LOGE(ERROR, "xc_dom_kernel_mem failed"); -+ LOGE(ERROR, "xc_dom_kernel_file failed"); - goto out; - } -+ -+ /* We've loaded the shim, so load the kernel as a secondary module */ -+ if (state->pv_kernel.mapped) { -+ LOG(WARN, "xc_dom_module_mem, cmdline %s", -+ state->pv_cmdline); -+ rc = xc_dom_module_mem(dom, state->pv_kernel.data, -+ state->pv_kernel.size, state->pv_cmdline); -+ if (rc) { -+ LOGE(ERROR, "xc_dom_kernel_mem failed"); -+ goto out; -+ } -+ } else { -+ LOG(WARN, "xc_dom_module_file, path %s cmdline %s", -+ state->pv_kernel.path, state->pv_cmdline); -+ rc = xc_dom_module_file(dom, state->pv_kernel.path, state->pv_cmdline); -+ if (rc) { -+ LOGE(ERROR, "xc_dom_kernel_file failed"); -+ goto out; -+ } -+ } - } else { -- rc = xc_dom_kernel_file(dom, state->pv_kernel.path); -- if (rc) { -- LOGE(ERROR, "xc_dom_kernel_file failed"); -- goto out; -+ /* No shim, so load the kernel directly */ -+ if (state->pv_kernel.mapped) { -+ rc = xc_dom_kernel_mem(dom, state->pv_kernel.data, -+ state->pv_kernel.size); -+ if (rc) { -+ LOGE(ERROR, "xc_dom_kernel_mem failed"); -+ goto out; -+ } -+ } else { -+ rc = xc_dom_kernel_file(dom, state->pv_kernel.path); -+ if (rc) { -+ LOGE(ERROR, "xc_dom_kernel_file failed"); -+ goto out; -+ } - } - } -- -+ - if (state->pv_ramdisk.path && strlen(state->pv_ramdisk.path)) { - if (state->pv_ramdisk.mapped) { - rc = xc_dom_module_mem(dom, state->pv_ramdisk.data, -@@ -1154,8 +1183,14 @@ int libxl__build_hvm(libxl__gc *gc, uint32_t domid, - - xc_dom_loginit(ctx->xch); - -+ /* -+ * If PVH and we have a shim override, use the shim cmdline. -+ * If PVH and no shim override, use the pv cmdline. -+ * If not PVH, use info->cmdline. 
-+ */ - dom = xc_dom_allocate(ctx->xch, info->type == LIBXL_DOMAIN_TYPE_PVH ? -- state->pv_cmdline : info->cmdline, NULL); -+ (state->shim_path ? state->shim_cmdline : state->pv_cmdline) : -+ info->cmdline, NULL); - if (!dom) { - LOGE(ERROR, "xc_dom_allocate failed"); - rc = ERROR_NOMEM; -diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h -index bfa95d8619..2454efa621 100644 ---- a/tools/libxl/libxl_internal.h -+++ b/tools/libxl/libxl_internal.h -@@ -118,6 +118,8 @@ - #define TAP_DEVICE_SUFFIX "-emu" - #define DOMID_XS_PATH "domid" - #define INVALID_DOMID ~0 -+#define PVSHIM_BASENAME "xen-shim" -+#define PVSHIM_CMDLINE "pv-shim console=xen,pv sched=null loglvl=all guest_loglvl=all apic_verbosity=debug e820-verbose" - - /* Size macros. */ - #define __AC(X,Y) (X##Y) -@@ -1136,6 +1138,8 @@ typedef struct { - - libxl__file_reference pv_kernel; - libxl__file_reference pv_ramdisk; -+ const char * shim_path; -+ const char * shim_cmdline; - const char * pv_cmdline; - - xen_vmemrange_t *vmemranges; -diff --git a/tools/libxl/libxl_types.idl b/tools/libxl/libxl_types.idl -index a239324341..6d060edc0d 100644 ---- a/tools/libxl/libxl_types.idl -+++ b/tools/libxl/libxl_types.idl -@@ -592,7 +592,10 @@ libxl_domain_build_info = Struct("domain_build_info",[ - # Use host's E820 for PCI passthrough. - ("e820_host", libxl_defbool), - ])), -- ("pvh", None), -+ ("pvh", Struct(None, [("pvshim", libxl_defbool), -+ ("pvshim_path", string), -+ ("pvshim_cmdline", string), -+ ])), - ("invalid", None), - ], keyvar_init_val = "LIBXL_DOMAIN_TYPE_INVALID")), - --- -2.14.3 - - -From ab9e3854ddb2fad2b86aaf5144a26f5569b63cfc Mon Sep 17 00:00:00 2001 -From: Ian Jackson -Date: Fri, 5 Jan 2018 15:59:29 +0000 -Subject: [PATCH 71/77] libxl: pvshim: Introduce pvshim_extra - -And move the debugging options from the default config into a doc -comment in libxl_types.idl. - -Signed-off-by: Ian Jackson ---- -v2: pvshim, not pvhshim - works with type "pvh", not type "pv" ---- - tools/libxl/libxl.h | 2 +- - tools/libxl/libxl_create.c | 5 ++++- - tools/libxl/libxl_internal.h | 2 +- - tools/libxl/libxl_types.idl | 1 + - 4 files changed, 7 insertions(+), 3 deletions(-) - -diff --git a/tools/libxl/libxl.h b/tools/libxl/libxl.h -index 9632fd6d2f..3c0ae6de47 100644 ---- a/tools/libxl/libxl.h -+++ b/tools/libxl/libxl.h -@@ -1105,7 +1105,7 @@ void libxl_mac_copy(libxl_ctx *ctx, libxl_mac *dst, const libxl_mac *src); - * LIBXL_HAVE_PV_SHIM - * - * If this is defined, libxl_domain_build_info's pvh type information -- * contains members pvshim, pvshim_path, pvshim_cmdline. -+ * contains members pvshim, pvshim_path, pvshim_cmdline, pvshim_extra. - */ - #define LIBXL_HAVE_PV_SHIM 1 - -diff --git a/tools/libxl/libxl_create.c b/tools/libxl/libxl_create.c -index 576c61ffab..1fa1d3a621 100644 ---- a/tools/libxl/libxl_create.c -+++ b/tools/libxl/libxl_create.c -@@ -512,7 +512,10 @@ int libxl__domain_build(libxl__gc *gc, - break; - case LIBXL_DOMAIN_TYPE_PVH: - state->shim_path = info->u.pvh.pvshim_path; -- state->shim_cmdline = info->u.pvh.pvshim_cmdline; -+ state->shim_cmdline = GCSPRINTF("%s%s%s", -+ info->u.pvh.pvshim_cmdline, -+ info->u.pvh.pvshim_extra ? " " : "", -+ info->u.pvh.pvshim_extra ? 
info->u.pvh.pvshim_extra : ""); - - ret = libxl__build_hvm(gc, domid, d_config, state); - if (ret) -diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h -index 2454efa621..0f89364466 100644 ---- a/tools/libxl/libxl_internal.h -+++ b/tools/libxl/libxl_internal.h -@@ -119,7 +119,7 @@ - #define DOMID_XS_PATH "domid" - #define INVALID_DOMID ~0 - #define PVSHIM_BASENAME "xen-shim" --#define PVSHIM_CMDLINE "pv-shim console=xen,pv sched=null loglvl=all guest_loglvl=all apic_verbosity=debug e820-verbose" -+#define PVSHIM_CMDLINE "pv-shim console=xen,pv sched=null" - - /* Size macros. */ - #define __AC(X,Y) (X##Y) -diff --git a/tools/libxl/libxl_types.idl b/tools/libxl/libxl_types.idl -index 6d060edc0d..d74fac7d30 100644 ---- a/tools/libxl/libxl_types.idl -+++ b/tools/libxl/libxl_types.idl -@@ -595,6 +595,7 @@ libxl_domain_build_info = Struct("domain_build_info",[ - ("pvh", Struct(None, [("pvshim", libxl_defbool), - ("pvshim_path", string), - ("pvshim_cmdline", string), -+ ("pvshim_extra", string), # eg "loglvl=all guest_loglvl=all apic_verbosity=debug e820-verbose" - ])), - ("invalid", None), - ], keyvar_init_val = "LIBXL_DOMAIN_TYPE_INVALID")), --- -2.14.3 - - -From 0e2d64ae8f4af4dbd49127107ae6237e7f748c04 Mon Sep 17 00:00:00 2001 -From: Ian Jackson -Date: Fri, 22 Dec 2017 16:12:23 +0000 -Subject: [PATCH 72/77] xl: pvshim: Provide and document xl config - -Signed-off-by: Ian Jackson -Signed-off-by: Wei Liu ---- -v2: pvshim, not pvhshim - works with type "pvh", not type "pv" - pvshim_etc. options in config are not erroneously ignored ---- - docs/man/xl.cfg.pod.5.in | 35 +++++++++++++++++++++++++++++++++++ - tools/xl/xl_parse.c | 14 ++++++++++++++ - 2 files changed, 49 insertions(+) - -diff --git a/docs/man/xl.cfg.pod.5.in b/docs/man/xl.cfg.pod.5.in -index b7b91d8627..bf6c266de1 100644 ---- a/docs/man/xl.cfg.pod.5.in -+++ b/docs/man/xl.cfg.pod.5.in -@@ -508,6 +508,41 @@ Load the specified file as firmware for the guest. - Currently there's no firmware available for PVH guests, they should be - booted using the B method or the B option. - -+=over 4 -+ -+=item B -+ -+Whether to boot this guest as a PV guest within a PVH container. -+Ie, the guest will experience a PV environment, -+but -+processor hardware extensions are used to -+separate its address space -+to mitigate the Meltdown attack (CVE-2017-5754). -+ -+Default is false. -+ -+=item B -+ -+The PV shim is a specially-built firmware-like executable -+constructed from the hypervisor source tree. -+This option specifies to use a non-default shim. -+Ignored if pvhsim is false. -+ -+=item B -+ -+Command line for the shim. -+Default is "pv-shim console=xen,pv sched=null". -+Ignored if pvhsim is false. -+ -+=item B -+ -+Extra command line arguments for the shim. -+If supplied, appended to the value for pvshim_cmdline. -+Default is empty. -+Ignored if pvhsim is false. 
-+ -+=back -+ - =head3 Other Options - - =over 4 -diff --git a/tools/xl/xl_parse.c b/tools/xl/xl_parse.c -index 9a692d5ae6..fdfe693de1 100644 ---- a/tools/xl/xl_parse.c -+++ b/tools/xl/xl_parse.c -@@ -964,6 +964,20 @@ void parse_config_data(const char *config_source, - xlu_cfg_replace_string(config, "pool", &c_info->pool_name, 0); - - libxl_domain_build_info_init_type(b_info, c_info->type); -+ -+ if (b_info->type == LIBXL_DOMAIN_TYPE_PVH) { -+ xlu_cfg_get_defbool(config, "pvshim", &b_info->u.pvh.pvshim, 0); -+ if (!xlu_cfg_get_string(config, "pvshim_path", &buf, 0)) -+ xlu_cfg_replace_string(config, "pvshim_path", -+ &b_info->u.pvh.pvshim_path, 0); -+ if (!xlu_cfg_get_string(config, "pvshim_cmdline", &buf, 0)) -+ xlu_cfg_replace_string(config, "pvshim_cmdline", -+ &b_info->u.pvh.pvshim_cmdline, 0); -+ if (!xlu_cfg_get_string(config, "pvshim_extra", &buf, 0)) -+ xlu_cfg_replace_string(config, "pvshim_extra", -+ &b_info->u.pvh.pvshim_extra, 0); -+ } -+ - if (blkdev_start) - b_info->blkdev_start = strdup(blkdev_start); - --- -2.14.3 - - -From 0a515eeb966add7c63d764cabffec3b2f560a588 Mon Sep 17 00:00:00 2001 -From: Roger Pau Monne -Date: Tue, 16 Jan 2018 14:48:53 +0000 -Subject: [PATCH 73/77] xen/pvshim: map vcpu_info earlier for APs -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Or else init_percpu_time is going to dereference a NULL pointer when -trying to access vcpu_info. - -Signed-off-by: Roger Pau Monné -Tested-by: George Dunlap ---- - xen/arch/x86/smpboot.c | 6 +++--- - 1 file changed, 3 insertions(+), 3 deletions(-) - -diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c -index 5c7863035e..5ed82b16a8 100644 ---- a/xen/arch/x86/smpboot.c -+++ b/xen/arch/x86/smpboot.c -@@ -346,6 +346,9 @@ void start_secondary(void *unused) - else - microcode_resume_cpu(cpu); - -+ if ( xen_guest ) -+ hypervisor_ap_setup(); -+ - smp_callin(); - - init_percpu_time(); -@@ -374,9 +377,6 @@ void start_secondary(void *unused) - cpumask_set_cpu(cpu, &cpu_online_map); - unlock_vector_lock(); - -- if ( xen_guest ) -- hypervisor_ap_setup(); -- - /* We can take interrupts now: we're officially "up". */ - local_irq_enable(); - mtrr_ap_init(); --- -2.14.3 - - -From 6f1979c8e4184f1f2b24b860e30d3b037b2e7f05 Mon Sep 17 00:00:00 2001 -From: Michael Young -Date: Mon, 15 Jan 2018 21:23:20 +0000 -Subject: [PATCH 74/77] -xen-attach is needed for pvh boot with qemu-xen -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Currently the boot of a pvh guest using the qemu-xen device model fails -with the error -xen emulation not implemented (yet) -in the qemu-dm log file. This patch adds the missing -xen-attach -argument. - -V2: Use b_info->type != LIBXL_DOMAIN_TYPE_HVM instead of - (b_info->type == LIBXL_DOMAIN_TYPE_PV) || - (b_info->type == LIBXL_DOMAIN_TYPE_PVH) -as recommended by Roger Pau Monné. 
- -Signed-off-by: Michael Young -Reviewed-by: Roger Pau Monné -Acked-by: Wei Liu ---- - tools/libxl/libxl_dm.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/tools/libxl/libxl_dm.c b/tools/libxl/libxl_dm.c -index a2ea95a9be..a3cddce8b7 100644 ---- a/tools/libxl/libxl_dm.c -+++ b/tools/libxl/libxl_dm.c -@@ -1021,7 +1021,7 @@ static int libxl__build_device_model_args_new(libxl__gc *gc, - */ - flexarray_append(dm_args, "-no-user-config"); - -- if (b_info->type == LIBXL_DOMAIN_TYPE_PV) { -+ if (b_info->type != LIBXL_DOMAIN_TYPE_HVM) { - flexarray_append(dm_args, "-xen-attach"); - } - --- -2.14.3 - - -From 69f4d872e524932d392acd80989c5b776baa4522 Mon Sep 17 00:00:00 2001 -From: Roger Pau Monne -Date: Wed, 17 Jan 2018 10:57:02 +0000 -Subject: [PATCH 75/77] x86/guest: use the vcpu_info area from shared_info -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -If using less than 32 vCPUs (XEN_LEGACY_MAX_VCPUS). - -This is a workaround that should allow to boot the shim on hypervisors -without commit "x86/upcall: inject a spurious event after setting -upcall vector" as long as less than 32 vCPUs are assigned to the -shim. - -Signed-off-by: Roger Pau Monné -Acked-by: Jan Beulich ---- - xen/arch/x86/guest/xen.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/xen/arch/x86/guest/xen.c b/xen/arch/x86/guest/xen.c -index 2a5554ab26..ed8b8c8c7b 100644 ---- a/xen/arch/x86/guest/xen.c -+++ b/xen/arch/x86/guest/xen.c -@@ -257,7 +257,8 @@ void __init hypervisor_setup(void) - map_shared_info(); - - set_vcpu_id(); -- vcpu_info = xzalloc_array(struct vcpu_info, nr_cpu_ids); -+ if ( nr_cpu_ids > XEN_LEGACY_MAX_VCPUS ) -+ vcpu_info = xzalloc_array(struct vcpu_info, nr_cpu_ids); - if ( map_vcpuinfo() ) - { - xfree(vcpu_info); --- -2.14.3 - - -From 79f797c3f41c15a74d627a8eabc373ec7b202933 Mon Sep 17 00:00:00 2001 -From: Roger Pau Monne -Date: Wed, 17 Jan 2018 09:48:14 +0000 -Subject: [PATCH 76/77] firmware/shim: fix build process to use POSIX find - options -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -The -printf find option is not POSIX compatible, so replace it with -another rune. - -Signed-off-by: Roger Pau Monné -Acked-by: Wei Liu ---- - tools/firmware/xen-dir/Makefile | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/tools/firmware/xen-dir/Makefile b/tools/firmware/xen-dir/Makefile -index adf6c31e8d..de754c752e 100644 ---- a/tools/firmware/xen-dir/Makefile -+++ b/tools/firmware/xen-dir/Makefile -@@ -21,7 +21,8 @@ linkfarm.stamp: $(DEP_DIRS) $(DEP_FILES) FORCE - $(foreach d, $(LINK_DIRS), \ - (mkdir -p $(D)/$(d); \ - cd $(D)/$(d); \ -- find $(XEN_ROOT)/$(d)/ -type d -printf "./%P\n" | xargs mkdir -p);) -+ find $(XEN_ROOT)/$(d)/ -type d -exec sh -c \ -+ "echo {} | sed 's,^$(XEN_ROOT)/$(d)/,,g' | xargs mkdir -p" \;);) - $(foreach d, $(LINK_DIRS), \ - (cd $(XEN_ROOT); \ - find $(d) ! -type l -type f \ --- -2.14.3 - - -From fa23f2aaa24c603f748b49b32378b738d18cc68f Mon Sep 17 00:00:00 2001 -From: Roger Pau Monne -Date: Wed, 17 Jan 2018 12:00:41 +0000 -Subject: [PATCH 77/77] xen/pvh: place the trampoline at page 0x1 -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Since PVH guest jump straight into trampoline_setup trampoline_phys is -not initialized, thus the trampoline is relocated to address 0. 
- -This works, but has the undesirable effect of having VA 0 mapped to -MFN 0, which means NULL pointed dereferences no longer trigger a page -fault. - -In order to solve this, place the trampoline at page 0x1 and reserve -the memory used by it. - -Signed-off-by: Roger Pau Monné -Reviewed-by: Wei Liu ---- - xen/arch/x86/boot/head.S | 3 +++ - xen/arch/x86/mm.c | 9 +++++++-- - 2 files changed, 10 insertions(+), 2 deletions(-) - -diff --git a/xen/arch/x86/boot/head.S b/xen/arch/x86/boot/head.S -index 14caca6798..c527910478 100644 ---- a/xen/arch/x86/boot/head.S -+++ b/xen/arch/x86/boot/head.S -@@ -411,6 +411,9 @@ __pvh_start: - /* Skip bootloader setup and bios setup, go straight to trampoline */ - movb $1, sym_esi(pvh_boot) - movb $1, sym_esi(skip_realmode) -+ -+ /* Set trampoline_phys to use mfn 1 to avoid having a mapping at VA 0 */ -+ movw $0x1000, sym_esi(trampoline_phys) - jmp trampoline_setup - - #endif /* CONFIG_PVH_GUEST */ -diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c -index 371c764027..a8b59617d3 100644 ---- a/xen/arch/x86/mm.c -+++ b/xen/arch/x86/mm.c -@@ -292,9 +292,14 @@ void __init arch_init_memory(void) - /* - * First 1MB of RAM is historically marked as I/O. If we booted PVH, - * reclaim the space. Irrespective, leave MFN 0 as special for the sake -- * of 0 being a very common default value. -+ * of 0 being a very common default value. Also reserve page 0x1 which is -+ * used by the trampoline code on PVH. - */ -- for ( i = 0; i < (pvh_boot ? 1 : 0x100); i++ ) -+ BUG_ON(pvh_boot && trampoline_phys != 0x1000); -+ for ( i = 0; -+ i < (pvh_boot ? (1 + PFN_UP(trampoline_end - trampoline_start)) -+ : 0x100); -+ i++ ) - share_xen_page_with_guest(mfn_to_page(_mfn(i)), - dom_io, XENSHARE_writable); - --- -2.14.3 - diff --git a/sources b/sources index 0ffe957..3d3eb03 100644 --- a/sources +++ b/sources @@ -4,4 +4,4 @@ SHA512 (newlib-1.16.0.tar.gz) = 40eb96bbc6736a16b6399e0cdb73e853d0d90b685c967e77 SHA512 (zlib-1.2.3.tar.gz) = 021b958fcd0d346c4ba761bcf0cc40f3522de6186cf5a0a6ea34a70504ce9622b1c2626fce40675bc8282cf5f5ade18473656abc38050f72f5d6480507a2106e SHA512 (polarssl-1.1.4-gpl.tgz) = 88da614e4d3f4409c4fd3bb3e44c7587ba051e3fed4e33d526069a67e8180212e1ea22da984656f50e290049f60ddca65383e5983c0f8884f648d71f698303ad SHA512 (pciutils-2.2.9.tar.bz2) = 2b3d98d027e46d8c08037366dde6f0781ca03c610ef2b380984639e4ef39899ed8d8b8e4cd9c9dc54df101279b95879bd66bfd4d04ad07fef41e847ea7ae32b5 -SHA512 (xen-4.10.0.tar.gz) = 5a37935c382f9cfe3641a35c3be0ba11689bca10c7d3c2401963513e3a834ee8d0c8a0ddcf3716dbf0a795aea1bab78caf19acf1272e5e054bf012cfa06a4690 +SHA512 (xen-4.10.1.tar.gz) = 236c02bee69e33644703ed26d323d4c491a91fc05bd0ee0990a7368579f7c82f5bb4510845bf80348fd923024d7d60d521f593dfd0365d971dc592f8ef10fbea diff --git a/xen.comet.fixes.patch b/xen.comet.fixes.patch deleted file mode 100644 index 2cc0465..0000000 --- a/xen.comet.fixes.patch +++ /dev/null @@ -1,150 +0,0 @@ -From db3ae8becc2b4f9f544eafa06a7c858c7cc9f029 Mon Sep 17 00:00:00 2001 -From: Wei Liu -Date: Wed, 17 Jan 2018 09:50:27 +0000 -Subject: [PATCH] tools: fix arm build after bdf693ee61b48 - -The ramdisk fields were removed. We should use modules[0] instead. 
- -Signed-off-by: Wei Liu -Acked-by: Ian Jackson ---- - tools/libxc/xc_dom_arm.c | 10 +++++----- - tools/libxl/libxl_arm.c | 6 +++--- - 2 files changed, 8 insertions(+), 8 deletions(-) - -diff --git a/tools/libxc/xc_dom_arm.c b/tools/libxc/xc_dom_arm.c -index fce151d821..5b9eca6087 100644 ---- a/tools/libxc/xc_dom_arm.c -+++ b/tools/libxc/xc_dom_arm.c -@@ -390,8 +390,8 @@ static int meminit(struct xc_dom_image *dom) - const uint64_t kernsize = kernend - kernbase; - const uint64_t dtb_size = dom->devicetree_blob ? - ROUNDUP(dom->devicetree_size, XC_PAGE_SHIFT) : 0; -- const uint64_t ramdisk_size = dom->ramdisk_blob ? -- ROUNDUP(dom->ramdisk_size, XC_PAGE_SHIFT) : 0; -+ const uint64_t ramdisk_size = dom->modules[0].blob ? -+ ROUNDUP(dom->modules[0].size, XC_PAGE_SHIFT) : 0; - const uint64_t modsize = dtb_size + ramdisk_size; - const uint64_t ram128mb = bankbase[0] + (128<<20); - -@@ -483,12 +483,12 @@ static int meminit(struct xc_dom_image *dom) - */ - if ( ramdisk_size ) - { -- dom->ramdisk_seg.vstart = modbase; -- dom->ramdisk_seg.vend = modbase + ramdisk_size; -+ dom->modules[0].seg.vstart = modbase; -+ dom->modules[0].seg.vend = modbase + ramdisk_size; - - DOMPRINTF("%s: ramdisk: 0x%" PRIx64 " -> 0x%" PRIx64 "", - __FUNCTION__, -- dom->ramdisk_seg.vstart, dom->ramdisk_seg.vend); -+ dom->modules[0].seg.vstart, dom->modules[0].seg.vend); - - modbase += ramdisk_size; - } -diff --git a/tools/libxl/libxl_arm.c b/tools/libxl/libxl_arm.c -index de1840bece..3e46554301 100644 ---- a/tools/libxl/libxl_arm.c -+++ b/tools/libxl/libxl_arm.c -@@ -923,7 +923,7 @@ next_resize: - FDT( fdt_begin_node(fdt, "") ); - - FDT( make_root_properties(gc, vers, fdt) ); -- FDT( make_chosen_node(gc, fdt, !!dom->ramdisk_blob, state, info) ); -+ FDT( make_chosen_node(gc, fdt, !!dom->modules[0].blob, state, info) ); - FDT( make_cpus_node(gc, fdt, info->max_vcpus, ainfo) ); - FDT( make_psci_node(gc, fdt) ); - -@@ -1053,8 +1053,8 @@ int libxl__arch_domain_finalise_hw_description(libxl__gc *gc, - int i; - const uint64_t bankbase[] = GUEST_RAM_BANK_BASES; - -- const struct xc_dom_seg *ramdisk = dom->ramdisk_blob ? -- &dom->ramdisk_seg : NULL; -+ const struct xc_dom_seg *ramdisk = dom->modules[0].blob ? -+ &dom->modules[0].seg : NULL; - - if (ramdisk) { - int chosen, res; --- -2.14.3 - -From 81838c9067ab7f4b89d33f90a71225ffff9800ba Mon Sep 17 00:00:00 2001 -From: Wei Liu -Date: Wed, 17 Jan 2018 16:43:54 +0000 -Subject: [PATCH] ocaml: fix arm build - -ARM doesn't have emulation_flags in the arch_domainconfig. 
- -Signed-off-by: Wei Liu -Reviewed-by: Julien Grall ---- - tools/ocaml/libs/xc/xenctrl_stubs.c | 6 ++++++ - 1 file changed, 6 insertions(+) - -diff --git a/tools/ocaml/libs/xc/xenctrl_stubs.c b/tools/ocaml/libs/xc/xenctrl_stubs.c -index 0b5a2361c0..dd6000caa3 100644 ---- a/tools/ocaml/libs/xc/xenctrl_stubs.c -+++ b/tools/ocaml/libs/xc/xenctrl_stubs.c -@@ -176,10 +176,14 @@ CAMLprim value stub_xc_domain_create(value xch, value ssidref, - break; - - case 1: /* X86 - emulation flags in the block */ -+#if defined(__i386__) || defined(__x86_64__) - for (l = Field(Field(domconfig, 0), 0); - l != Val_none; - l = Field(l, 1)) - config.emulation_flags |= 1u << Int_val(Field(l, 0)); -+#else -+ caml_failwith("Unhandled: x86"); -+#endif - break; - - default: -@@ -320,6 +324,7 @@ static value alloc_domaininfo(xc_domaininfo_t * info) - - Store_field(result, 15, tmp); - -+#if defined(__i386__) || defined(__x86_64__) - /* emulation_flags: x86_arch_emulation_flags list; */ - tmp = emul_list = Val_emptylist; - for (i = 0; i < 10; i++) { -@@ -341,6 +346,7 @@ static value alloc_domaininfo(xc_domaininfo_t * info) - Store_field(arch_config, 0, x86_arch_config); - - Store_field(result, 16, arch_config); -+#endif - - CAMLreturn(result); - } --- -2.14.3 - -From 36c560e7f38130f12a36e8b66b0785fb655fe893 Mon Sep 17 00:00:00 2001 -From: Wei Liu -Date: Tue, 16 Jan 2018 18:56:45 +0000 -Subject: [PATCH] Don't build xen-shim for 32 bit build host - -Signed-off-by: Wei Liu ---- - tools/firmware/Makefile | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/tools/firmware/Makefile b/tools/firmware/Makefile -index 9387cc0878..b2f011df49 100644 ---- a/tools/firmware/Makefile -+++ b/tools/firmware/Makefile -@@ -1,7 +1,9 @@ - XEN_ROOT = $(CURDIR)/../.. - include $(XEN_ROOT)/tools/Rules.mk - -+ifneq ($(XEN_TARGET_ARCH),x86_32) - CONFIG_PV_SHIM := y -+endif - - # hvmloader is a 32-bit protected mode binary. - TARGET := hvmloader/hvmloader --- -2.14.3 - diff --git a/xen.fedora.efi.build.patch b/xen.fedora.efi.build.patch index 96cfb35..a531445 100644 --- a/xen.fedora.efi.build.patch +++ b/xen.fedora.efi.build.patch @@ -6,8 +6,8 @@ echo '$(TARGET).efi'; fi) +LD_EFI ?= $(LD) - shim-$(CONFIG_PVH_GUEST) := $(TARGET)-shim - + ifneq ($(build_id_linker),) + notes_phdrs = --notes @@ -173,20 +174,20 @@ $(TARGET).efi: prelink-efi.o $(note_file) efi.lds efi/relocs-dummy.o $(BASEDIR)/common/symbols-dummy.o efi/mkreloc diff --git a/xen.spec b/xen.spec index 93f49d8..7544095 100644 --- a/xen.spec +++ b/xen.spec @@ -59,8 +59,8 @@ Summary: Xen is a virtual machine monitor Name: xen -Version: 4.10.0 -Release: 9%{?dist} +Version: 4.10.1 +Release: 1%{?dist} Group: Development/Libraries License: GPLv2+ and LGPLv2+ and BSD URL: http://xen.org/ @@ -116,19 +116,8 @@ Patch37: droplibvirtconflict.patch Patch38: qemu.trad.CVE-2017-8309.patch Patch39: qemu.trad.CVE-2017-9330.patch Patch40: xen.ocaml.safe-strings.patch -Patch41: xsa253.patch -Patch42: 4.10.0-shim-comet-3.patch -Patch43: xen.comet.fixes.patch -Patch44: xen.xsa254.pti.patch Patch45: xen.gcc8.fix.patch -Patch46: xen.xsa254.bti.patch -Patch47: xsa252.patch -Patch48: xsa255-1.patch -Patch49: xsa255-2.patch -Patch50: xsa256.patch Patch51: xen.gcc8.temp.fix.patch -Patch52: xsa258.patch -Patch53: xsa259.patch %if %build_qemutrad @@ -334,21 +323,10 @@ manage Xen virtual machines. 
%patch34 -p1 %patch37 -p1 %patch40 -p1 -#%patch41 -p1 -%patch42 -p1 -%patch43 -p1 -%patch44 -p1 %patch2 -p1 %patch3 -p1 %patch45 -p1 -%patch46 -p1 -%patch47 -p1 -%patch48 -p1 -%patch49 -p1 -%patch50 -p1 %patch51 -p1 -%patch52 -p1 -%patch53 -p1 # qemu-xen-traditional patches pushd tools/qemu-xen-traditional @@ -843,6 +821,7 @@ fi %endif /usr/lib/debug/xen* %endif +/usr/lib/debug/usr/lib/xen/boot/xen-shim-syms %if %build_docs %files doc @@ -885,9 +864,16 @@ fi %endif %changelog +* Thu May 03 2018 Michael Young - 4.10.1-1 +- update to xen-4.10.1 + adjust xen.use.fedora.ipxe.patch and xen.fedora.efi.build.patch + remove patches for issues now fixed upstream + package /usr/lib/debug/usr/lib/xen/boot/xen-shim-syms + * Wed Apr 25 2018 Michael Young - 4.10.0-9 -- Information leak via crafted user-supplied CDROM [XSA-258] (#1571867) -- x86: PV guest may crash Xen with XPTI [XSA-259] (#1571878) +- Information leak via crafted user-supplied CDROM [XSA-258, CVE-2018-10472] + (#1571867) +- x86: PV guest may crash Xen with XPTI [XSA-259, CVE-2018-10471] (#1571878) * Fri Mar 09 2018 Michael Young - 4.10.0-8 - fix safe-strings patch for OCaml 4.0.6 diff --git a/xen.use.fedora.ipxe.patch b/xen.use.fedora.ipxe.patch index 8785393..aef30c3 100644 --- a/xen.use.fedora.ipxe.patch +++ b/xen.use.fedora.ipxe.patch @@ -19,7 +19,7 @@ +ETHERBOOT_NICS ?= 10ec8139 8086100e - QEMU_TRADITIONAL_REVISION ?= xen-4.10.0 + QEMU_TRADITIONAL_REVISION ?= xen-4.10.1 --- xen-4.2.0/tools/firmware/Makefile.orig 2012-05-27 21:57:04.480812871 +0100 +++ xen-4.2.0/tools/firmware/Makefile 2012-06-02 19:03:52.254691484 +0100 @@ -10,7 +10,7 @@ diff --git a/xen.xsa254.bti.patch b/xen.xsa254.bti.patch deleted file mode 100644 index 5d4a18c..0000000 --- a/xen.xsa254.bti.patch +++ /dev/null @@ -1,5669 +0,0 @@ -From b829d42829c1ff626a02756acae4dd482fc20c9a Mon Sep 17 00:00:00 2001 -From: Julien Grall -Date: Tue, 16 Jan 2018 14:23:33 +0000 -Subject: [PATCH 06/49] xen/arm: Introduce enable callback to enable a - capabilities on each online CPU - -Once Xen knows what features/workarounds present on the platform, it -might be necessary to configure each online CPU. - -Introduce a new callback "enable" that will be called on each online CPU to -configure the "capability". - -The code is based on Linux v4.14 (where cpufeature.c comes from), the -explanation of why using stop_machine_run is kept as we have similar -problem in the future. - -Lastly introduce enable_errata_workaround that will be called once CPUs -have booted and before the hardware domain is created. - -This is part of XSA-254. 
- -Signed-of-by: Julien Grall -Reviewed-by: Stefano Stabellini -(cherry picked from commit 7500495155aacce437878cb576f45224ae984f40) ---- - xen/arch/arm/cpuerrata.c | 6 ++++++ - xen/arch/arm/cpufeature.c | 29 +++++++++++++++++++++++++++++ - xen/arch/arm/setup.c | 1 + - xen/include/asm-arm/cpuerrata.h | 1 + - xen/include/asm-arm/cpufeature.h | 3 +++ - 5 files changed, 40 insertions(+) - -diff --git a/xen/arch/arm/cpuerrata.c b/xen/arch/arm/cpuerrata.c -index fe9e9facbe..772587c05a 100644 ---- a/xen/arch/arm/cpuerrata.c -+++ b/xen/arch/arm/cpuerrata.c -@@ -64,6 +64,12 @@ void check_local_cpu_errata(void) - { - update_cpu_capabilities(arm_errata, "enabled workaround for"); - } -+ -+void __init enable_errata_workarounds(void) -+{ -+ enable_cpu_capabilities(arm_errata); -+} -+ - /* - * Local variables: - * mode: C -diff --git a/xen/arch/arm/cpufeature.c b/xen/arch/arm/cpufeature.c -index 479c9fb011..525b45e22f 100644 ---- a/xen/arch/arm/cpufeature.c -+++ b/xen/arch/arm/cpufeature.c -@@ -19,6 +19,7 @@ - #include - #include - #include -+#include - #include - - DECLARE_BITMAP(cpu_hwcaps, ARM_NCAPS); -@@ -39,6 +40,34 @@ void update_cpu_capabilities(const struct arm_cpu_capabilities *caps, - } - } - -+/* -+ * Run through the enabled capabilities and enable() it on all active -+ * CPUs. -+ */ -+void __init enable_cpu_capabilities(const struct arm_cpu_capabilities *caps) -+{ -+ for ( ; caps->matches; caps++ ) -+ { -+ if ( !cpus_have_cap(caps->capability) ) -+ continue; -+ -+ if ( caps->enable ) -+ { -+ int ret; -+ -+ /* -+ * Use stop_machine_run() as it schedules the work allowing -+ * us to modify PSTATE, instead of on_each_cpu() which uses -+ * an IPI, giving us a PSTATE that disappears when we -+ * return. -+ */ -+ ret = stop_machine_run(caps->enable, (void *)caps, NR_CPUS); -+ /* stop_machine_run should never fail at this stage of the boot. */ -+ BUG_ON(ret); -+ } -+ } -+} -+ - /* - * Local variables: - * mode: C -diff --git a/xen/arch/arm/setup.c b/xen/arch/arm/setup.c -index 16a3b1be8e..032a6a882d 100644 ---- a/xen/arch/arm/setup.c -+++ b/xen/arch/arm/setup.c -@@ -849,6 +849,7 @@ void __init start_xen(unsigned long boot_phys_offset, - * stop_machine (tasklets initialized via an initcall). - */ - apply_alternatives_all(); -+ enable_errata_workarounds(); - - /* Create initial domain 0. 
*/ - /* The vGIC for DOM0 is exactly emulating the hardware GIC */ -diff --git a/xen/include/asm-arm/cpuerrata.h b/xen/include/asm-arm/cpuerrata.h -index 8b158429c7..7de68361ff 100644 ---- a/xen/include/asm-arm/cpuerrata.h -+++ b/xen/include/asm-arm/cpuerrata.h -@@ -5,6 +5,7 @@ - #include - - void check_local_cpu_errata(void); -+void enable_errata_workarounds(void); - - #ifdef CONFIG_HAS_ALTERNATIVE - -diff --git a/xen/include/asm-arm/cpufeature.h b/xen/include/asm-arm/cpufeature.h -index f00b6dbd39..21c65e198c 100644 ---- a/xen/include/asm-arm/cpufeature.h -+++ b/xen/include/asm-arm/cpufeature.h -@@ -74,6 +74,7 @@ struct arm_cpu_capabilities { - const char *desc; - u16 capability; - bool (*matches)(const struct arm_cpu_capabilities *); -+ int (*enable)(void *); /* Called on every active CPUs */ - union { - struct { /* To be used for eratum handling only */ - u32 midr_model; -@@ -85,6 +86,8 @@ struct arm_cpu_capabilities { - void update_cpu_capabilities(const struct arm_cpu_capabilities *caps, - const char *info); - -+void enable_cpu_capabilities(const struct arm_cpu_capabilities *caps); -+ - #endif /* __ASSEMBLY__ */ - - #endif --- -2.14.3 - - -From 0f7a4faafb2d79920cc63457cfca3e03990af4cc Mon Sep 17 00:00:00 2001 -From: Julien Grall -Date: Tue, 16 Jan 2018 14:23:34 +0000 -Subject: [PATCH 07/49] xen/arm64: Add missing MIDR values for Cortex-A72, A73 - and A75 - -Cortex-A72, A73 and A75 MIDR will be used to a follow-up for hardening -the branch predictor. - -This is part of XSA-254. - -Signed-off-by: Julien Grall -Acked-by: Stefano Stabellini -(cherry picked from commit 7975bff524c4e2c30efbf144de753f151d974e53) ---- - xen/include/asm-arm/processor.h | 6 ++++++ - 1 file changed, 6 insertions(+) - -diff --git a/xen/include/asm-arm/processor.h b/xen/include/asm-arm/processor.h -index 65eb1071e1..3edab1b893 100644 ---- a/xen/include/asm-arm/processor.h -+++ b/xen/include/asm-arm/processor.h -@@ -47,10 +47,16 @@ - #define ARM_CPU_PART_CORTEX_A15 0xC0F - #define ARM_CPU_PART_CORTEX_A53 0xD03 - #define ARM_CPU_PART_CORTEX_A57 0xD07 -+#define ARM_CPU_PART_CORTEX_A72 0xD08 -+#define ARM_CPU_PART_CORTEX_A73 0xD09 -+#define ARM_CPU_PART_CORTEX_A75 0xD0A - - #define MIDR_CORTEX_A15 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A15) - #define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53) - #define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57) -+#define MIDR_CORTEX_A72 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A72) -+#define MIDR_CORTEX_A73 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A73) -+#define MIDR_CORTEX_A75 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A75) - - /* MPIDR Multiprocessor Affinity Register */ - #define _MPIDR_UP (30) --- -2.14.3 - - -From d1f4283a1d8405a480b4121e1efcfaec8bbdbffa Mon Sep 17 00:00:00 2001 -From: Julien Grall -Date: Tue, 16 Jan 2018 14:23:35 +0000 -Subject: [PATCH 08/49] xen/arm: cpuerrata: Add MIDR_ALL_VERSIONS - -Introduce a new macro MIDR_ALL_VERSIONS to match all variant/revision of a -given CPU model. - -This is part of XSA-254. 
- -Signed-off-by: Julien Grall -Reviewed-by: Stefano Stabellini -(cherry picked from commit ba73070af43a38d200413f446d6a718e108867b6) ---- - xen/arch/arm/cpuerrata.c | 6 ++++++ - 1 file changed, 6 insertions(+) - -diff --git a/xen/arch/arm/cpuerrata.c b/xen/arch/arm/cpuerrata.c -index 772587c05a..c50d3331f2 100644 ---- a/xen/arch/arm/cpuerrata.c -+++ b/xen/arch/arm/cpuerrata.c -@@ -7,6 +7,12 @@ - .midr_range_min = min, \ - .midr_range_max = max - -+#define MIDR_ALL_VERSIONS(model) \ -+ .matches = is_affected_midr_range, \ -+ .midr_model = model, \ -+ .midr_range_min = 0, \ -+ .midr_range_max = (MIDR_VARIANT_MASK | MIDR_REVISION_MASK) -+ - static bool __maybe_unused - is_affected_midr_range(const struct arm_cpu_capabilities *entry) - { --- -2.14.3 - - -From cae6e1572f39a1906be0fc3bdaf49fe514c6a9c0 Mon Sep 17 00:00:00 2001 -From: Julien Grall -Date: Tue, 16 Jan 2018 14:23:36 +0000 -Subject: [PATCH 09/49] xen/arm64: Add skeleton to harden the branch predictor - aliasing attacks - -Aliasing attacked against CPU branch predictors can allow an attacker to -redirect speculative control flow on some CPUs and potentially divulge -information from one context to another. - -This patch adds initial skeleton code behind a new Kconfig option to -enable implementation-specific mitigations against these attacks for -CPUs that are affected. - -Most of the mitigations will have to be applied when entering to the -hypervisor from the guest context. For safety, it is applied at every -exception entry. So there are potential for optimizing when receiving -an exception at the same level. - -Because the attack is against branch predictor, it is not possible to -safely use branch instruction before the mitigation is applied. -Therefore, this has to be done in the vector entry before jump to the -helper handling a given exception. - -On Arm64, each vector can hold 32 instructions. This leave us 31 -instructions for the mitigation. The last one is the branch instruction -to the helper. - -Because a platform may have CPUs with different micro-architectures, -per-CPU vector table needs to be provided. Realistically, only a few -different mitigations will be necessary. So provide a small set of -vector tables. They will be re-used and patch with the mitigations -on-demand. - -This is based on the work done in Linux (see [1]). - -This is part of XSA-254. 
- -[1] git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux.git -branch ktpi - -Signed-off-by: Julien Grall -Reviewed-by: Stefano Stabellini -(cherry picked from commit 4c4fddc166cf528aca49540bcc9ee4f196b01dac) ---- - xen/arch/arm/Kconfig | 20 ++++++ - xen/arch/arm/arm64/Makefile | 1 + - xen/arch/arm/arm64/bpi.S | 64 ++++++++++++++++++ - xen/arch/arm/cpuerrata.c | 142 +++++++++++++++++++++++++++++++++++++++ - xen/arch/arm/traps.c | 5 +- - xen/include/asm-arm/cpuerrata.h | 1 + - xen/include/asm-arm/cpufeature.h | 3 +- - xen/include/asm-arm/processor.h | 5 +- - 8 files changed, 237 insertions(+), 4 deletions(-) - create mode 100644 xen/arch/arm/arm64/bpi.S - -diff --git a/xen/arch/arm/Kconfig b/xen/arch/arm/Kconfig -index f58019d6ed..06fd85cc77 100644 ---- a/xen/arch/arm/Kconfig -+++ b/xen/arch/arm/Kconfig -@@ -171,6 +171,26 @@ config ARM64_ERRATUM_834220 - - endmenu - -+config HARDEN_BRANCH_PREDICTOR -+ bool "Harden the branch predictor against aliasing attacks" if EXPERT -+ default y -+ help -+ Speculation attacks against some high-performance processors rely on -+ being able to manipulate the branch predictor for a victim context by -+ executing aliasing branches in the attacker context. Such attacks -+ can be partially mitigated against by clearing internal branch -+ predictor state and limiting the prediction logic in some situations. -+ -+ This config option will take CPU-specific actions to harden the -+ branch predictor against aliasing attacks and may rely on specific -+ instruction sequences or control bits being set by the system -+ firmware. -+ -+ If unsure, say Y. -+ -+config ARM64_HARDEN_BRANCH_PREDICTOR -+ def_bool y if ARM_64 && HARDEN_BRANCH_PREDICTOR -+ - source "common/Kconfig" - - source "drivers/Kconfig" -diff --git a/xen/arch/arm/arm64/Makefile b/xen/arch/arm/arm64/Makefile -index 718fe44455..bb5c610b2a 100644 ---- a/xen/arch/arm/arm64/Makefile -+++ b/xen/arch/arm/arm64/Makefile -@@ -1,6 +1,7 @@ - subdir-y += lib - - obj-y += cache.o -+obj-$(CONFIG_HARDEN_BRANCH_PREDICTOR) += bpi.o - obj-$(EARLY_PRINTK) += debug.o - obj-y += domctl.o - obj-y += domain.o -diff --git a/xen/arch/arm/arm64/bpi.S b/xen/arch/arm/arm64/bpi.S -new file mode 100644 -index 0000000000..6cc2f17529 ---- /dev/null -+++ b/xen/arch/arm/arm64/bpi.S -@@ -0,0 +1,64 @@ -+/* -+ * Contains CPU specific branch predictor invalidation sequences -+ * -+ * Copyright (C) 2018 ARM Ltd. -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License version 2 as -+ * published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program. If not, see . 
-+ */ -+ -+.macro ventry target -+ .rept 31 -+ nop -+ .endr -+ b \target -+.endm -+ -+.macro vectors target -+ ventry \target + 0x000 -+ ventry \target + 0x080 -+ ventry \target + 0x100 -+ ventry \target + 0x180 -+ -+ ventry \target + 0x200 -+ ventry \target + 0x280 -+ ventry \target + 0x300 -+ ventry \target + 0x380 -+ -+ ventry \target + 0x400 -+ ventry \target + 0x480 -+ ventry \target + 0x500 -+ ventry \target + 0x580 -+ -+ ventry \target + 0x600 -+ ventry \target + 0x680 -+ ventry \target + 0x700 -+ ventry \target + 0x780 -+.endm -+ -+/* -+ * Populate 4 vector tables. This will cover up to 4 different -+ * micro-architectures in a system. -+ */ -+ .align 11 -+ENTRY(__bp_harden_hyp_vecs_start) -+ .rept 4 -+ vectors hyp_traps_vector -+ .endr -+ENTRY(__bp_harden_hyp_vecs_end) -+ -+/* -+ * Local variables: -+ * mode: ASM -+ * indent-tabs-mode: nil -+ * End: -+ */ -diff --git a/xen/arch/arm/cpuerrata.c b/xen/arch/arm/cpuerrata.c -index c50d3331f2..76d98e771d 100644 ---- a/xen/arch/arm/cpuerrata.c -+++ b/xen/arch/arm/cpuerrata.c -@@ -1,6 +1,148 @@ -+#include -+#include -+#include -+#include -+#include -+#include - #include - #include - -+/* Override macros from asm/page.h to make them work with mfn_t */ -+#undef virt_to_mfn -+#define virt_to_mfn(va) _mfn(__virt_to_mfn(va)) -+ -+/* Hardening Branch predictor code for Arm64 */ -+#ifdef CONFIG_ARM64_HARDEN_BRANCH_PREDICTOR -+ -+#define VECTOR_TABLE_SIZE SZ_2K -+ -+/* -+ * Number of available table vectors (this should be in-sync with -+ * arch/arm64/bpi.S -+ */ -+#define NR_BPI_HYP_VECS 4 -+ -+extern char __bp_harden_hyp_vecs_start[], __bp_harden_hyp_vecs_end[]; -+ -+/* -+ * Key for each slot. This is used to find whether a specific workaround -+ * had a slot assigned. -+ * -+ * The key is virtual address of the vector workaround -+ */ -+static uintptr_t bp_harden_slot_key[NR_BPI_HYP_VECS]; -+ -+/* -+ * [hyp_vec_start, hyp_vec_end[ corresponds to the first 31 instructions -+ * of each vector. The last (i.e 32th) instruction is used to branch to -+ * the original entry. -+ * -+ * Those instructions will be copied on each vector to harden them. -+ */ -+static bool copy_hyp_vect_bpi(unsigned int slot, const char *hyp_vec_start, -+ const char *hyp_vec_end) -+{ -+ void *dst_remapped; -+ const void *dst = __bp_harden_hyp_vecs_start + slot * VECTOR_TABLE_SIZE; -+ unsigned int i; -+ mfn_t dst_mfn = virt_to_mfn(dst); -+ -+ BUG_ON(((hyp_vec_end - hyp_vec_start) / 4) > 31); -+ -+ /* -+ * Vectors are part of the text that are mapped read-only. So re-map -+ * the vector table to be able to update vectors. -+ */ -+ dst_remapped = __vmap(&dst_mfn, -+ 1UL << get_order_from_bytes(VECTOR_TABLE_SIZE), -+ 1, 1, PAGE_HYPERVISOR, VMAP_DEFAULT); -+ if ( !dst_remapped ) -+ return false; -+ -+ dst_remapped += (vaddr_t)dst & ~PAGE_MASK; -+ -+ for ( i = 0; i < VECTOR_TABLE_SIZE; i += 0x80 ) -+ { -+ memcpy(dst_remapped + i, hyp_vec_start, hyp_vec_end - hyp_vec_start); -+ } -+ -+ clean_dcache_va_range(dst_remapped, VECTOR_TABLE_SIZE); -+ invalidate_icache(); -+ -+ vunmap(dst_remapped); -+ -+ return true; -+} -+ -+static bool __maybe_unused -+install_bp_hardening_vec(const struct arm_cpu_capabilities *entry, -+ const char *hyp_vec_start, -+ const char *hyp_vec_end) -+{ -+ static int last_slot = -1; -+ static DEFINE_SPINLOCK(bp_lock); -+ unsigned int i, slot = -1; -+ bool ret = true; -+ -+ /* -+ * Enable callbacks are called on every CPU based on the -+ * capabilities. So double-check whether the CPU matches the -+ * entry. 
-+ */ -+ if ( !entry->matches(entry) ) -+ return true; -+ -+ /* -+ * No need to install hardened vector when the processor has -+ * ID_AA64PRF0_EL1.CSV2 set. -+ */ -+ if ( cpu_data[smp_processor_id()].pfr64.csv2 ) -+ return true; -+ -+ spin_lock(&bp_lock); -+ -+ /* -+ * Look up whether the hardening vector had a slot already -+ * assigned. -+ */ -+ for ( i = 0; i < 4; i++ ) -+ { -+ if ( bp_harden_slot_key[i] == (uintptr_t)hyp_vec_start ) -+ { -+ slot = i; -+ break; -+ } -+ } -+ -+ if ( slot == -1 ) -+ { -+ last_slot++; -+ /* Check we don't overrun the number of slots available. */ -+ BUG_ON(NR_BPI_HYP_VECS <= last_slot); -+ -+ slot = last_slot; -+ ret = copy_hyp_vect_bpi(slot, hyp_vec_start, hyp_vec_end); -+ -+ /* Only update the slot if the copy succeeded. */ -+ if ( ret ) -+ bp_harden_slot_key[slot] = (uintptr_t)hyp_vec_start; -+ } -+ -+ if ( ret ) -+ { -+ /* Install the new vector table. */ -+ WRITE_SYSREG((vaddr_t)(__bp_harden_hyp_vecs_start + slot * VECTOR_TABLE_SIZE), -+ VBAR_EL2); -+ isb(); -+ } -+ -+ spin_unlock(&bp_lock); -+ -+ return ret; -+} -+ -+#endif /* CONFIG_ARM64_HARDEN_BRANCH_PREDICTOR */ -+ - #define MIDR_RANGE(model, min, max) \ - .matches = is_affected_midr_range, \ - .midr_model = model, \ -diff --git a/xen/arch/arm/traps.c b/xen/arch/arm/traps.c -index f6f6de3691..9ec5b93dc2 100644 ---- a/xen/arch/arm/traps.c -+++ b/xen/arch/arm/traps.c -@@ -161,7 +161,10 @@ __initcall(update_serrors_cpu_caps); - - void init_traps(void) - { -- /* Setup Hyp vector base */ -+ /* -+ * Setup Hyp vector base. Note they might get updated with the -+ * branch predictor hardening. -+ */ - WRITE_SYSREG((vaddr_t)hyp_traps_vector, VBAR_EL2); - - /* Trap Debug and Performance Monitor accesses */ -diff --git a/xen/include/asm-arm/cpuerrata.h b/xen/include/asm-arm/cpuerrata.h -index 7de68361ff..23ebf367ea 100644 ---- a/xen/include/asm-arm/cpuerrata.h -+++ b/xen/include/asm-arm/cpuerrata.h -@@ -1,6 +1,7 @@ - #ifndef __ARM_CPUERRATA_H__ - #define __ARM_CPUERRATA_H__ - -+#include - #include - #include - -diff --git a/xen/include/asm-arm/cpufeature.h b/xen/include/asm-arm/cpufeature.h -index 21c65e198c..e557a095af 100644 ---- a/xen/include/asm-arm/cpufeature.h -+++ b/xen/include/asm-arm/cpufeature.h -@@ -42,8 +42,9 @@ - #define LIVEPATCH_FEATURE 4 - #define SKIP_SYNCHRONIZE_SERROR_ENTRY_EXIT 5 - #define SKIP_CTXT_SWITCH_SERROR_SYNC 6 -+#define ARM_HARDEN_BRANCH_PREDICTOR 7 - --#define ARM_NCAPS 7 -+#define ARM_NCAPS 8 - - #ifndef __ASSEMBLY__ - -diff --git a/xen/include/asm-arm/processor.h b/xen/include/asm-arm/processor.h -index 3edab1b893..466da5da86 100644 ---- a/xen/include/asm-arm/processor.h -+++ b/xen/include/asm-arm/processor.h -@@ -385,8 +385,9 @@ struct cpuinfo_arm { - unsigned long fp:4; /* Floating Point */ - unsigned long simd:4; /* Advanced SIMD */ - unsigned long gic:4; /* GIC support */ -- unsigned long __res0:4; -- unsigned long __res1; -+ unsigned long __res0:28; -+ unsigned long csv2:4; -+ unsigned long __res1:4; - }; - } pfr64; - --- -2.14.3 - - -From 928112900e5b4a92ccebb2eea11665fd76aa0f0d Mon Sep 17 00:00:00 2001 -From: Julien Grall -Date: Tue, 16 Jan 2018 14:23:37 +0000 -Subject: [PATCH 10/49] xen/arm64: Implement branch predictor hardening for - affected Cortex-A CPUs - -Cortex-A57, A72, A73 and A75 are susceptible to branch predictor -aliasing and can theoritically be attacked by malicious code. - -This patch implements a PSCI-based mitigation for these CPUs when -available. 
The call into firmware will invalidate the branch predictor -state, preventing any malicious entries from affection other victim -contexts. - -Ported from Linux git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux.git -branch kpti. - - Signed-off-by: Marc Zyngier - Signed-off-by: Will Deacon - -This is part of XSA-254. - -Signed-off-by: Julien Grall -Reviewed-by: Stefano Stabellini -(cherry picked from commit e730f8e41e8537f1db9770b9464f9523c28857b9) ---- - xen/arch/arm/arm64/bpi.S | 25 ++++++++++++++++++++++++ - xen/arch/arm/cpuerrata.c | 49 ++++++++++++++++++++++++++++++++++++++++++++++++ - 2 files changed, 74 insertions(+) - -diff --git a/xen/arch/arm/arm64/bpi.S b/xen/arch/arm/arm64/bpi.S -index 6cc2f17529..4b7f1dc21f 100644 ---- a/xen/arch/arm/arm64/bpi.S -+++ b/xen/arch/arm/arm64/bpi.S -@@ -56,6 +56,31 @@ ENTRY(__bp_harden_hyp_vecs_start) - .endr - ENTRY(__bp_harden_hyp_vecs_end) - -+ENTRY(__psci_hyp_bp_inval_start) -+ sub sp, sp, #(8 * 18) -+ stp x16, x17, [sp, #(16 * 0)] -+ stp x14, x15, [sp, #(16 * 1)] -+ stp x12, x13, [sp, #(16 * 2)] -+ stp x10, x11, [sp, #(16 * 3)] -+ stp x8, x9, [sp, #(16 * 4)] -+ stp x6, x7, [sp, #(16 * 5)] -+ stp x4, x5, [sp, #(16 * 6)] -+ stp x2, x3, [sp, #(16 * 7)] -+ stp x0, x1, [sp, #(16 * 8)] -+ mov x0, #0x84000000 -+ smc #0 -+ ldp x16, x17, [sp, #(16 * 0)] -+ ldp x14, x15, [sp, #(16 * 1)] -+ ldp x12, x13, [sp, #(16 * 2)] -+ ldp x10, x11, [sp, #(16 * 3)] -+ ldp x8, x9, [sp, #(16 * 4)] -+ ldp x6, x7, [sp, #(16 * 5)] -+ ldp x4, x5, [sp, #(16 * 6)] -+ ldp x2, x3, [sp, #(16 * 7)] -+ ldp x0, x1, [sp, #(16 * 8)] -+ add sp, sp, #(8 * 18) -+ENTRY(__psci_hyp_bp_inval_end) -+ - /* - * Local variables: - * mode: ASM -diff --git a/xen/arch/arm/cpuerrata.c b/xen/arch/arm/cpuerrata.c -index 76d98e771d..f1ea7f3c5b 100644 ---- a/xen/arch/arm/cpuerrata.c -+++ b/xen/arch/arm/cpuerrata.c -@@ -4,8 +4,10 @@ - #include - #include - #include -+#include - #include - #include -+#include - - /* Override macros from asm/page.h to make them work with mfn_t */ - #undef virt_to_mfn -@@ -141,6 +143,31 @@ install_bp_hardening_vec(const struct arm_cpu_capabilities *entry, - return ret; - } - -+extern char __psci_hyp_bp_inval_start[], __psci_hyp_bp_inval_end[]; -+ -+static int enable_psci_bp_hardening(void *data) -+{ -+ bool ret = true; -+ static bool warned = false; -+ -+ /* -+ * The mitigation is using PSCI version function to invalidate the -+ * branch predictor. This function is only available with PSCI 0.2 -+ * and later. 
-+ */ -+ if ( psci_ver >= PSCI_VERSION(0, 2) ) -+ ret = install_bp_hardening_vec(data, __psci_hyp_bp_inval_start, -+ __psci_hyp_bp_inval_end); -+ else if ( !warned ) -+ { -+ ASSERT(system_state < SYS_STATE_active); -+ warning_add("PSCI 0.2 or later is required for the branch predictor hardening.\n"); -+ warned = true; -+ } -+ -+ return !ret; -+} -+ - #endif /* CONFIG_ARM64_HARDEN_BRANCH_PREDICTOR */ - - #define MIDR_RANGE(model, min, max) \ -@@ -204,6 +231,28 @@ static const struct arm_cpu_capabilities arm_errata[] = { - MIDR_RANGE(MIDR_CORTEX_A57, 0x00, - (1 << MIDR_VARIANT_SHIFT) | 2), - }, -+#endif -+#ifdef CONFIG_ARM64_HARDEN_BRANCH_PREDICTOR -+ { -+ .capability = ARM_HARDEN_BRANCH_PREDICTOR, -+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A57), -+ .enable = enable_psci_bp_hardening, -+ }, -+ { -+ .capability = ARM_HARDEN_BRANCH_PREDICTOR, -+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A72), -+ .enable = enable_psci_bp_hardening, -+ }, -+ { -+ .capability = ARM_HARDEN_BRANCH_PREDICTOR, -+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A73), -+ .enable = enable_psci_bp_hardening, -+ }, -+ { -+ .capability = ARM_HARDEN_BRANCH_PREDICTOR, -+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A75), -+ .enable = enable_psci_bp_hardening, -+ }, - #endif - {}, - }; --- -2.14.3 - - -From 728fadb586a2a14a244dabd70463bcc1654ecc85 Mon Sep 17 00:00:00 2001 -From: Julien Grall -Date: Mon, 22 Jan 2018 14:35:42 +0000 -Subject: [PATCH 11/49] xen/arm: cpuerrata: Remove percpu.h include - -The include percpu.h was added by mistake in cpuerrata.h (see commit -4c4fddc166 "xen/arm64: Add skeleton to harden the branch aliasing -attacks"). So remove it. - -Signed-off-by: Julien Grall -Reviewed-by: Stefano Stabellini -(cherry picked from commit a5e7ce9560b408dbdc2f7fb8a58f6209601cc054) ---- - xen/include/asm-arm/cpuerrata.h | 1 - - 1 file changed, 1 deletion(-) - -diff --git a/xen/include/asm-arm/cpuerrata.h b/xen/include/asm-arm/cpuerrata.h -index 23ebf367ea..7de68361ff 100644 ---- a/xen/include/asm-arm/cpuerrata.h -+++ b/xen/include/asm-arm/cpuerrata.h -@@ -1,7 +1,6 @@ - #ifndef __ARM_CPUERRATA_H__ - #define __ARM_CPUERRATA_H__ - --#include - #include - #include - --- -2.14.3 - - -From df7be94f26757a77747bf4fbfb84bbe2a3da3b4f Mon Sep 17 00:00:00 2001 -From: Julien Grall -Date: Fri, 2 Feb 2018 14:19:19 +0000 -Subject: [PATCH 13/49] xen/arm32: entry: Consolidate DEFINE_TRAP_ENTRY_* - macros - -The only difference between all the DEFINE_TRAP_ENTRY_* macros are the -interrupts (Asynchronous Abort, IRQ, FIQ) unmasked. - -Rather than duplicating the code, introduce __DEFINE_TRAP_ENTRY macro -that will take the list of interrupts to unmask. - -This is part of XSA-254. - -Signed-off-by: Julien Grall -Reviewed-by: Stefano Stabellini -(cherry picked from commit 3bd8fd751e50dd981b7055fb33cdc8aa29537673) ---- - xen/arch/arm/arm32/entry.S | 36 +++++++++++++----------------------- - 1 file changed, 13 insertions(+), 23 deletions(-) - -diff --git a/xen/arch/arm/arm32/entry.S b/xen/arch/arm/arm32/entry.S -index 120922e64e..c6490d2847 100644 ---- a/xen/arch/arm/arm32/entry.S -+++ b/xen/arch/arm/arm32/entry.S -@@ -111,39 +111,29 @@ abort_guest_exit_end: - skip_check: - mov pc, lr - --#define DEFINE_TRAP_ENTRY(trap) \ -+/* -+ * Macro to define trap entry. The iflags corresponds to the list of -+ * interrupts (Asynchronous Abort, IRQ, FIQ) to unmask. 
-+ */ -+#define __DEFINE_TRAP_ENTRY(trap, iflags) \ - ALIGN; \ - trap_##trap: \ - SAVE_ALL; \ -- cpsie i; /* local_irq_enable */ \ -- cpsie a; /* asynchronous abort enable */ \ -+ cpsie iflags; \ - adr lr, return_from_trap; \ - mov r0, sp; \ - mov r11, sp; \ - bic sp, #7; /* Align the stack pointer (noop on guest trap) */ \ - b do_trap_##trap - --#define DEFINE_TRAP_ENTRY_NOIRQ(trap) \ -- ALIGN; \ --trap_##trap: \ -- SAVE_ALL; \ -- cpsie a; /* asynchronous abort enable */ \ -- adr lr, return_from_trap; \ -- mov r0, sp; \ -- mov r11, sp; \ -- bic sp, #7; /* Align the stack pointer (noop on guest trap) */ \ -- b do_trap_##trap -+/* Trap handler which unmask IRQ/Abort, keep FIQ masked */ -+#define DEFINE_TRAP_ENTRY(trap) __DEFINE_TRAP_ENTRY(trap, ai) - --#define DEFINE_TRAP_ENTRY_NOABORT(trap) \ -- ALIGN; \ --trap_##trap: \ -- SAVE_ALL; \ -- cpsie i; /* local_irq_enable */ \ -- adr lr, return_from_trap; \ -- mov r0, sp; \ -- mov r11, sp; \ -- bic sp, #7; /* Align the stack pointer (noop on guest trap) */ \ -- b do_trap_##trap -+/* Trap handler which unmask Abort, keep IRQ/FIQ masked */ -+#define DEFINE_TRAP_ENTRY_NOIRQ(trap) __DEFINE_TRAP_ENTRY(trap, a) -+ -+/* Trap handler which unmask IRQ, keep Abort/FIQ masked */ -+#define DEFINE_TRAP_ENTRY_NOABORT(trap) __DEFINE_TRAP_ENTRY(trap, i) - - .align 5 - GLOBAL(hyp_traps_vector) --- -2.14.3 - - -From 3caf32c470f2f7eb3452c8a61d6224d10e56f9a3 Mon Sep 17 00:00:00 2001 -From: Julien Grall -Date: Fri, 2 Feb 2018 14:19:20 +0000 -Subject: [PATCH 14/49] xen/arm32: Add missing MIDR values for Cortex-A17 and - A12 - -Cortex-A17 and A12 MIDR will be used in a follow-up patch for hardening -the branch predictor. - -This is part of XSA-254. - -Signed-off-by: Julien Grall -Reviewed-by: Stefano Stabellini -(cherry picked from commit 340367bca5360f3e3d263341b58234d0efe5ced2) ---- - xen/include/asm-arm/processor.h | 4 ++++ - 1 file changed, 4 insertions(+) - -diff --git a/xen/include/asm-arm/processor.h b/xen/include/asm-arm/processor.h -index 466da5da86..c0f79d0093 100644 ---- a/xen/include/asm-arm/processor.h -+++ b/xen/include/asm-arm/processor.h -@@ -44,6 +44,8 @@ - - #define ARM_CPU_IMP_ARM 0x41 - -+#define ARM_CPU_PART_CORTEX_A12 0xC0D -+#define ARM_CPU_PART_CORTEX_A17 0xC0E - #define ARM_CPU_PART_CORTEX_A15 0xC0F - #define ARM_CPU_PART_CORTEX_A53 0xD03 - #define ARM_CPU_PART_CORTEX_A57 0xD07 -@@ -51,6 +53,8 @@ - #define ARM_CPU_PART_CORTEX_A73 0xD09 - #define ARM_CPU_PART_CORTEX_A75 0xD0A - -+#define MIDR_CORTEX_A12 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A12) -+#define MIDR_CORTEX_A17 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A17) - #define MIDR_CORTEX_A15 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A15) - #define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53) - #define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57) --- -2.14.3 - - -From 19ad8a7287298f701b557e55e4be689a702194c0 Mon Sep 17 00:00:00 2001 -From: Julien Grall -Date: Fri, 2 Feb 2018 14:19:21 +0000 -Subject: [PATCH 15/49] xen/arm32: entry: Add missing trap_reset entry - -At the moment, the reset vector is defined as .word 0 (e.g andeq r0, r0, -r0). - -This is rather unintuitive and will result to execute the trap -undefined. Instead introduce trap helpers for reset and will generate an -error message in the unlikely case that reset will be called. - -This is part of XSA-254. 
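To see the shape of this change outside of assembly, here is a minimal, self-contained C sketch of the same idea (user-space, invented names, not Xen code): every slot in a dispatch table names a real handler, and the handler for an event that should never fire reports it clearly, instead of the old behaviour where the zero word decoded as a no-op and execution simply fell through into the undefined-instruction path.

#include <stdio.h>
#include <stdlib.h>

typedef void (*trap_handler)(void);

static void do_unexpected_trap(const char *name)
{
    fprintf(stderr, "Unexpected Trap: %s\n", name);
    abort();
}

/* An explicit handler for the "cannot happen" case, mirroring do_trap_reset(). */
static void do_trap_reset(void) { do_unexpected_trap("Reset"); }
static void do_trap_undef(void) { fprintf(stderr, "undefined instruction\n"); }
static void do_trap_irq(void)   { fprintf(stderr, "irq\n"); }

/* Slot 0 used to be a literal 0; now every slot points at a real handler. */
static const trap_handler vector_table[] = {
    do_trap_reset,   /* 0x00 - Reset */
    do_trap_undef,   /* 0x04 - Undefined Instruction */
    do_trap_irq,     /* 0x18 - IRQ (remaining entries omitted) */
};

int main(void)
{
    vector_table[0]();   /* prints "Unexpected Trap: Reset" and aborts */
    return 0;
}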
- -Signed-off-by: Julien Grall -Reviewed-by: Stefano Stabellini -(cherry picked from commit 00268cc91270c7b0aa3a1906bf7e7702db9c61c1) ---- - xen/arch/arm/arm32/entry.S | 3 ++- - xen/arch/arm/arm32/traps.c | 5 +++++ - 2 files changed, 7 insertions(+), 1 deletion(-) - -diff --git a/xen/arch/arm/arm32/entry.S b/xen/arch/arm/arm32/entry.S -index c6490d2847..64876c1184 100644 ---- a/xen/arch/arm/arm32/entry.S -+++ b/xen/arch/arm/arm32/entry.S -@@ -137,7 +137,7 @@ trap_##trap: \ - - .align 5 - GLOBAL(hyp_traps_vector) -- .word 0 /* 0x00 - Reset */ -+ b trap_reset /* 0x00 - Reset */ - b trap_undefined_instruction /* 0x04 - Undefined Instruction */ - b trap_hypervisor_call /* 0x08 - Hypervisor Call */ - b trap_prefetch_abort /* 0x0c - Prefetch Abort */ -@@ -146,6 +146,7 @@ GLOBAL(hyp_traps_vector) - b trap_irq /* 0x18 - IRQ */ - b trap_fiq /* 0x1c - FIQ */ - -+DEFINE_TRAP_ENTRY(reset) - DEFINE_TRAP_ENTRY(undefined_instruction) - DEFINE_TRAP_ENTRY(hypervisor_call) - DEFINE_TRAP_ENTRY(prefetch_abort) -diff --git a/xen/arch/arm/arm32/traps.c b/xen/arch/arm/arm32/traps.c -index 705255883e..4f27543dec 100644 ---- a/xen/arch/arm/arm32/traps.c -+++ b/xen/arch/arm/arm32/traps.c -@@ -23,6 +23,11 @@ - - #include - -+void do_trap_reset(struct cpu_user_regs *regs) -+{ -+ do_unexpected_trap("Reset", regs); -+} -+ - void do_trap_undefined_instruction(struct cpu_user_regs *regs) - { - uint32_t pc = regs->pc; --- -2.14.3 - - -From c4c0187839bacadc82a5729cea739e8c485f6c60 Mon Sep 17 00:00:00 2001 -From: Julien Grall -Date: Fri, 2 Feb 2018 14:19:22 +0000 -Subject: [PATCH 16/49] xen/arm32: Add skeleton to harden branch predictor - aliasing attacks - -Aliasing attacked against CPU branch predictors can allow an attacker to -redirect speculative control flow on some CPUs and potentially divulge -information from one context to another. - -This patch adds initiatial skeleton code behind a new Kconfig option -to enable implementation-specific mitigations against these attacks -for CPUs that are affected. - -Most of mitigations will have to be applied when entering to the -hypervisor from the guest context. - -Because the attack is against branch predictor, it is not possible to -safely use branch instruction before the mitigation is applied. -Therefore this has to be done in the vector entry before jump to the -helper handling a given exception. - -However, on arm32, each vector contain a single instruction. This means -that the hardened vector tables may rely on the state of registers that -does not hold when in the hypervisor (e.g SP is 8 bytes aligned). -Therefore hypervisor code running with guest vectors table should be -minimized and always have IRQs and SErrors masked to reduce the risk to -use them. - -This patch provides an infrastructure to switch vector tables before -entering to the guest and when leaving it. - -Note that alternative could have been used, but older Xen (4.8 or -earlier) doesn't have support. So avoid using alternative to ease -backporting. - -This is part of XSA-254. 
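As a rough model of the vector-table switching described above, here is a user-space C sketch (invented names, fixed CPU count, none of it the real Xen implementation). Each CPU carries an optional pointer to a hardened vector table; it is installed as late as possible before entering the guest, and the default table is restored as the first action on the way back into the hypervisor, matching the constraints the commit message spells out.

#include <stddef.h>
#include <stdio.h>

#define NR_CPUS 4

typedef struct { const char *name; } vector_table_t;

static const vector_table_t default_vectors  = { "default" };
static const vector_table_t hardened_vectors = { "hardened (BTB invalidation)" };

/* Per-CPU override; NULL means "no hardening needed on this CPU". */
static const vector_table_t *bp_harden_vecs[NR_CPUS];

/* What each CPU's vector base register points at, in this model. */
static const vector_table_t *current_vectors[NR_CPUS];

static void return_to_guest(unsigned int cpu)
{
    /* Install the override, if any, only for the time spent in the guest. */
    if ( bp_harden_vecs[cpu] )
        current_vectors[cpu] = bp_harden_vecs[cpu];
    printf("CPU%u enters guest using %s vectors\n", cpu,
           current_vectors[cpu]->name);
}

static void enter_hypervisor(unsigned int cpu)
{
    /* Put the default table back before anything else relies on it. */
    current_vectors[cpu] = &default_vectors;
}

int main(void)
{
    for ( unsigned int cpu = 0; cpu < NR_CPUS; cpu++ )
        current_vectors[cpu] = &default_vectors;

    bp_harden_vecs[1] = &hardened_vectors;   /* e.g. an affected Cortex-A17 */

    return_to_guest(0);    /* default vectors  */
    return_to_guest(1);    /* hardened vectors */
    enter_hypervisor(1);   /* back to default  */
    return 0;
}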
- -Signed-off-by: Julien Grall -Reviewed-by: Stefano Stabellini -(cherry picked from commit 9bd4463b5c7cc026a07b9bbd41a6a7122a95647e) ---- - xen/arch/arm/Kconfig | 3 +++ - xen/arch/arm/arm32/entry.S | 41 ++++++++++++++++++++++++++++++++++++++++- - xen/arch/arm/cpuerrata.c | 30 ++++++++++++++++++++++++++++++ - 3 files changed, 73 insertions(+), 1 deletion(-) - -diff --git a/xen/arch/arm/Kconfig b/xen/arch/arm/Kconfig -index 06fd85cc77..2782ee6589 100644 ---- a/xen/arch/arm/Kconfig -+++ b/xen/arch/arm/Kconfig -@@ -191,6 +191,9 @@ config HARDEN_BRANCH_PREDICTOR - config ARM64_HARDEN_BRANCH_PREDICTOR - def_bool y if ARM_64 && HARDEN_BRANCH_PREDICTOR - -+config ARM32_HARDEN_BRANCH_PREDICTOR -+ def_bool y if ARM_32 && HARDEN_BRANCH_PREDICTOR -+ - source "common/Kconfig" - - source "drivers/Kconfig" -diff --git a/xen/arch/arm/arm32/entry.S b/xen/arch/arm/arm32/entry.S -index 64876c1184..828e52c25c 100644 ---- a/xen/arch/arm/arm32/entry.S -+++ b/xen/arch/arm/arm32/entry.S -@@ -34,6 +34,20 @@ - blne save_guest_regs - - save_guest_regs: -+#ifdef CONFIG_ARM32_HARDEN_BRANCH_PREDICTOR -+ /* -+ * Restore vectors table to the default as it may have been -+ * changed when returning to the guest (see -+ * return_to_hypervisor). We need to do that early (e.g before -+ * any interrupts are unmasked) because hardened vectors requires -+ * SP to be 8 bytes aligned. This does not hold when running in -+ * the hypervisor. -+ */ -+ ldr r1, =hyp_traps_vector -+ mcr p15, 4, r1, c12, c0, 0 -+ isb -+#endif -+ - ldr r11, =0xffffffff /* Clobber SP which is only valid for hypervisor frames. */ - str r11, [sp, #UREGS_sp] - SAVE_ONE_BANKED(SP_usr) -@@ -179,12 +193,37 @@ return_to_guest: - RESTORE_ONE_BANKED(R11_fiq); RESTORE_ONE_BANKED(R12_fiq); - /* Fall thru */ - return_to_hypervisor: -- cpsid i -+ cpsid ai - ldr lr, [sp, #UREGS_lr] - ldr r11, [sp, #UREGS_pc] - msr ELR_hyp, r11 - ldr r11, [sp, #UREGS_cpsr] - msr SPSR_hyp, r11 -+#ifdef CONFIG_ARM32_HARDEN_BRANCH_PREDICTOR -+ /* -+ * Hardening branch predictor may require to setup a different -+ * vector tables before returning to the guests. Those vectors -+ * may rely on the state of registers that does not hold when -+ * running in the hypervisor (e.g SP is 8 bytes aligned). So setup -+ * HVBAR very late. -+ * -+ * Default vectors table will be restored on exit (see -+ * save_guest_regs). -+ */ -+ mov r9, #0 /* vector tables = NULL */ -+ /* -+ * Load vector tables pointer from the per-cpu bp_harden_vecs -+ * when returning to the guest only. -+ */ -+ and r11, #PSR_MODE_MASK -+ cmp r11, #PSR_MODE_HYP -+ ldrne r11, =per_cpu__bp_harden_vecs -+ mrcne p15, 4, r10, c13, c0, 2 /* r10 = per-cpu offset (HTPIDR) */ -+ addne r11, r11, r10 /* r11 = offset of the vector tables */ -+ ldrne r9, [r11] /* r9 = vector tables */ -+ cmp r9, #0 /* Only update HVBAR when the vector */ -+ mcrne p15, 4, r9, c12, c0, 0 /* tables is not NULL. */ -+#endif - pop {r0-r12} - add sp, #(UREGS_SP_usr - UREGS_sp); /* SP, LR, SPSR, PC */ - clrex -diff --git a/xen/arch/arm/cpuerrata.c b/xen/arch/arm/cpuerrata.c -index f1ea7f3c5b..0a138fa735 100644 ---- a/xen/arch/arm/cpuerrata.c -+++ b/xen/arch/arm/cpuerrata.c -@@ -170,6 +170,36 @@ static int enable_psci_bp_hardening(void *data) - - #endif /* CONFIG_ARM64_HARDEN_BRANCH_PREDICTOR */ - -+/* Hardening Branch predictor code for Arm32 */ -+#ifdef CONFIG_ARM32_HARDEN_BRANCH_PREDICTOR -+ -+/* -+ * Per-CPU vector tables to use when returning to the guests. They will -+ * only be used on platform requiring to harden the branch predictor. 
-+ */ -+DEFINE_PER_CPU_READ_MOSTLY(const char *, bp_harden_vecs); -+ -+extern char hyp_traps_vector_bp_inv[]; -+ -+static void __maybe_unused -+install_bp_hardening_vecs(const struct arm_cpu_capabilities *entry, -+ const char *hyp_vecs, const char *desc) -+{ -+ /* -+ * Enable callbacks are called on every CPU based on the -+ * capabilities. So double-check whether the CPU matches the -+ * entry. -+ */ -+ if ( !entry->matches(entry) ) -+ return; -+ -+ printk(XENLOG_INFO "CPU%u will %s on guest exit\n", -+ smp_processor_id(), desc); -+ this_cpu(bp_harden_vecs) = hyp_vecs; -+} -+ -+#endif -+ - #define MIDR_RANGE(model, min, max) \ - .matches = is_affected_midr_range, \ - .midr_model = model, \ --- -2.14.3 - - -From f167ebf6b33c4dbdb0135c350c0d927980191ac5 Mon Sep 17 00:00:00 2001 -From: Julien Grall -Date: Fri, 2 Feb 2018 14:19:23 +0000 -Subject: [PATCH 17/49] xen/arm32: Invalidate BTB on guest exit for Cortex A17 - and 12 - -In order to avoid aliasing attackes agains the branch predictor, let's -invalidate the BTB on guest exist. This is made complicated by the fact -that we cannot take a branch invalidating the BTB. - -This is based on the fourth version posted by Marc Zyngier on Linux-arm -mailing list (see [1]). - -This is part of XSA-254. - -[1] https://www.spinics.net/lists/arm-kernel/msg632062.html - -Signed-off-by: Marc Zyngier -Signed-off-by: Julien Grall -Reviewed-by: Stefano Stabellini -(cherry picked from commit 05e0690d03dc6177e614e060ae78001d4f2abde2) ---- - xen/arch/arm/arm32/entry.S | 38 ++++++++++++++++++++++++++++++++++++++ - xen/arch/arm/cpuerrata.c | 19 +++++++++++++++++++ - 2 files changed, 57 insertions(+) - -diff --git a/xen/arch/arm/arm32/entry.S b/xen/arch/arm/arm32/entry.S -index 828e52c25c..1ebbe4b065 100644 ---- a/xen/arch/arm/arm32/entry.S -+++ b/xen/arch/arm/arm32/entry.S -@@ -160,6 +160,44 @@ GLOBAL(hyp_traps_vector) - b trap_irq /* 0x18 - IRQ */ - b trap_fiq /* 0x1c - FIQ */ - -+#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR -+ -+ .align 5 -+GLOBAL(hyp_traps_vector_bp_inv) -+ /* -+ * We encode the exception entry in the bottom 3 bits of -+ * SP, and we have to guarantee to be 8 bytes aligned. 
-+ */ -+ add sp, sp, #1 /* Reset 7 */ -+ add sp, sp, #1 /* Undef 6 */ -+ add sp, sp, #1 /* Hypervisor Call 5 */ -+ add sp, sp, #1 /* Prefetch abort 4 */ -+ add sp, sp, #1 /* Data abort 3 */ -+ add sp, sp, #1 /* Hypervisor 2 */ -+ add sp, sp, #1 /* IRQ 1 */ -+ nop /* FIQ 0 */ -+ -+ mcr p15, 0, r0, c7, c5, 6 /* BPIALL */ -+ isb -+ -+.macro vect_br val, targ -+ eor sp, sp, #\val -+ tst sp, #7 -+ eorne sp, sp, #\val -+ beq \targ -+.endm -+ -+ vect_br 0, trap_fiq -+ vect_br 1, trap_irq -+ vect_br 2, trap_guest_sync -+ vect_br 3, trap_data_abort -+ vect_br 4, trap_prefetch_abort -+ vect_br 5, trap_hypervisor_call -+ vect_br 6, trap_undefined_instruction -+ vect_br 7, trap_reset -+ -+#endif /* CONFIG_HARDEN_BRANCH_PREDICTOR */ -+ - DEFINE_TRAP_ENTRY(reset) - DEFINE_TRAP_ENTRY(undefined_instruction) - DEFINE_TRAP_ENTRY(hypervisor_call) -diff --git a/xen/arch/arm/cpuerrata.c b/xen/arch/arm/cpuerrata.c -index 0a138fa735..c79e6d65d3 100644 ---- a/xen/arch/arm/cpuerrata.c -+++ b/xen/arch/arm/cpuerrata.c -@@ -198,6 +198,13 @@ install_bp_hardening_vecs(const struct arm_cpu_capabilities *entry, - this_cpu(bp_harden_vecs) = hyp_vecs; - } - -+static int enable_bp_inv_hardening(void *data) -+{ -+ install_bp_hardening_vecs(data, hyp_traps_vector_bp_inv, -+ "execute BPIALL"); -+ return 0; -+} -+ - #endif - - #define MIDR_RANGE(model, min, max) \ -@@ -283,6 +290,18 @@ static const struct arm_cpu_capabilities arm_errata[] = { - MIDR_ALL_VERSIONS(MIDR_CORTEX_A75), - .enable = enable_psci_bp_hardening, - }, -+#endif -+#ifdef CONFIG_ARM32_HARDEN_BRANCH_PREDICTOR -+ { -+ .capability = ARM_HARDEN_BRANCH_PREDICTOR, -+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A12), -+ .enable = enable_bp_inv_hardening, -+ }, -+ { -+ .capability = ARM_HARDEN_BRANCH_PREDICTOR, -+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A17), -+ .enable = enable_bp_inv_hardening, -+ }, - #endif - {}, - }; --- -2.14.3 - - -From a69a8b5fdc9cc90aa4faf522c355abd849f11001 Mon Sep 17 00:00:00 2001 -From: Julien Grall -Date: Fri, 2 Feb 2018 14:19:24 +0000 -Subject: [PATCH 18/49] xen/arm32: Invalidate icache on guest exist for - Cortex-A15 - -In order to avoid aliasing attacks against the branch predictor on -Cortex A-15, let's invalidate the BTB on guest exit, which can only be -done by invalidating the icache (with ACTLR[0] being set). - -We use the same hack as for A12/A17 to perform the vector decoding. - -This is based on Linux patch from the kpti branch in [1]. - -[1] https://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux.git - -Signed-off-by: Marc Zyngier -Signed-off-by: Julien Grall -Reviewed-by: Stefano Stabellini -(cherry picked from commit 665c4b6aa79eb21b1aada9f7f98fb5cb7f03743a) ---- - xen/arch/arm/arm32/entry.S | 21 +++++++++++++++++++++ - xen/arch/arm/cpuerrata.c | 13 +++++++++++++ - 2 files changed, 34 insertions(+) - -diff --git a/xen/arch/arm/arm32/entry.S b/xen/arch/arm/arm32/entry.S -index 1ebbe4b065..2f8b7cb7b8 100644 ---- a/xen/arch/arm/arm32/entry.S -+++ b/xen/arch/arm/arm32/entry.S -@@ -162,6 +162,26 @@ GLOBAL(hyp_traps_vector) - - #ifdef CONFIG_HARDEN_BRANCH_PREDICTOR - -+ .align 5 -+GLOBAL(hyp_traps_vector_ic_inv) -+ /* -+ * We encode the exception entry in the bottom 3 bits of -+ * SP, and we have to guarantee to be 8 bytes aligned. 
-+ */ -+ add sp, sp, #1 /* Reset 7 */ -+ add sp, sp, #1 /* Undef 6 */ -+ add sp, sp, #1 /* Hypervisor call 5 */ -+ add sp, sp, #1 /* Prefetch abort 4 */ -+ add sp, sp, #1 /* Data abort 3 */ -+ add sp, sp, #1 /* Hypervisor 2 */ -+ add sp, sp, #1 /* IRQ 1 */ -+ nop /* FIQ 0 */ -+ -+ mcr p15, 0, r0, c7, c5, 0 /* ICIALLU */ -+ isb -+ -+ b decode_vectors -+ - .align 5 - GLOBAL(hyp_traps_vector_bp_inv) - /* -@@ -180,6 +200,7 @@ GLOBAL(hyp_traps_vector_bp_inv) - mcr p15, 0, r0, c7, c5, 6 /* BPIALL */ - isb - -+decode_vectors: - .macro vect_br val, targ - eor sp, sp, #\val - tst sp, #7 -diff --git a/xen/arch/arm/cpuerrata.c b/xen/arch/arm/cpuerrata.c -index c79e6d65d3..9c7458ef06 100644 ---- a/xen/arch/arm/cpuerrata.c -+++ b/xen/arch/arm/cpuerrata.c -@@ -180,6 +180,7 @@ static int enable_psci_bp_hardening(void *data) - DEFINE_PER_CPU_READ_MOSTLY(const char *, bp_harden_vecs); - - extern char hyp_traps_vector_bp_inv[]; -+extern char hyp_traps_vector_ic_inv[]; - - static void __maybe_unused - install_bp_hardening_vecs(const struct arm_cpu_capabilities *entry, -@@ -205,6 +206,13 @@ static int enable_bp_inv_hardening(void *data) - return 0; - } - -+static int enable_ic_inv_hardening(void *data) -+{ -+ install_bp_hardening_vecs(data, hyp_traps_vector_ic_inv, -+ "execute ICIALLU"); -+ return 0; -+} -+ - #endif - - #define MIDR_RANGE(model, min, max) \ -@@ -302,6 +310,11 @@ static const struct arm_cpu_capabilities arm_errata[] = { - MIDR_ALL_VERSIONS(MIDR_CORTEX_A17), - .enable = enable_bp_inv_hardening, - }, -+ { -+ .capability = ARM_HARDEN_BRANCH_PREDICTOR, -+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A15), -+ .enable = enable_ic_inv_hardening, -+ }, - #endif - {}, - }; --- -2.14.3 - - -From bbd093c5033d87c0043cf90aa782efdc141dc0e7 Mon Sep 17 00:00:00 2001 -From: Julien Grall -Date: Fri, 2 Feb 2018 14:19:25 +0000 -Subject: [PATCH 19/49] xen/arm32: entry: Document the purpose of r11 in the - traps handler - -It took me a bit of time to understand why __DEFINE_TRAP_ENTRY is -storing the original stack pointer in r11. It is working in pair with -return_traps_entry where sp will be restored from r11. - -This is fine because per the AAPCS r11 must be preserved by the -subroutine. So in return_from_trap, r11 will still contain the original -stack pointer. - -Add some documentation in the code to point the 2 sides to each other. - -Signed-off-by: Julien Grall -Reviewed-by: Stefano Stabellini -(cherry picked from commit dd855aa430f2da9b677c145f0c625a82aaa97110) ---- - xen/arch/arm/arm32/entry.S | 8 ++++++++ - 1 file changed, 8 insertions(+) - -diff --git a/xen/arch/arm/arm32/entry.S b/xen/arch/arm/arm32/entry.S -index 2f8b7cb7b8..f6908e3f16 100644 ---- a/xen/arch/arm/arm32/entry.S -+++ b/xen/arch/arm/arm32/entry.S -@@ -136,6 +136,10 @@ trap_##trap: \ - cpsie iflags; \ - adr lr, return_from_trap; \ - mov r0, sp; \ -+ /* \ -+ * Save the stack pointer in r11. It will be restored after the \ -+ * trap has been handled (see return_from_trap). \ -+ */ \ - mov r11, sp; \ - bic sp, #7; /* Align the stack pointer (noop on guest trap) */ \ - b do_trap_##trap -@@ -229,6 +233,10 @@ DEFINE_TRAP_ENTRY_NOIRQ(fiq) - DEFINE_TRAP_ENTRY_NOABORT(data_abort) - - return_from_trap: -+ /* -+ * Restore the stack pointer from r11. It was saved on exception -+ * entry (see __DEFINE_TRAP_ENTRY). 
-+ */ - mov sp, r11 - ENTRY(return_to_new_vcpu32) - ldr r11, [sp, #UREGS_cpsr] --- -2.14.3 - - -From 79012ead937f0533ec591c4ece925e4d23568874 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Thu, 8 Feb 2018 10:54:11 +0100 -Subject: [PATCH 20/49] x86/alt: Break out alternative-asm into a separate - header file - -Signed-off-by: Andrew Cooper -Reviewed-by: Wei Liu -Acked-by: Jan Beulich -master commit: 9d7b4351d3bb5c744db311cffa57ba3ebb583327 -master date: 2018-01-05 19:57:07 +0000 ---- - xen/include/asm-x86/alternative-asm.h | 31 +++++++++++++++++++++++++++++++ - xen/include/asm-x86/alternative.h | 13 +++---------- - 2 files changed, 34 insertions(+), 10 deletions(-) - create mode 100644 xen/include/asm-x86/alternative-asm.h - -diff --git a/xen/include/asm-x86/alternative-asm.h b/xen/include/asm-x86/alternative-asm.h -new file mode 100644 -index 0000000000..bf0332ef28 ---- /dev/null -+++ b/xen/include/asm-x86/alternative-asm.h -@@ -0,0 +1,31 @@ -+#ifndef _ASM_X86_ALTERNATIVE_ASM_H_ -+#define _ASM_X86_ALTERNATIVE_ASM_H_ -+ -+#ifdef __ASSEMBLY__ -+ -+/* -+ * Issue one struct alt_instr descriptor entry (need to put it into -+ * the section .altinstructions, see below). This entry contains -+ * enough information for the alternatives patching code to patch an -+ * instruction. See apply_alternatives(). -+ */ -+.macro altinstruction_entry orig alt feature orig_len alt_len -+ .long \orig - . -+ .long \alt - . -+ .word \feature -+ .byte \orig_len -+ .byte \alt_len -+.endm -+ -+#endif /* __ASSEMBLY__ */ -+#endif /* _ASM_X86_ALTERNATIVE_ASM_H_ */ -+ -+/* -+ * Local variables: -+ * mode: C -+ * c-file-style: "BSD" -+ * c-basic-offset: 4 -+ * tab-width: 4 -+ * indent-tabs-mode: nil -+ * End: -+ */ -diff --git a/xen/include/asm-x86/alternative.h b/xen/include/asm-x86/alternative.h -index db4f08e0e7..ba537d6b7e 100644 ---- a/xen/include/asm-x86/alternative.h -+++ b/xen/include/asm-x86/alternative.h -@@ -1,17 +1,10 @@ - #ifndef __X86_ALTERNATIVE_H__ - #define __X86_ALTERNATIVE_H__ - -+#include - #include - --#ifdef __ASSEMBLY__ --.macro altinstruction_entry orig alt feature orig_len alt_len -- .long \orig - . -- .long \alt - . -- .word \feature -- .byte \orig_len -- .byte \alt_len --.endm --#else -+#ifndef __ASSEMBLY__ - #include - #include - -@@ -145,6 +138,6 @@ extern void alternative_instructions(void); - /* Use this macro(s) if you need more than one output parameter. */ - #define ASM_OUTPUT2(a...) a - --#endif /* __ASSEMBLY__ */ -+#endif /* !__ASSEMBLY__ */ - - #endif /* __X86_ALTERNATIVE_H__ */ --- -2.14.3 - - -From be3138b6f65955196d67c1d54aea3d6a3bf33934 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Thu, 8 Feb 2018 10:55:11 +0100 -Subject: [PATCH 21/49] x86/alt: Introduce ALTERNATIVE{,_2} macros - -To help creating alternative frames in assembly. 
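For readers new to the alternatives machinery, the following user-space C sketch models what these macros emit and who consumes it (the field names and layout are simplified stand-ins, not the real struct alt_instr, and cpu_has() is faked). Each record describes one patch site; a boot-time pass copies the replacement bytes over the original ones when the named CPU feature is present and pads any slack with NOPs. The ".byte 0xff + ..." lines in the macros perform, at assembly time, the same length check the sketch does at run time.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct alt_instr {
    uint8_t *orig;        /* site to patch (relative offsets in the real thing) */
    const uint8_t *alt;   /* replacement bytes */
    uint16_t feature;     /* only patch if this CPU feature is present */
    uint8_t orig_len;     /* room available at the site */
    uint8_t alt_len;      /* bytes to copy; must not exceed orig_len */
};

#define FEATURE_FOO 1     /* made-up feature bit for the sketch */
#define NOP 0x90          /* single-byte x86 NOP */

static int cpu_has(uint16_t feature)
{
    return feature == FEATURE_FOO;    /* pretend only FOO is present */
}

static void apply_alternatives(struct alt_instr *start, struct alt_instr *end)
{
    for ( struct alt_instr *a = start; a < end; a++ )
    {
        if ( !cpu_has(a->feature) || a->alt_len > a->orig_len )
            continue;
        memcpy(a->orig, a->alt, a->alt_len);
        memset(a->orig + a->alt_len, NOP, a->orig_len - a->alt_len);
    }
}

int main(void)
{
    uint8_t site[4] = { 0x0f, 0x1f, 0x40, 0x00 };   /* "old" instruction bytes */
    static const uint8_t repl[2] = { 0x0f, 0xae };  /* "new" instruction bytes */
    struct alt_instr table[] = {
        { site, repl, FEATURE_FOO, sizeof(site), sizeof(repl) },
    };

    apply_alternatives(table, table + 1);

    for ( unsigned int i = 0; i < sizeof(site); i++ )
        printf("%02x ", site[i]);
    printf("\n");                                   /* prints: 0f ae 90 90 */
    return 0;
}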
- -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich -master commit: 4711428f5e2a9bfff9f8d75b6a696072118c19a4 -master date: 2018-01-05 19:57:07 +0000 ---- - xen/include/asm-x86/alternative-asm.h | 46 +++++++++++++++++++++++++++++++++++ - 1 file changed, 46 insertions(+) - -diff --git a/xen/include/asm-x86/alternative-asm.h b/xen/include/asm-x86/alternative-asm.h -index bf0332ef28..6640e85581 100644 ---- a/xen/include/asm-x86/alternative-asm.h -+++ b/xen/include/asm-x86/alternative-asm.h -@@ -17,6 +17,52 @@ - .byte \alt_len - .endm - -+.macro ALTERNATIVE oldinstr, newinstr, feature -+.Lold_start_\@: -+ \oldinstr -+.Lold_end_\@: -+ -+ .pushsection .altinstructions, "a", @progbits -+ altinstruction_entry .Lold_start_\@, .Lnew_start_\@, \feature, \ -+ (.Lold_end_\@ - .Lold_start_\@), (.Lnew_end_\@ - .Lnew_start_\@) -+ -+ .section .discard, "a", @progbits -+ /* Assembler-time check that \newinstr isn't longer than \oldinstr. */ -+ .byte 0xff + (.Lnew_end_\@ - .Lnew_start_\@) - (.Lold_end_\@ - .Lold_start_\@) -+ -+ .section .altinstr_replacement, "ax", @progbits -+.Lnew_start_\@: -+ \newinstr -+.Lnew_end_\@: -+ .popsection -+.endm -+ -+.macro ALTERNATIVE_2 oldinstr, newinstr1, feature1, newinstr2, feature2 -+.Lold_start_\@: -+ \oldinstr -+.Lold_end_\@: -+ -+ .pushsection .altinstructions, "a", @progbits -+ altinstruction_entry .Lold_start_\@, .Lnew1_start_\@, \feature1, \ -+ (.Lold_end_\@ - .Lold_start_\@), (.Lnew1_end_\@ - .Lnew1_start_\@) -+ altinstruction_entry .Lold_start_\@, .Lnew2_start_\@, \feature2, \ -+ (.Lold_end_\@ - .Lold_start_\@), (.Lnew2_end_\@ - .Lnew2_start_\@) -+ -+ .section .discard, "a", @progbits -+ /* Assembler-time check that \newinstr{1,2} aren't longer than \oldinstr. */ -+ .byte 0xff + (.Lnew1_end_\@ - .Lnew1_start_\@) - (.Lold_end_\@ - .Lold_start_\@) -+ .byte 0xff + (.Lnew2_end_\@ - .Lnew2_start_\@) - (.Lold_end_\@ - .Lold_start_\@) -+ -+ .section .altinstr_replacement, "ax", @progbits -+.Lnew1_start_\@: -+ \newinstr1 -+.Lnew1_end_\@: -+.Lnew2_start_\@: -+ \newinstr2 -+.Lnew2_end_\@: -+ .popsection -+.endm -+ - #endif /* __ASSEMBLY__ */ - #endif /* _ASM_X86_ALTERNATIVE_ASM_H_ */ - --- -2.14.3 - - -From c534ab4e940ae3fbddf0b4840c3549c03654921f Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Thu, 8 Feb 2018 10:56:01 +0100 -Subject: [PATCH 22/49] x86/hvm: Rename update_guest_vendor() callback to - cpuid_policy_changed() - -It will shortly be used for more than just changing the vendor. - -Signed-off-by: Andrew Cooper -Reviewed-by: Wei Liu -Reviewed-by: Jan Beulich -master commit: 3bea00966eb6680410c89df764d075a8fbacc3cc -master date: 2018-01-05 19:57:07 +0000 ---- - xen/arch/x86/domctl.c | 17 ++++++++++------- - xen/arch/x86/hvm/hvm.c | 2 +- - xen/arch/x86/hvm/svm/svm.c | 4 ++-- - xen/arch/x86/hvm/vmx/vmx.c | 5 ++--- - xen/include/asm-x86/hvm/hvm.h | 6 +++--- - 5 files changed, 18 insertions(+), 16 deletions(-) - -diff --git a/xen/arch/x86/domctl.c b/xen/arch/x86/domctl.c -index 075ee92cd7..fbb4c5e758 100644 ---- a/xen/arch/x86/domctl.c -+++ b/xen/arch/x86/domctl.c -@@ -53,6 +53,7 @@ static int update_domain_cpuid_info(struct domain *d, - struct cpuid_policy *p = d->arch.cpuid; - const struct cpuid_leaf leaf = { ctl->eax, ctl->ebx, ctl->ecx, ctl->edx }; - int old_vendor = p->x86_vendor; -+ bool call_policy_changed = false; /* Avoid for_each_vcpu() unnecessarily */ - - /* - * Skip update for leaves we don't care about. 
This avoids the overhead -@@ -128,13 +129,7 @@ static int update_domain_cpuid_info(struct domain *d, - switch ( ctl->input[0] ) - { - case 0: -- if ( is_hvm_domain(d) && (p->x86_vendor != old_vendor) ) -- { -- struct vcpu *v; -- -- for_each_vcpu( d, v ) -- hvm_update_guest_vendor(v); -- } -+ call_policy_changed = (p->x86_vendor != old_vendor); - break; - - case 1: -@@ -299,6 +294,14 @@ static int update_domain_cpuid_info(struct domain *d, - break; - } - -+ if ( is_hvm_domain(d) && call_policy_changed ) -+ { -+ struct vcpu *v; -+ -+ for_each_vcpu( d, v ) -+ hvm_cpuid_policy_changed(v); -+ } -+ - return 0; - } - -diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c -index 9f7b096072..f5de233b2e 100644 ---- a/xen/arch/x86/hvm/hvm.c -+++ b/xen/arch/x86/hvm/hvm.c -@@ -1555,7 +1555,7 @@ int hvm_vcpu_initialise(struct vcpu *v) - hvm_set_guest_tsc(v, 0); - } - -- hvm_update_guest_vendor(v); -+ hvm_cpuid_policy_changed(v); - - return 0; - -diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c -index b9cf423fd9..b5b927933f 100644 ---- a/xen/arch/x86/hvm/svm/svm.c -+++ b/xen/arch/x86/hvm/svm/svm.c -@@ -613,7 +613,7 @@ static void svm_update_guest_efer(struct vcpu *v) - vmcb_set_efer(vmcb, new_efer); - } - --static void svm_update_guest_vendor(struct vcpu *v) -+static void svm_cpuid_policy_changed(struct vcpu *v) - { - struct arch_svm_struct *arch_svm = &v->arch.hvm_svm; - struct vmcb_struct *vmcb = arch_svm->vmcb; -@@ -2422,7 +2422,7 @@ static struct hvm_function_table __initdata svm_function_table = { - .get_shadow_gs_base = svm_get_shadow_gs_base, - .update_guest_cr = svm_update_guest_cr, - .update_guest_efer = svm_update_guest_efer, -- .update_guest_vendor = svm_update_guest_vendor, -+ .cpuid_policy_changed = svm_cpuid_policy_changed, - .fpu_leave = svm_fpu_leave, - .set_guest_pat = svm_set_guest_pat, - .get_guest_pat = svm_get_guest_pat, -diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c -index 73254bf5d4..4221fb8c56 100644 ---- a/xen/arch/x86/hvm/vmx/vmx.c -+++ b/xen/arch/x86/hvm/vmx/vmx.c -@@ -72,7 +72,6 @@ static void vmx_free_vlapic_mapping(struct domain *d); - static void vmx_install_vlapic_mapping(struct vcpu *v); - static void vmx_update_guest_cr(struct vcpu *v, unsigned int cr); - static void vmx_update_guest_efer(struct vcpu *v); --static void vmx_update_guest_vendor(struct vcpu *v); - static void vmx_wbinvd_intercept(void); - static void vmx_fpu_dirty_intercept(void); - static int vmx_msr_read_intercept(unsigned int msr, uint64_t *msr_content); -@@ -655,7 +654,7 @@ void vmx_update_exception_bitmap(struct vcpu *v) - __vmwrite(EXCEPTION_BITMAP, bitmap); - } - --static void vmx_update_guest_vendor(struct vcpu *v) -+static void vmx_cpuid_policy_changed(struct vcpu *v) - { - if ( opt_hvm_fep || - (v->domain->arch.cpuid->x86_vendor != boot_cpu_data.x86_vendor) ) -@@ -2318,7 +2317,7 @@ static struct hvm_function_table __initdata vmx_function_table = { - .update_host_cr3 = vmx_update_host_cr3, - .update_guest_cr = vmx_update_guest_cr, - .update_guest_efer = vmx_update_guest_efer, -- .update_guest_vendor = vmx_update_guest_vendor, -+ .cpuid_policy_changed = vmx_cpuid_policy_changed, - .fpu_leave = vmx_fpu_leave, - .set_guest_pat = vmx_set_guest_pat, - .get_guest_pat = vmx_get_guest_pat, -diff --git a/xen/include/asm-x86/hvm/hvm.h b/xen/include/asm-x86/hvm/hvm.h -index 6ecad33316..7275c65d07 100644 ---- a/xen/include/asm-x86/hvm/hvm.h -+++ b/xen/include/asm-x86/hvm/hvm.h -@@ -135,7 +135,7 @@ struct hvm_function_table { - void (*update_guest_cr)(struct 
vcpu *v, unsigned int cr); - void (*update_guest_efer)(struct vcpu *v); - -- void (*update_guest_vendor)(struct vcpu *v); -+ void (*cpuid_policy_changed)(struct vcpu *v); - - void (*fpu_leave)(struct vcpu *v); - -@@ -334,9 +334,9 @@ static inline void hvm_update_guest_efer(struct vcpu *v) - hvm_funcs.update_guest_efer(v); - } - --static inline void hvm_update_guest_vendor(struct vcpu *v) -+static inline void hvm_cpuid_policy_changed(struct vcpu *v) - { -- hvm_funcs.update_guest_vendor(v); -+ hvm_funcs.cpuid_policy_changed(v); - } - - /* --- -2.14.3 - - -From e32f814160c95094da83fbc813b45eca42d5397a Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Thu, 8 Feb 2018 10:56:51 +0100 -Subject: [PATCH 23/49] x86: Introduce a common cpuid_policy_updated() - -No practical change at the moment, but future changes will need to react -irrespective of guest type. - -Signed-off-by: Andrew Cooper -Reviewed-by: Wei Liu -Acked-by: Jan Beulich -master commit: b357546b43ab87dfb10d740ae637a685134d5e32 -master date: 2018-01-05 19:57:07 +0000 ---- - xen/arch/x86/domain.c | 12 ++++++++++++ - xen/arch/x86/domctl.c | 4 ++-- - xen/arch/x86/hvm/hvm.c | 2 -- - xen/include/asm-x86/domain.h | 2 ++ - 4 files changed, 16 insertions(+), 4 deletions(-) - -diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c -index b357b60f73..aaa2b28413 100644 ---- a/xen/arch/x86/domain.c -+++ b/xen/arch/x86/domain.c -@@ -363,6 +363,8 @@ int vcpu_initialise(struct vcpu *v) - - if ( (rc = init_vcpu_msr_policy(v)) ) - goto fail; -+ -+ cpuid_policy_updated(v); - } - - return rc; -@@ -2026,6 +2028,16 @@ int domain_relinquish_resources(struct domain *d) - return 0; - } - -+/* -+ * Called during vcpu construction, and each time the toolstack changes the -+ * CPUID configuration for the domain. -+ */ -+void cpuid_policy_updated(struct vcpu *v) -+{ -+ if ( is_hvm_vcpu(v) ) -+ hvm_cpuid_policy_changed(v); -+} -+ - void arch_dump_domain_info(struct domain *d) - { - paging_dump_domain_info(d); -diff --git a/xen/arch/x86/domctl.c b/xen/arch/x86/domctl.c -index fbb4c5e758..e61201267b 100644 ---- a/xen/arch/x86/domctl.c -+++ b/xen/arch/x86/domctl.c -@@ -294,12 +294,12 @@ static int update_domain_cpuid_info(struct domain *d, - break; - } - -- if ( is_hvm_domain(d) && call_policy_changed ) -+ if ( call_policy_changed ) - { - struct vcpu *v; - - for_each_vcpu( d, v ) -- hvm_cpuid_policy_changed(v); -+ cpuid_policy_updated(v); - } - - return 0; -diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c -index f5de233b2e..2a3dd4ee91 100644 ---- a/xen/arch/x86/hvm/hvm.c -+++ b/xen/arch/x86/hvm/hvm.c -@@ -1555,8 +1555,6 @@ int hvm_vcpu_initialise(struct vcpu *v) - hvm_set_guest_tsc(v, 0); - } - -- hvm_cpuid_policy_changed(v); -- - return 0; - - fail6: -diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h -index f69911918e..4679d5477d 100644 ---- a/xen/include/asm-x86/domain.h -+++ b/xen/include/asm-x86/domain.h -@@ -79,6 +79,8 @@ void toggle_guest_mode(struct vcpu *); - /* x86/64: toggle guest page tables between kernel and user modes. */ - void toggle_guest_pt(struct vcpu *); - -+void cpuid_policy_updated(struct vcpu *v); -+ - /* - * Initialise a hypercall-transfer page. The given pointer must be mapped - * in Xen virtual address space (accesses are not validated or checked). 
--- -2.14.3 - - -From d02ef3d27485e1429ac480cca78ab3636387df23 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Thu, 8 Feb 2018 10:57:44 +0100 -Subject: [PATCH 24/49] x86/entry: Rearrange RESTORE_ALL to restore register in - stack order - -Results in a more predictable (i.e. linear) memory access pattern. - -No functional change. - -This is part of XSA-254. - -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich -Reviewed-by: Wei Liu -master commit: f85d105e27735f0e20aa30d77f03774f3ed55ae5 -master date: 2018-01-05 19:57:08 +0000 ---- - xen/include/asm-x86/asm_defns.h | 26 +++++++++++++------------- - 1 file changed, 13 insertions(+), 13 deletions(-) - -diff --git a/xen/include/asm-x86/asm_defns.h b/xen/include/asm-x86/asm_defns.h -index fb0fee9286..c0523861d9 100644 ---- a/xen/include/asm-x86/asm_defns.h -+++ b/xen/include/asm-x86/asm_defns.h -@@ -288,6 +288,19 @@ static always_inline void stac(void) - * safety against information leaks. - */ - .macro RESTORE_ALL adj=0 compat=0 -+.if !\compat -+ movq UREGS_r15(%rsp), %r15 -+ movq UREGS_r14(%rsp), %r14 -+ movq UREGS_r13(%rsp), %r13 -+ movq UREGS_r12(%rsp), %r12 -+.else -+ xor %r15, %r15 -+ xor %r14, %r14 -+ xor %r13, %r13 -+ xor %r12, %r12 -+.endif -+ LOAD_ONE_REG(bp, \compat) -+ LOAD_ONE_REG(bx, \compat) - .if !\compat - movq UREGS_r11(%rsp),%r11 - movq UREGS_r10(%rsp),%r10 -@@ -304,19 +317,6 @@ static always_inline void stac(void) - LOAD_ONE_REG(dx, \compat) - LOAD_ONE_REG(si, \compat) - LOAD_ONE_REG(di, \compat) --.if !\compat -- movq UREGS_r15(%rsp),%r15 -- movq UREGS_r14(%rsp),%r14 -- movq UREGS_r13(%rsp),%r13 -- movq UREGS_r12(%rsp),%r12 --.else -- xor %r15, %r15 -- xor %r14, %r14 -- xor %r13, %r13 -- xor %r12, %r12 --.endif -- LOAD_ONE_REG(bp, \compat) -- LOAD_ONE_REG(bx, \compat) - subq $-(UREGS_error_code-UREGS_r15+\adj), %rsp - .endm - --- -2.14.3 - - -From ab95cb0d948fdc9fcda215fec0526ac902340b14 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Thu, 8 Feb 2018 11:00:45 +0100 -Subject: [PATCH 25/49] x86/hvm: Use SAVE_ALL to construct the cpu_user_regs - frame after VMExit - -No practical change. - -One side effect in debug builds is that %rbp is inverted in the manner -expected by the stack unwinder to indicate a interrupt frame. - -This is part of XSA-254. 
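The frame-pointer detail is worth a standalone illustration. In this small C sketch (illustrative only; the real unwinder logic is more involved) the marker is a plain bitwise complement: it is its own inverse, so it is cheap to apply and undo, and the complement of a plausible stack address does not itself look like a stack address, which is what lets an unwinder recognise the frame as an interrupt frame rather than an ordinary call frame.

#include <stdint.h>
#include <stdio.h>

static uintptr_t mark_interrupt_frame(uintptr_t rbp)   { return ~rbp; }
static uintptr_t unmark_interrupt_frame(uintptr_t rbp) { return ~rbp; }

int main(void)
{
    unsigned long frame[4];                       /* stand-in for a stack frame */
    uintptr_t rbp = (uintptr_t)&frame[2];
    uintptr_t marked = mark_interrupt_frame(rbp);

    printf("real frame pointer : %#lx\n", (unsigned long)rbp);
    printf("marked (inverted)  : %#lx\n", (unsigned long)marked);
    printf("recovered          : %#lx\n",
           (unsigned long)unmark_interrupt_frame(marked));
    return 0;
}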
- -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich -Reviewed-by: Wei Liu -master commit: 13682ca8c94bd5612a44f7f1edc1fd8ff675dacb -master date: 2018-01-05 19:57:08 +0000 ---- - xen/arch/x86/hvm/svm/entry.S | 22 ++++------------------ - xen/arch/x86/hvm/vmx/entry.S | 17 ++--------------- - 2 files changed, 6 insertions(+), 33 deletions(-) - -diff --git a/xen/arch/x86/hvm/svm/entry.S b/xen/arch/x86/hvm/svm/entry.S -index 4a72e38e8b..df86da0a81 100644 ---- a/xen/arch/x86/hvm/svm/entry.S -+++ b/xen/arch/x86/hvm/svm/entry.S -@@ -98,24 +98,10 @@ UNLIKELY_END(svm_trace) - - VMRUN - -- GET_CURRENT(ax) -- push %rdi -- push %rsi -- push %rdx -- push %rcx -- mov VCPU_svm_vmcb(%rax),%rcx -- push %rax -- push %r8 -- push %r9 -- push %r10 -- push %r11 -- push %rbx -- mov %rax,%rbx -- push %rbp -- push %r12 -- push %r13 -- push %r14 -- push %r15 -+ SAVE_ALL -+ -+ GET_CURRENT(bx) -+ mov VCPU_svm_vmcb(%rbx),%rcx - - movb $0,VCPU_svm_vmcb_in_sync(%rbx) - mov VMCB_rax(%rcx),%rax -diff --git a/xen/arch/x86/hvm/vmx/entry.S b/xen/arch/x86/hvm/vmx/entry.S -index 47cd674260..b2f98be7f5 100644 ---- a/xen/arch/x86/hvm/vmx/entry.S -+++ b/xen/arch/x86/hvm/vmx/entry.S -@@ -30,23 +30,10 @@ - #define VMLAUNCH .byte 0x0f,0x01,0xc2 - - ENTRY(vmx_asm_vmexit_handler) -- push %rdi -- push %rsi -- push %rdx -- push %rcx -- push %rax -+ SAVE_ALL -+ - mov %cr2,%rax -- push %r8 -- push %r9 -- push %r10 -- push %r11 -- push %rbx - GET_CURRENT(bx) -- push %rbp -- push %r12 -- push %r13 -- push %r14 -- push %r15 - - movb $1,VCPU_vmx_launched(%rbx) - mov %rax,VCPU_hvm_guest_cr2(%rbx) --- -2.14.3 - - -From 1830b20b6b83be38738784ea162d62fcf85f3178 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Thu, 8 Feb 2018 11:01:29 +0100 -Subject: [PATCH 26/49] x86/entry: Erase guest GPR state on entry to Xen - -This reduces the number of code gadgets which can be attacked with arbitrary -guest-controlled GPR values. - -This is part of XSA-254. - -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich -Reviewed-by: Wei Liu -master commit: 03bd8c3a70d101fc2f8f36f1e171b7594462a4cd -master date: 2018-01-05 19:57:08 +0000 ---- - xen/include/asm-x86/asm_defns.h | 16 ++++++++++++++++ - 1 file changed, 16 insertions(+) - -diff --git a/xen/include/asm-x86/asm_defns.h b/xen/include/asm-x86/asm_defns.h -index c0523861d9..73d96227f4 100644 ---- a/xen/include/asm-x86/asm_defns.h -+++ b/xen/include/asm-x86/asm_defns.h -@@ -247,22 +247,34 @@ static always_inline void stac(void) - addq $-(UREGS_error_code-UREGS_r15), %rsp - cld - movq %rdi,UREGS_rdi(%rsp) -+ xor %edi, %edi - movq %rsi,UREGS_rsi(%rsp) -+ xor %esi, %esi - movq %rdx,UREGS_rdx(%rsp) -+ xor %edx, %edx - movq %rcx,UREGS_rcx(%rsp) -+ xor %ecx, %ecx - movq %rax,UREGS_rax(%rsp) -+ xor %eax, %eax - .if !\compat - movq %r8,UREGS_r8(%rsp) - movq %r9,UREGS_r9(%rsp) - movq %r10,UREGS_r10(%rsp) - movq %r11,UREGS_r11(%rsp) - .endif -+ xor %r8, %r8 -+ xor %r9, %r9 -+ xor %r10, %r10 -+ xor %r11, %r11 - movq %rbx,UREGS_rbx(%rsp) -+ xor %ebx, %ebx - movq %rbp,UREGS_rbp(%rsp) - #ifdef CONFIG_FRAME_POINTER - /* Indicate special exception stack frame by inverting the frame pointer. 
*/ - leaq UREGS_rbp(%rsp), %rbp - notq %rbp -+#else -+ xor %ebp, %ebp - #endif - .if !\compat - movq %r12,UREGS_r12(%rsp) -@@ -270,6 +282,10 @@ static always_inline void stac(void) - movq %r14,UREGS_r14(%rsp) - movq %r15,UREGS_r15(%rsp) - .endif -+ xor %r12, %r12 -+ xor %r13, %r13 -+ xor %r14, %r14 -+ xor %r15, %r15 - .endm - - #define LOAD_ONE_REG(reg, compat) \ --- -2.14.3 - - -From 8743fc2ef7d107104c17b773eadee15fefa64e53 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Thu, 8 Feb 2018 11:02:14 +0100 -Subject: [PATCH 27/49] common/wait: Clarifications to wait infrastructure - -This logic is not as clear as it could be. Add some comments to help. - -Rearrange the asm block in __prepare_to_wait() to separate the GPR -saving/restoring from the internal logic. - -While tweaking, add an unreachable() following the jmp in -check_wakeup_from_wait(). - -No functional change. - -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich -master commit: 2d1c82261d966735e82e5971eddb63ba3c565a37 -master date: 2018-01-05 19:57:08 +0000 ---- - xen/common/wait.c | 31 ++++++++++++++++++++++++------- - 1 file changed, 24 insertions(+), 7 deletions(-) - -diff --git a/xen/common/wait.c b/xen/common/wait.c -index c5fc094e2c..3d3d9fe7a2 100644 ---- a/xen/common/wait.c -+++ b/xen/common/wait.c -@@ -138,14 +138,26 @@ static void __prepare_to_wait(struct waitqueue_vcpu *wqv) - domain_crash_synchronous(); - } - -+ /* Hand-rolled setjmp(). */ - asm volatile ( -- "push %%rax; push %%rbx; push %%rdx; " -- "push %%rbp; push %%r8; push %%r9; push %%r10; push %%r11; " -- "push %%r12; push %%r13; push %%r14; push %%r15; call 1f; " -- "1: addq $2f-1b,(%%rsp); sub %%esp,%%ecx; cmp %3,%%ecx; ja 3f; " -- "mov %%rsp,%%rsi; 2: rep movsb; mov %%rsp,%%rsi; 3: pop %%rax; " -- "pop %%r15; pop %%r14; pop %%r13; pop %%r12; " -- "pop %%r11; pop %%r10; pop %%r9; pop %%r8; " -+ "push %%rax; push %%rbx; push %%rdx; push %%rbp;" -+ "push %%r8; push %%r9; push %%r10; push %%r11;" -+ "push %%r12; push %%r13; push %%r14; push %%r15;" -+ -+ "call 1f;" -+ "1: addq $2f-1b,(%%rsp);" -+ "sub %%esp,%%ecx;" -+ "cmp %3,%%ecx;" -+ "ja 3f;" -+ "mov %%rsp,%%rsi;" -+ -+ /* check_wakeup_from_wait() longjmp()'s to this point. */ -+ "2: rep movsb;" -+ "mov %%rsp,%%rsi;" -+ "3: pop %%rax;" -+ -+ "pop %%r15; pop %%r14; pop %%r13; pop %%r12;" -+ "pop %%r11; pop %%r10; pop %%r9; pop %%r8;" - "pop %%rbp; pop %%rdx; pop %%rbx; pop %%rax" - : "=&S" (wqv->esp), "=&c" (dummy), "=&D" (dummy) - : "i" (PAGE_SIZE), "0" (0), "1" (cpu_info), "2" (wqv->stack) -@@ -189,11 +201,16 @@ void check_wakeup_from_wait(void) - wait(); /* takes us back into the scheduler */ - } - -+ /* -+ * Hand-rolled longjmp(). Returns to the pointer on the top of -+ * wqv->stack, and lands on a `rep movs` instruction. -+ */ - asm volatile ( - "mov %1,%%"__OP"sp; jmp *(%0)" - : : "S" (wqv->stack), "D" (wqv->esp), - "c" ((char *)get_cpu_info() - (char *)wqv->esp) - : "memory" ); -+ unreachable(); - } - - #else /* !CONFIG_X86 */ --- -2.14.3 - - -From 47bbcb2dd1291d61062fe58da807010631fe1b3a Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Thu, 8 Feb 2018 11:03:45 +0100 -Subject: [PATCH 28/49] x86: Support compiling with indirect branch thunks - -Use -mindirect-branch=thunk-extern/-mindirect-branch-register when available. -To begin with, use the retpoline thunk. Later work will add alternative -thunks which can be selected at boot time. - -This is part of XSA-254. 
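To make the compiler-flag half of this concrete, here is an ordinary C fragment (purely illustrative, nothing Xen-specific) with a note on what the options change. The source never mentions thunks; the rewrite happens entirely in code generation, which is why a hypervisor built this way must itself provide the __x86_indirect_thunk_* symbols, as the indirect-thunk.S introduced below does.

/*
 * Built normally, the indirect call in apply() compiles to something like
 *     call *%rax
 * Built with -mindirect-branch=thunk-extern -mindirect-branch-register it
 * compiles to something like
 *     call __x86_indirect_thunk_rax
 * (the exact register varies), and the link fails unless that symbol is
 * supplied with a speculation-safe body such as a retpoline.
 */
#include <stdio.h>

static int add_one(int x) { return x + 1; }
static int add_two(int x) { return x + 2; }

static int apply(int (*op)(int), int x)
{
    return op(x);          /* the indirect branch in question */
}

int main(void)
{
    printf("%d %d\n", apply(add_one, 41), apply(add_two, 40));
    return 0;
}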
- -Signed-off-by: Andrew Cooper -Acked-by: Jan Beulich -master commit: 3659f0f4bcc6ca08103d1a7ae4e97535ecc978be -master date: 2018-01-16 17:45:50 +0000 ---- - xen/arch/x86/Makefile | 1 + - xen/arch/x86/Rules.mk | 7 +++++++ - xen/arch/x86/indirect-thunk.S | 38 ++++++++++++++++++++++++++++++++++++++ - xen/arch/x86/xen.lds.S | 1 + - 4 files changed, 47 insertions(+) - create mode 100644 xen/arch/x86/indirect-thunk.S - -diff --git a/xen/arch/x86/Makefile b/xen/arch/x86/Makefile -index d5d58a205e..b334366db8 100644 ---- a/xen/arch/x86/Makefile -+++ b/xen/arch/x86/Makefile -@@ -36,6 +36,7 @@ obj-y += io_apic.o - obj-$(CONFIG_LIVEPATCH) += alternative.o livepatch.o - obj-y += msi.o - obj-y += msr.o -+obj-$(CONFIG_INDIRECT_THUNK) += indirect-thunk.o - obj-y += ioport_emulate.o - obj-y += irq.o - obj-$(CONFIG_KEXEC) += machine_kexec.o -diff --git a/xen/arch/x86/Rules.mk b/xen/arch/x86/Rules.mk -index 568657ee52..abcc4d4f70 100644 ---- a/xen/arch/x86/Rules.mk -+++ b/xen/arch/x86/Rules.mk -@@ -30,3 +30,10 @@ CFLAGS += -fno-asynchronous-unwind-tables - ifneq ($(call cc-option,$(CC),-fvisibility=hidden,n),n) - CFLAGS += -DGCC_HAS_VISIBILITY_ATTRIBUTE - endif -+ -+# Compile with thunk-extern, indirect-branch-register if avaiable. -+ifneq ($(call cc-option,$(CC),-mindirect-branch-register,n),n) -+CFLAGS += -mindirect-branch=thunk-extern -mindirect-branch-register -+CFLAGS += -DCONFIG_INDIRECT_THUNK -+export CONFIG_INDIRECT_THUNK=y -+endif -diff --git a/xen/arch/x86/indirect-thunk.S b/xen/arch/x86/indirect-thunk.S -new file mode 100644 -index 0000000000..3eaf505d0e ---- /dev/null -+++ b/xen/arch/x86/indirect-thunk.S -@@ -0,0 +1,38 @@ -+/* -+ * Implement __x86_indirect_thunk_* symbols for use with compatbile compilers -+ * and the -mindirect-branch=thunk-extern -mindirect-branch-register options. -+ * -+ * Copyright (c) 2017-2018 Citrix Systems Ltd. -+ * -+ * This source code is licensed under the GNU General Public License, -+ * Version 2. See the file COPYING for more details. -+ */ -+ .file __FILE__ -+ -+#include -+ -+.macro IND_THUNK_RETPOLINE reg:req -+ call 2f -+1: -+ lfence -+ jmp 1b -+2: -+ mov %\reg, (%rsp) -+ ret -+.endm -+ -+/* -+ * Build the __x86_indirect_thunk_* symbols. Currently implement the -+ * retpoline thunk only. -+ */ -+.macro GEN_INDIRECT_THUNK reg:req -+ .section .text.__x86_indirect_thunk_\reg, "ax", @progbits -+ -+ENTRY(__x86_indirect_thunk_\reg) -+ IND_THUNK_RETPOLINE \reg -+.endm -+ -+/* Instantiate GEN_INDIRECT_THUNK for each register except %rsp. */ -+.irp reg, ax, cx, dx, bx, bp, si, di, 8, 9, 10, 11, 12, 13, 14, 15 -+ GEN_INDIRECT_THUNK reg=r\reg -+.endr -diff --git a/xen/arch/x86/xen.lds.S b/xen/arch/x86/xen.lds.S -index d5e8821d41..d3c984a463 100644 ---- a/xen/arch/x86/xen.lds.S -+++ b/xen/arch/x86/xen.lds.S -@@ -59,6 +59,7 @@ SECTIONS - _stext = .; /* Text and read-only data */ - *(.text) - *(.text.page_aligned) -+ *(.text.__x86_indirect_thunk_*) - *(.text.cold) - *(.text.unlikely) - *(.fixup) --- -2.14.3 - - -From 32babfc19ad3a3123f8ed4466df3c79492a2212b Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Thu, 8 Feb 2018 11:04:53 +0100 -Subject: [PATCH 29/49] x86: Support indirect thunks from assembly code - -Introduce INDIRECT_CALL and INDIRECT_JMP which either degrade to a normal -indirect branch, or dispatch to the __x86_indirect_thunk_* symbols. - -Update all the manual indirect branches in to use the new thunks. The -indirect branches in the early boot and kexec path are left intact as we can't -use the compiled-in thunks at those points. 
- -This is part of XSA-254. - -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich -master commit: 7c508612f7a5096b4819d4ef2ce566e01bd66c0c -master date: 2018-01-16 17:45:50 +0000 ---- - xen/Rules.mk | 4 ++-- - xen/arch/x86/Rules.mk | 6 +++++ - xen/arch/x86/boot/trampoline.S | 24 +++++++++++++++++-- - xen/arch/x86/extable.c | 4 ++-- - xen/arch/x86/pv/emul-priv-op.c | 39 +++++++++++++++++++++--------- - xen/arch/x86/x86_64/entry.S | 6 +++-- - xen/arch/x86/x86_emulate/x86_emulate.c | 4 ++-- - xen/common/wait.c | 8 ++++--- - xen/include/asm-x86/asm_defns.h | 8 +++++++ - xen/include/asm-x86/indirect_thunk_asm.h | 41 ++++++++++++++++++++++++++++++++ - 10 files changed, 120 insertions(+), 24 deletions(-) - create mode 100644 xen/include/asm-x86/indirect_thunk_asm.h - -diff --git a/xen/Rules.mk b/xen/Rules.mk -index 2659f8a4d1..3cf40754a6 100644 ---- a/xen/Rules.mk -+++ b/xen/Rules.mk -@@ -66,8 +66,8 @@ endif - - AFLAGS-y += -D__ASSEMBLY__ - --# Clang's built-in assembler can't handle .code16/.code32/.code64 yet --AFLAGS-$(clang) += -no-integrated-as -+# Clang's built-in assembler can't handle embedded .include's -+CFLAGS-$(clang) += -no-integrated-as - - ALL_OBJS := $(ALL_OBJS-y) - -diff --git a/xen/arch/x86/Rules.mk b/xen/arch/x86/Rules.mk -index abcc4d4f70..70e9d8f5cf 100644 ---- a/xen/arch/x86/Rules.mk -+++ b/xen/arch/x86/Rules.mk -@@ -37,3 +37,9 @@ CFLAGS += -mindirect-branch=thunk-extern -mindirect-branch-register - CFLAGS += -DCONFIG_INDIRECT_THUNK - export CONFIG_INDIRECT_THUNK=y - endif -+ -+# Set up the assembler include path properly for older GCC toolchains. Clang -+# objects to the agument being passed however. -+ifneq ($(clang),y) -+CFLAGS += -Wa,-I$(BASEDIR)/include -+endif -diff --git a/xen/arch/x86/boot/trampoline.S b/xen/arch/x86/boot/trampoline.S -index 4d640f3fcd..f70d913544 100644 ---- a/xen/arch/x86/boot/trampoline.S -+++ b/xen/arch/x86/boot/trampoline.S -@@ -153,8 +153,28 @@ trampoline_protmode_entry: - .code64 - start64: - /* Jump to high mappings. */ -- movabs $__high_start,%rax -- jmpq *%rax -+ movabs $__high_start, %rdi -+ -+#ifdef CONFIG_INDIRECT_THUNK -+ /* -+ * If booting virtualised, or hot-onlining a CPU, sibling threads can -+ * attempt Branch Target Injection against this jmp. -+ * -+ * We've got no usable stack so can't use a RETPOLINE thunk, and are -+ * further than disp32 from the high mappings so couldn't use -+ * JUMP_THUNK even if it was a non-RETPOLINE thunk. Furthermore, an -+ * LFENCE isn't necessarily safe to use at this point. -+ * -+ * As this isn't a hotpath, use a fully serialising event to reduce -+ * the speculation window as much as possible. %ebx needs preserving -+ * for __high_start. 
-+ */ -+ mov %ebx, %esi -+ cpuid -+ mov %esi, %ebx -+#endif -+ -+ jmpq *%rdi - - #include "wakeup.S" - -diff --git a/xen/arch/x86/extable.c b/xen/arch/x86/extable.c -index 6fffe057c6..72f30d9060 100644 ---- a/xen/arch/x86/extable.c -+++ b/xen/arch/x86/extable.c -@@ -158,7 +158,7 @@ static int __init stub_selftest(void) - memcpy(ptr, tests[i].opc, ARRAY_SIZE(tests[i].opc)); - unmap_domain_page(ptr); - -- asm volatile ( "call *%[stb]\n" -+ asm volatile ( "INDIRECT_CALL %[stb]\n" - ".Lret%=:\n\t" - ".pushsection .fixup,\"ax\"\n" - ".Lfix%=:\n\t" -@@ -167,7 +167,7 @@ static int __init stub_selftest(void) - ".popsection\n\t" - _ASM_EXTABLE(.Lret%=, .Lfix%=) - : [exn] "+m" (res) -- : [stb] "rm" (addr), "a" (tests[i].rax)); -+ : [stb] "r" (addr), "a" (tests[i].rax)); - ASSERT(res == tests[i].res.raw); - } - -diff --git a/xen/arch/x86/pv/emul-priv-op.c b/xen/arch/x86/pv/emul-priv-op.c -index 5f23c2cfbf..b965b3ece7 100644 ---- a/xen/arch/x86/pv/emul-priv-op.c -+++ b/xen/arch/x86/pv/emul-priv-op.c -@@ -73,37 +73,54 @@ void (*pv_post_outb_hook)(unsigned int port, u8 value); - - typedef void io_emul_stub_t(struct cpu_user_regs *); - -+void __x86_indirect_thunk_rcx(void); -+ - static io_emul_stub_t *io_emul_stub_setup(struct priv_op_ctxt *ctxt, u8 opcode, - unsigned int port, unsigned int bytes) - { -+ struct stubs *this_stubs = &this_cpu(stubs); -+ unsigned long stub_va = this_stubs->addr + STUB_BUF_SIZE / 2; -+ - if ( !ctxt->io_emul_stub ) -- ctxt->io_emul_stub = map_domain_page(_mfn(this_cpu(stubs.mfn))) + -- (this_cpu(stubs.addr) & -- ~PAGE_MASK) + -- STUB_BUF_SIZE / 2; -+ ctxt->io_emul_stub = -+ map_domain_page(_mfn(this_stubs->mfn)) + (stub_va & ~PAGE_MASK); - - /* movq $host_to_guest_gpr_switch,%rcx */ - ctxt->io_emul_stub[0] = 0x48; - ctxt->io_emul_stub[1] = 0xb9; - *(void **)&ctxt->io_emul_stub[2] = (void *)host_to_guest_gpr_switch; -+ -+#ifdef CONFIG_INDIRECT_THUNK -+ /* callq __x86_indirect_thunk_rcx */ -+ ctxt->io_emul_stub[10] = 0xe8; -+ *(int32_t *)&ctxt->io_emul_stub[11] = -+ (long)__x86_indirect_thunk_rcx - (stub_va + 11 + 4); -+#else - /* callq *%rcx */ - ctxt->io_emul_stub[10] = 0xff; - ctxt->io_emul_stub[11] = 0xd1; -+ /* TODO: untangle ideal_nops from init/livepatch Kconfig options. */ -+ memcpy(&ctxt->io_emul_stub[12], "\x0f\x1f\x00", 3); /* P6_NOP3 */ -+#endif -+ - /* data16 or nop */ -- ctxt->io_emul_stub[12] = (bytes != 2) ? 0x90 : 0x66; -+ ctxt->io_emul_stub[15] = (bytes != 2) ? 0x90 : 0x66; - /* */ -- ctxt->io_emul_stub[13] = opcode; -+ ctxt->io_emul_stub[16] = opcode; - /* imm8 or nop */ -- ctxt->io_emul_stub[14] = !(opcode & 8) ? port : 0x90; -+ ctxt->io_emul_stub[17] = !(opcode & 8) ? port : 0x90; - /* ret (jumps to guest_to_host_gpr_switch) */ -- ctxt->io_emul_stub[15] = 0xc3; -- BUILD_BUG_ON(STUB_BUF_SIZE / 2 < 16); -+ ctxt->io_emul_stub[18] = 0xc3; -+ BUILD_BUG_ON(STUB_BUF_SIZE / 2 < 19); - - if ( ioemul_handle_quirk ) -- ioemul_handle_quirk(opcode, &ctxt->io_emul_stub[12], ctxt->ctxt.regs); -+ { -+ BUILD_BUG_ON(STUB_BUF_SIZE / 2 < 15 + 10); -+ ioemul_handle_quirk(opcode, &ctxt->io_emul_stub[15], ctxt->ctxt.regs); -+ } - - /* Handy function-typed pointer to the stub. 
*/ -- return (void *)(this_cpu(stubs.addr) + STUB_BUF_SIZE / 2); -+ return (void *)stub_va; - } - - -diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S -index a8825c89df..710c0616ba 100644 ---- a/xen/arch/x86/x86_64/entry.S -+++ b/xen/arch/x86/x86_64/entry.S -@@ -585,7 +585,8 @@ handle_exception_saved: - movzbl UREGS_entry_vector(%rsp),%eax - leaq exception_table(%rip),%rdx - PERFC_INCR(exceptions, %rax, %rbx) -- callq *(%rdx,%rax,8) -+ mov (%rdx, %rax, 8), %rdx -+ INDIRECT_CALL %rdx - mov %r15, STACK_CPUINFO_FIELD(xen_cr3)(%r14) - testb $3,UREGS_cs(%rsp) - jz restore_all_xen -@@ -757,7 +758,8 @@ handle_ist_exception: - 1: movq %rsp,%rdi - movzbl UREGS_entry_vector(%rsp),%eax - leaq exception_table(%rip),%rdx -- callq *(%rdx,%rax,8) -+ mov (%rdx, %rax, 8), %rdx -+ INDIRECT_CALL %rdx - mov %r15, STACK_CPUINFO_FIELD(xen_cr3)(%r14) - cmpb $TRAP_nmi,UREGS_entry_vector(%rsp) - jne ret_from_intr -diff --git a/xen/arch/x86/x86_emulate/x86_emulate.c b/xen/arch/x86/x86_emulate/x86_emulate.c -index 820495fb9c..ff0a003902 100644 ---- a/xen/arch/x86/x86_emulate/x86_emulate.c -+++ b/xen/arch/x86/x86_emulate/x86_emulate.c -@@ -867,7 +867,7 @@ static inline int mkec(uint8_t e, int32_t ec, ...) - #ifdef __XEN__ - # define invoke_stub(pre, post, constraints...) do { \ - union stub_exception_token res_ = { .raw = ~0 }; \ -- asm volatile ( pre "\n\tcall *%[stub]\n\t" post "\n" \ -+ asm volatile ( pre "\n\tINDIRECT_CALL %[stub]\n\t" post "\n" \ - ".Lret%=:\n\t" \ - ".pushsection .fixup,\"ax\"\n" \ - ".Lfix%=:\n\t" \ -@@ -876,7 +876,7 @@ static inline int mkec(uint8_t e, int32_t ec, ...) - ".popsection\n\t" \ - _ASM_EXTABLE(.Lret%=, .Lfix%=) \ - : [exn] "+g" (res_), constraints, \ -- [stub] "rm" (stub.func), \ -+ [stub] "r" (stub.func), \ - "m" (*(uint8_t(*)[MAX_INST_LEN + 1])stub.ptr) ); \ - if ( unlikely(~res_.raw) ) \ - { \ -diff --git a/xen/common/wait.c b/xen/common/wait.c -index 3d3d9fe7a2..a57bc10d61 100644 ---- a/xen/common/wait.c -+++ b/xen/common/wait.c -@@ -203,12 +203,14 @@ void check_wakeup_from_wait(void) - - /* - * Hand-rolled longjmp(). Returns to the pointer on the top of -- * wqv->stack, and lands on a `rep movs` instruction. -+ * wqv->stack, and lands on a `rep movs` instruction. All other GPRs are -+ * restored from the stack, so are available for use here. - */ - asm volatile ( -- "mov %1,%%"__OP"sp; jmp *(%0)" -+ "mov %1,%%"__OP"sp; INDIRECT_JMP %[ip]" - : : "S" (wqv->stack), "D" (wqv->esp), -- "c" ((char *)get_cpu_info() - (char *)wqv->esp) -+ "c" ((char *)get_cpu_info() - (char *)wqv->esp), -+ [ip] "r" (*(unsigned long *)wqv->stack) - : "memory" ); - unreachable(); - } -diff --git a/xen/include/asm-x86/asm_defns.h b/xen/include/asm-x86/asm_defns.h -index 73d96227f4..9cfd79f0c6 100644 ---- a/xen/include/asm-x86/asm_defns.h -+++ b/xen/include/asm-x86/asm_defns.h -@@ -13,6 +13,14 @@ - #include - #include - -+#ifdef __ASSEMBLY__ -+# include -+#else -+asm ( "\t.equ CONFIG_INDIRECT_THUNK, " -+ __stringify(IS_ENABLED(CONFIG_INDIRECT_THUNK)) ); -+asm ( "\t.include \"asm/indirect_thunk_asm.h\"" ); -+#endif -+ - #ifndef __ASSEMBLY__ - void ret_from_intr(void); - #endif -diff --git a/xen/include/asm-x86/indirect_thunk_asm.h b/xen/include/asm-x86/indirect_thunk_asm.h -new file mode 100644 -index 0000000000..96bcc25497 ---- /dev/null -+++ b/xen/include/asm-x86/indirect_thunk_asm.h -@@ -0,0 +1,41 @@ -+/* -+ * Warning! This file is included at an assembler level for .c files, causing -+ * usual #ifdef'ary to turn into comments. 
-+ */ -+ -+.macro INDIRECT_BRANCH insn:req arg:req -+/* -+ * Create an indirect branch. insn is one of call/jmp, arg is a single -+ * register. -+ * -+ * With no compiler support, this degrades into a plain indirect call/jmp. -+ * With compiler support, dispatch to the correct __x86_indirect_thunk_* -+ */ -+ .if CONFIG_INDIRECT_THUNK == 1 -+ -+ $done = 0 -+ .irp reg, ax, cx, dx, bx, bp, si, di, 8, 9, 10, 11, 12, 13, 14, 15 -+ .ifeqs "\arg", "%r\reg" -+ \insn __x86_indirect_thunk_r\reg -+ $done = 1 -+ .exitm -+ .endif -+ .endr -+ -+ .if $done != 1 -+ .error "Bad register arg \arg" -+ .endif -+ -+ .else -+ \insn *\arg -+ .endif -+.endm -+ -+/* Convenience wrappers. */ -+.macro INDIRECT_CALL arg:req -+ INDIRECT_BRANCH call \arg -+.endm -+ -+.macro INDIRECT_JMP arg:req -+ INDIRECT_BRANCH jmp \arg -+.endm --- -2.14.3 - - -From 6aaf353f2ecbe8ae57e16812a6d74a4f089def3a Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Thu, 8 Feb 2018 11:06:07 +0100 -Subject: [PATCH 30/49] x86/boot: Report details of speculative mitigations - -Nothing very interesting at the moment, but the logic will grow as new -mitigations are added. - -This is part of XSA-254. - -Signed-off-by: Andrew Cooper -Acked-by: Jan Beulich -master commit: 31d6c53adf6417bf449ca50e8416e41b64d46803 -master date: 2018-01-16 17:45:50 +0000 ---- - xen/arch/x86/Makefile | 1 + - xen/arch/x86/setup.c | 3 ++ - xen/arch/x86/spec_ctrl.c | 75 +++++++++++++++++++++++++++++++++++++++++ - xen/include/asm-x86/spec_ctrl.h | 35 +++++++++++++++++++ - 4 files changed, 114 insertions(+) - create mode 100644 xen/arch/x86/spec_ctrl.c - create mode 100644 xen/include/asm-x86/spec_ctrl.h - -diff --git a/xen/arch/x86/Makefile b/xen/arch/x86/Makefile -index b334366db8..e8c49639d8 100644 ---- a/xen/arch/x86/Makefile -+++ b/xen/arch/x86/Makefile -@@ -57,6 +57,7 @@ obj-y += setup.o - obj-y += shutdown.o - obj-y += smp.o - obj-y += smpboot.o -+obj-y += spec_ctrl.o - obj-y += srat.o - obj-y += string.o - obj-y += sysctl.o -diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c -index 2e10c6bdf4..470427bc64 100644 ---- a/xen/arch/x86/setup.c -+++ b/xen/arch/x86/setup.c -@@ -51,6 +51,7 @@ - #include - #include - #include -+#include - - /* opt_nosmp: If true, secondary processors are ignored. */ - static bool __initdata opt_nosmp; -@@ -1502,6 +1503,8 @@ void __init noreturn __start_xen(unsigned long mbi_p) - if ( cpu_has_fsgsbase ) - set_in_cr4(X86_CR4_FSGSBASE); - -+ init_speculation_mitigations(); -+ - init_idle_domain(); - - this_cpu(stubs.addr) = alloc_stub_page(smp_processor_id(), -diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c -new file mode 100644 -index 0000000000..256701a43c ---- /dev/null -+++ b/xen/arch/x86/spec_ctrl.c -@@ -0,0 +1,75 @@ -+/****************************************************************************** -+ * arch/x86/spec_ctrl.c -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; If not, see . -+ * -+ * Copyright (c) 2017-2018 Citrix Systems Ltd. 
-+ */ -+#include -+#include -+ -+#include -+#include -+ -+enum ind_thunk { -+ THUNK_DEFAULT, /* Decide which thunk to use at boot time. */ -+ THUNK_NONE, /* Missing compiler support for thunks. */ -+ -+ THUNK_RETPOLINE, -+}; -+ -+static void __init print_details(enum ind_thunk thunk) -+{ -+ printk(XENLOG_DEBUG "Speculative mitigation facilities:\n"); -+ -+ /* Compiled-in support which pertains to BTI mitigations. */ -+ if ( IS_ENABLED(CONFIG_INDIRECT_THUNK) ) -+ printk(XENLOG_DEBUG " Compiled-in support: INDIRECT_THUNK\n"); -+ -+ printk(XENLOG_INFO -+ "BTI mitigations: Thunk %s\n", -+ thunk == THUNK_NONE ? "N/A" : -+ thunk == THUNK_RETPOLINE ? "RETPOLINE" : "?"); -+} -+ -+void __init init_speculation_mitigations(void) -+{ -+ enum ind_thunk thunk = THUNK_DEFAULT; -+ -+ /* -+ * Supplimentary minor adjustments. Without compiler support, there are -+ * no thunks. -+ */ -+ if ( !IS_ENABLED(CONFIG_INDIRECT_THUNK) ) -+ thunk = THUNK_NONE; -+ -+ /* -+ * If there are still no thunk preferences, the compiled default is -+ * actually retpoline, and it is better than nothing. -+ */ -+ if ( thunk == THUNK_DEFAULT ) -+ thunk = THUNK_RETPOLINE; -+ -+ print_details(thunk); -+} -+ -+/* -+ * Local variables: -+ * mode: C -+ * c-file-style: "BSD" -+ * c-basic-offset: 4 -+ * tab-width: 4 -+ * indent-tabs-mode: nil -+ * End: -+ */ -diff --git a/xen/include/asm-x86/spec_ctrl.h b/xen/include/asm-x86/spec_ctrl.h -new file mode 100644 -index 0000000000..e088a551da ---- /dev/null -+++ b/xen/include/asm-x86/spec_ctrl.h -@@ -0,0 +1,35 @@ -+/****************************************************************************** -+ * include/asm-x86/spec_ctrl.h -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; If not, see . -+ * -+ * Copyright (c) 2017-2018 Citrix Systems Ltd. -+ */ -+ -+#ifndef __X86_SPEC_CTRL_H__ -+#define __X86_SPEC_CTRL_H__ -+ -+void init_speculation_mitigations(void); -+ -+#endif /* !__X86_SPEC_CTRL_H__ */ -+ -+/* -+ * Local variables: -+ * mode: C -+ * c-file-style: "BSD" -+ * c-basic-offset: 4 -+ * tab-width: 4 -+ * indent-tabs-mode: nil -+ * End: -+ */ --- -2.14.3 - - -From 0e12c2c881aa12016bb659ab1eb4c7289244b3e7 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Thu, 8 Feb 2018 11:07:02 +0100 -Subject: [PATCH 31/49] x86/amd: Try to set lfence as being Dispatch - Serialising - -This property is required for the AMD's recommended mitigation for Branch -Target Injection, but Xen needs to cope with being unable to detect or modify -the MSR. - -This is part of XSA-254. 
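
As an illustration, the probe added to init_amd() boils down to the following
read/write/readback cycle (a condensed sketch, not the literal hunk;
rdmsr_safe()/wrmsr_safe() and the MSR/bit names are those used by the patch):

    uint64_t value;
    bool lfence_serialising = false;

    if ( rdmsr_safe(MSR_AMD64_DE_CFG, value) == 0 )
    {
        if ( !(value & AMD64_DE_CFG_LFENCE_SERIALISE) )
            /* Not yet serialising: attempt to set the bit. */
            wrmsr_safe(MSR_AMD64_DE_CFG,
                       value | AMD64_DE_CFG_LFENCE_SERIALISE);

        /* Trust only what reads back; the write may have been discarded. */
        lfence_serialising = (rdmsr_safe(MSR_AMD64_DE_CFG, value) == 0 &&
                              (value & AMD64_DE_CFG_LFENCE_SERIALISE));
    }

    /* If the probe fails, Xen keeps relying on MFENCE to serialise RDTSC. */

Families 0x0f and 0x11 skip the probe entirely, as lfence is always dispatch
serialising on that hardware.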
- -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich -master commit: fe3ee5530a8d0d0b6a478167125d00c40f294a86 -master date: 2018-01-16 17:45:50 +0000 ---- - xen/arch/x86/cpu/amd.c | 35 ++++++++++++++++++++++++++++++++++- - xen/include/asm-x86/cpufeature.h | 1 + - xen/include/asm-x86/cpufeatures.h | 1 + - xen/include/asm-x86/msr-index.h | 1 + - 4 files changed, 37 insertions(+), 1 deletion(-) - -diff --git a/xen/arch/x86/cpu/amd.c b/xen/arch/x86/cpu/amd.c -index 5f36ac75a7..40c0bac80b 100644 ---- a/xen/arch/x86/cpu/amd.c -+++ b/xen/arch/x86/cpu/amd.c -@@ -558,8 +558,41 @@ static void init_amd(struct cpuinfo_x86 *c) - wrmsr_amd_safe(0xc001100d, l, h & ~1); - } - -+ /* -+ * Attempt to set lfence to be Dispatch Serialising. This MSR almost -+ * certainly isn't virtualised (and Xen at least will leak the real -+ * value in but silently discard writes), as well as being per-core -+ * rather than per-thread, so do a full safe read/write/readback cycle -+ * in the worst case. -+ */ -+ if (c->x86 == 0x0f || c->x86 == 0x11) -+ /* Always dispatch serialising on this hardare. */ -+ __set_bit(X86_FEATURE_LFENCE_DISPATCH, c->x86_capability); -+ else /* Implicily "== 0x10 || >= 0x12" by being 64bit. */ { -+ if (rdmsr_safe(MSR_AMD64_DE_CFG, value)) -+ /* Unable to read. Assume the safer default. */ -+ __clear_bit(X86_FEATURE_LFENCE_DISPATCH, -+ c->x86_capability); -+ else if (value & AMD64_DE_CFG_LFENCE_SERIALISE) -+ /* Already dispatch serialising. */ -+ __set_bit(X86_FEATURE_LFENCE_DISPATCH, -+ c->x86_capability); -+ else if (wrmsr_safe(MSR_AMD64_DE_CFG, -+ value | AMD64_DE_CFG_LFENCE_SERIALISE) || -+ rdmsr_safe(MSR_AMD64_DE_CFG, value) || -+ !(value & AMD64_DE_CFG_LFENCE_SERIALISE)) -+ /* Attempt to set failed. Assume the safer default. */ -+ __clear_bit(X86_FEATURE_LFENCE_DISPATCH, -+ c->x86_capability); -+ else -+ /* Successfully enabled! 
*/ -+ __set_bit(X86_FEATURE_LFENCE_DISPATCH, -+ c->x86_capability); -+ } -+ - /* MFENCE stops RDTSC speculation */ -- __set_bit(X86_FEATURE_MFENCE_RDTSC, c->x86_capability); -+ if (!cpu_has_lfence_dispatch) -+ __set_bit(X86_FEATURE_MFENCE_RDTSC, c->x86_capability); - - switch(c->x86) - { -diff --git a/xen/include/asm-x86/cpufeature.h b/xen/include/asm-x86/cpufeature.h -index 84cc51d2bd..adc333f20e 100644 ---- a/xen/include/asm-x86/cpufeature.h -+++ b/xen/include/asm-x86/cpufeature.h -@@ -104,6 +104,7 @@ - #define cpu_has_arch_perfmon boot_cpu_has(X86_FEATURE_ARCH_PERFMON) - #define cpu_has_cpuid_faulting boot_cpu_has(X86_FEATURE_CPUID_FAULTING) - #define cpu_has_aperfmperf boot_cpu_has(X86_FEATURE_APERFMPERF) -+#define cpu_has_lfence_dispatch boot_cpu_has(X86_FEATURE_LFENCE_DISPATCH) - - enum _cache_type { - CACHE_TYPE_NULL = 0, -diff --git a/xen/include/asm-x86/cpufeatures.h b/xen/include/asm-x86/cpufeatures.h -index bc98227763..58b37d6a6d 100644 ---- a/xen/include/asm-x86/cpufeatures.h -+++ b/xen/include/asm-x86/cpufeatures.h -@@ -22,3 +22,4 @@ XEN_CPUFEATURE(APERFMPERF, (FSCAPINTS+0)*32+ 8) /* APERFMPERF */ - XEN_CPUFEATURE(MFENCE_RDTSC, (FSCAPINTS+0)*32+ 9) /* MFENCE synchronizes RDTSC */ - XEN_CPUFEATURE(XEN_SMEP, (FSCAPINTS+0)*32+10) /* SMEP gets used by Xen itself */ - XEN_CPUFEATURE(XEN_SMAP, (FSCAPINTS+0)*32+11) /* SMAP gets used by Xen itself */ -+XEN_CPUFEATURE(LFENCE_DISPATCH, (FSCAPINTS+0)*32+12) /* lfence set as Dispatch Serialising */ -diff --git a/xen/include/asm-x86/msr-index.h b/xen/include/asm-x86/msr-index.h -index b99c623367..9c8bae6c35 100644 ---- a/xen/include/asm-x86/msr-index.h -+++ b/xen/include/asm-x86/msr-index.h -@@ -207,6 +207,7 @@ - #define MSR_AMD64_IC_CFG 0xc0011021 - #define MSR_AMD64_DC_CFG 0xc0011022 - #define MSR_AMD64_DE_CFG 0xc0011029 -+#define AMD64_DE_CFG_LFENCE_SERIALISE (_AC(1, ULL) << 1) - - #define MSR_AMD64_DR0_ADDRESS_MASK 0xc0011027 - #define MSR_AMD64_DR1_ADDRESS_MASK 0xc0011019 --- -2.14.3 - - -From c513244d8e5b8aa0326c6f2d5fb2382811c97d6d Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Thu, 8 Feb 2018 11:07:50 +0100 -Subject: [PATCH 32/49] x86: Introduce alternative indirect thunks - -Depending on hardware and microcode availability, we will want to replace -IND_THUNK_REPOLINE with other implementations. - -For AMD hardware, choose IND_THUNK_LFENCE in preference to retpoline if lfence -is known to be (or was successfully made) dispatch serialising. - -This is part of XSA-254. - -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich -master commit: 858cba0d4c6b6b45180afcb41561fd6585ad51a3 -master date: 2018-01-16 17:45:50 +0000 ---- - docs/misc/xen-command-line.markdown | 16 ++++++++ - xen/arch/x86/indirect-thunk.S | 17 +++++++-- - xen/arch/x86/spec_ctrl.c | 75 +++++++++++++++++++++++++++++++++++-- - xen/include/asm-x86/cpufeatures.h | 2 + - 4 files changed, 104 insertions(+), 6 deletions(-) - -diff --git a/docs/misc/xen-command-line.markdown b/docs/misc/xen-command-line.markdown -index 49539b4d1c..214012bd9e 100644 ---- a/docs/misc/xen-command-line.markdown -+++ b/docs/misc/xen-command-line.markdown -@@ -245,6 +245,22 @@ and not running softirqs. Reduce this if softirqs are not being run frequently - enough. Setting this to a high value may cause boot failure, particularly if - the NMI watchdog is also enabled. - -+### bti (x86) -+> `= List of [ thunk=retpoline|lfence|jmp ]` -+ -+Branch Target Injection controls. 
By default, Xen will pick the most -+appropriate BTI mitigations based on compiled in support, loaded microcode, -+and hardware details. -+ -+**WARNING: Any use of this option may interfere with heuristics. Use with -+extreme care.** -+ -+If Xen was compiled with INDIRECT_THUNK support, `thunk=` can be used to -+select which of the thunks gets patched into the `__x86_indirect_thunk_%reg` -+locations. The default thunk is `retpoline` (generally preferred for Intel -+hardware), with the alternatives being `jmp` (a `jmp *%reg` gadget, minimal -+overhead), and `lfence` (an `lfence; jmp *%reg` gadget, preferred for AMD). -+ - ### xenheap\_megabytes (arm32) - > `= ` - -diff --git a/xen/arch/x86/indirect-thunk.S b/xen/arch/x86/indirect-thunk.S -index 3eaf505d0e..7d34707218 100644 ---- a/xen/arch/x86/indirect-thunk.S -+++ b/xen/arch/x86/indirect-thunk.S -@@ -21,15 +21,26 @@ - ret - .endm - -+.macro IND_THUNK_LFENCE reg:req -+ lfence -+ jmp *%\reg -+.endm -+ -+.macro IND_THUNK_JMP reg:req -+ jmp *%\reg -+.endm -+ - /* -- * Build the __x86_indirect_thunk_* symbols. Currently implement the -- * retpoline thunk only. -+ * Build the __x86.indirect_thunk.* symbols. Execution lands on an -+ * alternative patch point which implements one of the above THUNK_*'s - */ - .macro GEN_INDIRECT_THUNK reg:req - .section .text.__x86_indirect_thunk_\reg, "ax", @progbits - - ENTRY(__x86_indirect_thunk_\reg) -- IND_THUNK_RETPOLINE \reg -+ ALTERNATIVE_2 __stringify(IND_THUNK_RETPOLINE \reg), \ -+ __stringify(IND_THUNK_LFENCE \reg), X86_FEATURE_IND_THUNK_LFENCE, \ -+ __stringify(IND_THUNK_JMP \reg), X86_FEATURE_IND_THUNK_JMP - .endm - - /* Instantiate GEN_INDIRECT_THUNK for each register except %rsp. */ -diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c -index 256701a43c..d601c028d8 100644 ---- a/xen/arch/x86/spec_ctrl.c -+++ b/xen/arch/x86/spec_ctrl.c -@@ -16,18 +16,54 @@ - * - * Copyright (c) 2017-2018 Citrix Systems Ltd. - */ -+#include - #include - #include - - #include - #include - --enum ind_thunk { -+static enum ind_thunk { - THUNK_DEFAULT, /* Decide which thunk to use at boot time. */ - THUNK_NONE, /* Missing compiler support for thunks. */ - - THUNK_RETPOLINE, --}; -+ THUNK_LFENCE, -+ THUNK_JMP, -+} opt_thunk __initdata = THUNK_DEFAULT; -+ -+static int __init parse_bti(const char *s) -+{ -+ const char *ss; -+ int rc = 0; -+ -+ do { -+ ss = strchr(s, ','); -+ if ( !ss ) -+ ss = strchr(s, '\0'); -+ -+ if ( !strncmp(s, "thunk=", 6) ) -+ { -+ s += 6; -+ -+ if ( !strncmp(s, "retpoline", ss - s) ) -+ opt_thunk = THUNK_RETPOLINE; -+ else if ( !strncmp(s, "lfence", ss - s) ) -+ opt_thunk = THUNK_LFENCE; -+ else if ( !strncmp(s, "jmp", ss - s) ) -+ opt_thunk = THUNK_JMP; -+ else -+ rc = -EINVAL; -+ } -+ else -+ rc = -EINVAL; -+ -+ s = ss + 1; -+ } while ( *ss ); -+ -+ return rc; -+} -+custom_param("bti", parse_bti); - - static void __init print_details(enum ind_thunk thunk) - { -@@ -40,13 +76,40 @@ static void __init print_details(enum ind_thunk thunk) - printk(XENLOG_INFO - "BTI mitigations: Thunk %s\n", - thunk == THUNK_NONE ? "N/A" : -- thunk == THUNK_RETPOLINE ? "RETPOLINE" : "?"); -+ thunk == THUNK_RETPOLINE ? "RETPOLINE" : -+ thunk == THUNK_LFENCE ? "LFENCE" : -+ thunk == THUNK_JMP ? "JMP" : "?"); - } - - void __init init_speculation_mitigations(void) - { - enum ind_thunk thunk = THUNK_DEFAULT; - -+ /* -+ * Has the user specified any custom BTI mitigations? If so, follow their -+ * instructions exactly and disable all heuristics. 
-+ */ -+ if ( opt_thunk != THUNK_DEFAULT ) -+ { -+ thunk = opt_thunk; -+ } -+ else -+ { -+ /* -+ * Evaluate the safest Branch Target Injection mitigations to use. -+ * First, begin with compiler-aided mitigations. -+ */ -+ if ( IS_ENABLED(CONFIG_INDIRECT_THUNK) ) -+ { -+ /* -+ * AMD's recommended mitigation is to set lfence as being dispatch -+ * serialising, and to use IND_THUNK_LFENCE. -+ */ -+ if ( cpu_has_lfence_dispatch ) -+ thunk = THUNK_LFENCE; -+ } -+ } -+ - /* - * Supplimentary minor adjustments. Without compiler support, there are - * no thunks. -@@ -61,6 +124,12 @@ void __init init_speculation_mitigations(void) - if ( thunk == THUNK_DEFAULT ) - thunk = THUNK_RETPOLINE; - -+ /* Apply the chosen settings. */ -+ if ( thunk == THUNK_LFENCE ) -+ setup_force_cpu_cap(X86_FEATURE_IND_THUNK_LFENCE); -+ else if ( thunk == THUNK_JMP ) -+ setup_force_cpu_cap(X86_FEATURE_IND_THUNK_JMP); -+ - print_details(thunk); - } - -diff --git a/xen/include/asm-x86/cpufeatures.h b/xen/include/asm-x86/cpufeatures.h -index 58b37d6a6d..ba1771b3d3 100644 ---- a/xen/include/asm-x86/cpufeatures.h -+++ b/xen/include/asm-x86/cpufeatures.h -@@ -23,3 +23,5 @@ XEN_CPUFEATURE(MFENCE_RDTSC, (FSCAPINTS+0)*32+ 9) /* MFENCE synchronizes RDTS - XEN_CPUFEATURE(XEN_SMEP, (FSCAPINTS+0)*32+10) /* SMEP gets used by Xen itself */ - XEN_CPUFEATURE(XEN_SMAP, (FSCAPINTS+0)*32+11) /* SMAP gets used by Xen itself */ - XEN_CPUFEATURE(LFENCE_DISPATCH, (FSCAPINTS+0)*32+12) /* lfence set as Dispatch Serialising */ -+XEN_CPUFEATURE(IND_THUNK_LFENCE,(FSCAPINTS+0)*32+13) /* Use IND_THUNK_LFENCE */ -+XEN_CPUFEATURE(IND_THUNK_JMP, (FSCAPINTS+0)*32+14) /* Use IND_THUNK_JMP */ --- -2.14.3 - - -From 129880dd8f28bc728f93e3aad4675622c1ee2aad Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Thu, 8 Feb 2018 11:41:14 +0100 -Subject: [PATCH 33/49] x86/feature: Definitions for Indirect Branch Controls - -Contemporary processors are gaining Indirect Branch Controls via microcode -updates. Intel are introducing one bit to indicate IBRS and IBPB support, and -a second bit for STIBP. AMD are introducing IBPB only, so enumerate it with a -separate bit. - -Furthermore, depending on compiler and microcode availability, we may want to -run Xen with IBRS set, or clear. - -To use these facilities, we synthesise separate IBRS and IBPB bits for -internal use. A lot of infrastructure is required before these features are -safe to offer to guests. - -This is part of XSA-254. 
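
Concretely, the new bits live in CPUID leaf 7, sub-leaf 0, %edx (bit 26 IBRSB,
bit 27 STIBP) and leaf 0x80000008 %ebx (bit 12 IBPB). A minimal probe, along
the lines of what print_details() gains below, looks like this (sketch only;
cpufeat_mask() and the feature names are as introduced by this patch):

    unsigned int tmp, _7d0 = 0, e8b = 0;
    bool has_ibrsb, has_stibp, has_ibpb;

    if ( boot_cpu_data.cpuid_level >= 7 )
        cpuid_count(7, 0, &tmp, &tmp, &tmp, &_7d0);
    if ( boot_cpu_data.extended_cpuid_level >= 0x80000008 )
        cpuid(0x80000008, &tmp, &e8b, &tmp, &tmp);

    has_ibrsb = _7d0 & cpufeat_mask(X86_FEATURE_IBRSB); /* SPEC_CTRL + PRED_CMD */
    has_stibp = _7d0 & cpufeat_mask(X86_FEATURE_STIBP); /* SPEC_CTRL.STIBP      */
    has_ibpb  = e8b  & cpufeat_mask(X86_FEATURE_IBPB);  /* PRED_CMD only (AMD)  */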
- -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich -Acked-by: Wei Liu -master commit: 0d703a701cc4bc47773986b2796eebd28b1439b5 -master date: 2018-01-16 17:45:50 +0000 ---- - tools/libxl/libxl_cpuid.c | 3 +++ - tools/misc/xen-cpuid.c | 12 ++++++++++-- - xen/arch/x86/spec_ctrl.c | 17 +++++++++++++++++ - xen/include/asm-x86/cpufeatures.h | 3 +++ - xen/include/asm-x86/msr-index.h | 8 ++++++++ - xen/include/public/arch-x86/cpufeatureset.h | 3 +++ - xen/tools/gen-cpuid.py | 5 +++++ - 7 files changed, 49 insertions(+), 2 deletions(-) - -diff --git a/tools/libxl/libxl_cpuid.c b/tools/libxl/libxl_cpuid.c -index e692b61569..81ba9616bc 100644 ---- a/tools/libxl/libxl_cpuid.c -+++ b/tools/libxl/libxl_cpuid.c -@@ -202,6 +202,8 @@ int libxl_cpuid_parse_config(libxl_cpuid_policy_list *cpuid, const char* str) - - {"avx512-4vnniw",0x00000007, 0, CPUID_REG_EDX, 2, 1}, - {"avx512-4fmaps",0x00000007, 0, CPUID_REG_EDX, 3, 1}, -+ {"ibrsb", 0x00000007, 0, CPUID_REG_EDX, 26, 1}, -+ {"stibp", 0x00000007, 0, CPUID_REG_EDX, 27, 1}, - - {"lahfsahf", 0x80000001, NA, CPUID_REG_ECX, 0, 1}, - {"cmplegacy", 0x80000001, NA, CPUID_REG_ECX, 1, 1}, -@@ -239,6 +241,7 @@ int libxl_cpuid_parse_config(libxl_cpuid_policy_list *cpuid, const char* str) - - {"invtsc", 0x80000007, NA, CPUID_REG_EDX, 8, 1}, - -+ {"ibpb", 0x80000008, NA, CPUID_REG_EBX, 12, 1}, - {"nc", 0x80000008, NA, CPUID_REG_ECX, 0, 8}, - {"apicidsize", 0x80000008, NA, CPUID_REG_ECX, 12, 4}, - -diff --git a/tools/misc/xen-cpuid.c b/tools/misc/xen-cpuid.c -index 0831f7551d..8c3dac0d50 100644 ---- a/tools/misc/xen-cpuid.c -+++ b/tools/misc/xen-cpuid.c -@@ -149,7 +149,11 @@ static const char *str_e8b[32] = - { - [ 0] = "clzero", - -- [1 ... 31] = "REZ", -+ [1 ... 11] = "REZ", -+ -+ [12] = "ibpb", -+ -+ [13 ... 31] = "REZ", - }; - - static const char *str_7d0[32] = -@@ -158,7 +162,11 @@ static const char *str_7d0[32] = - - [ 2] = "avx512_4vnniw", [ 3] = "avx512_4fmaps", - -- [4 ... 31] = "REZ", -+ [4 ... 25] = "REZ", -+ -+ [26] = "ibrsb", [27] = "stibp", -+ -+ [28 ... 31] = "REZ", - }; - - static struct { -diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c -index d601c028d8..89e7287e43 100644 ---- a/xen/arch/x86/spec_ctrl.c -+++ b/xen/arch/x86/spec_ctrl.c -@@ -67,8 +67,25 @@ custom_param("bti", parse_bti); - - static void __init print_details(enum ind_thunk thunk) - { -+ unsigned int _7d0 = 0, e8b = 0, tmp; -+ -+ /* Collect diagnostics about available mitigations. */ -+ if ( boot_cpu_data.cpuid_level >= 7 ) -+ cpuid_count(7, 0, &tmp, &tmp, &tmp, &_7d0); -+ if ( boot_cpu_data.extended_cpuid_level >= 0x80000008 ) -+ cpuid(0x80000008, &tmp, &e8b, &tmp, &tmp); -+ - printk(XENLOG_DEBUG "Speculative mitigation facilities:\n"); - -+ /* Hardware features which pertain to speculative mitigations. */ -+ if ( (_7d0 & (cpufeat_mask(X86_FEATURE_IBRSB) | -+ cpufeat_mask(X86_FEATURE_STIBP))) || -+ (e8b & cpufeat_mask(X86_FEATURE_IBPB)) ) -+ printk(XENLOG_DEBUG " Hardware features:%s%s%s\n", -+ (_7d0 & cpufeat_mask(X86_FEATURE_IBRSB)) ? " IBRS/IBPB" : "", -+ (_7d0 & cpufeat_mask(X86_FEATURE_STIBP)) ? " STIBP" : "", -+ (e8b & cpufeat_mask(X86_FEATURE_IBPB)) ? " IBPB" : ""); -+ - /* Compiled-in support which pertains to BTI mitigations. 
*/ - if ( IS_ENABLED(CONFIG_INDIRECT_THUNK) ) - printk(XENLOG_DEBUG " Compiled-in support: INDIRECT_THUNK\n"); -diff --git a/xen/include/asm-x86/cpufeatures.h b/xen/include/asm-x86/cpufeatures.h -index ba1771b3d3..dd2388f393 100644 ---- a/xen/include/asm-x86/cpufeatures.h -+++ b/xen/include/asm-x86/cpufeatures.h -@@ -25,3 +25,6 @@ XEN_CPUFEATURE(XEN_SMAP, (FSCAPINTS+0)*32+11) /* SMAP gets used by Xen it - XEN_CPUFEATURE(LFENCE_DISPATCH, (FSCAPINTS+0)*32+12) /* lfence set as Dispatch Serialising */ - XEN_CPUFEATURE(IND_THUNK_LFENCE,(FSCAPINTS+0)*32+13) /* Use IND_THUNK_LFENCE */ - XEN_CPUFEATURE(IND_THUNK_JMP, (FSCAPINTS+0)*32+14) /* Use IND_THUNK_JMP */ -+XEN_CPUFEATURE(XEN_IBPB, (FSCAPINTS+0)*32+15) /* IBRSB || IBPB */ -+XEN_CPUFEATURE(XEN_IBRS_SET, (FSCAPINTS+0)*32+16) /* IBRSB && IRBS set in Xen */ -+XEN_CPUFEATURE(XEN_IBRS_CLEAR, (FSCAPINTS+0)*32+17) /* IBRSB && IBRS clear in Xen */ -diff --git a/xen/include/asm-x86/msr-index.h b/xen/include/asm-x86/msr-index.h -index 9c8bae6c35..11c43fa83e 100644 ---- a/xen/include/asm-x86/msr-index.h -+++ b/xen/include/asm-x86/msr-index.h -@@ -31,6 +31,14 @@ - #define EFER_LMSLE (1<<_EFER_LMSLE) - #define EFER_FFXSE (1<<_EFER_FFXSE) - -+/* Speculation Controls. */ -+#define MSR_SPEC_CTRL 0x00000048 -+#define SPEC_CTRL_IBRS (_AC(1, ULL) << 0) -+#define SPEC_CTRL_STIBP (_AC(1, ULL) << 1) -+ -+#define MSR_PRED_CMD 0x00000049 -+#define PRED_CMD_IBPB (_AC(1, ULL) << 0) -+ - /* Intel MSRs. Some also available on other CPUs */ - #define MSR_IA32_PERFCTR0 0x000000c1 - #define MSR_IA32_A_PERFCTR0 0x000004c1 -diff --git a/xen/include/public/arch-x86/cpufeatureset.h b/xen/include/public/arch-x86/cpufeatureset.h -index be6da8eaf1..e148755a66 100644 ---- a/xen/include/public/arch-x86/cpufeatureset.h -+++ b/xen/include/public/arch-x86/cpufeatureset.h -@@ -237,10 +237,13 @@ XEN_CPUFEATURE(EFRO, 7*32+10) /* APERF/MPERF Read Only interface */ - - /* AMD-defined CPU features, CPUID level 0x80000008.ebx, word 8 */ - XEN_CPUFEATURE(CLZERO, 8*32+ 0) /*A CLZERO instruction */ -+XEN_CPUFEATURE(IBPB, 8*32+12) /* IBPB support only (no IBRS, used by AMD) */ - - /* Intel-defined CPU features, CPUID level 0x00000007:0.edx, word 9 */ - XEN_CPUFEATURE(AVX512_4VNNIW, 9*32+ 2) /*A AVX512 Neural Network Instructions */ - XEN_CPUFEATURE(AVX512_4FMAPS, 9*32+ 3) /*A AVX512 Multiply Accumulation Single Precision */ -+XEN_CPUFEATURE(IBRSB, 9*32+26) /* IBRS and IBPB support (used by Intel) */ -+XEN_CPUFEATURE(STIBP, 9*32+27) /* STIBP */ - - #endif /* XEN_CPUFEATURE */ - -diff --git a/xen/tools/gen-cpuid.py b/xen/tools/gen-cpuid.py -index 9ec4486f2b..613b909c3d 100755 ---- a/xen/tools/gen-cpuid.py -+++ b/xen/tools/gen-cpuid.py -@@ -256,6 +256,11 @@ def crunch_numbers(state): - AVX512F: [AVX512DQ, AVX512IFMA, AVX512PF, AVX512ER, AVX512CD, - AVX512BW, AVX512VL, AVX512VBMI, AVX512_4VNNIW, - AVX512_4FMAPS, AVX512_VPOPCNTDQ], -+ -+ # Single Thread Indirect Branch Predictors enumerates a new bit in the -+ # MSR enumerated by Indirect Branch Restricted Speculation/Indirect -+ # Branch Prediction Barrier enumeration. -+ IBRSB: [STIBP], - } - - deep_features = tuple(sorted(deps.keys())) --- -2.14.3 - - -From 65ee6e043a6dc61bece75a9dfe24c7ee70c6597c Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Thu, 8 Feb 2018 11:42:36 +0100 -Subject: [PATCH 34/49] x86/cmdline: Introduce a command line option to disable - IBRS/IBPB, STIBP and IBPB - -Instead of gaining yet another top level boolean, introduce a more generic -cpuid= option. Also introduce a helper function to parse a generic boolean -value. 
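
For example (illustrative command lines only; the accepted grammar is the
[no-]$NAME[=...] form described with parse_boolean() below):

    cpuid=no-ibrsb,no-stibp
    cpuid=stibp=0

Any token which parses to false results in a setup_clear_cpu_cap() call for
the corresponding feature, so Xen neither uses the facility itself nor offers
it to guests.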
- -This is part of XSA-254. - -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich - -xen/cmdline: Fix parse_boolean() for unadorned values - -A command line such as "cpuid=no-ibrsb,no-stibp" tickles a bug in -parse_boolean() because the separating comma fails the NUL case. - -Instead, check for slen == nlen which accounts for the boundary (if any) -passed via the 'e' parameter. - -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich -master commit: 7850b1c00749df834ea2ad0c1f5d9364c4838795 -master date: 2018-01-16 17:45:50 +0000 -master commit: ac37ec1ddef234eeba6f438c29ff687c64962ebd -master date: 2018-01-31 10:47:12 +0000 ---- - docs/misc/xen-command-line.markdown | 12 ++++++++++++ - xen/arch/x86/cpuid.c | 35 +++++++++++++++++++++++++++++++++++ - xen/common/kernel.c | 27 +++++++++++++++++++++++++++ - xen/include/xen/lib.h | 7 +++++++ - 4 files changed, 81 insertions(+) - -diff --git a/docs/misc/xen-command-line.markdown b/docs/misc/xen-command-line.markdown -index 214012bd9e..2d95759568 100644 ---- a/docs/misc/xen-command-line.markdown -+++ b/docs/misc/xen-command-line.markdown -@@ -471,6 +471,18 @@ choice of `dom0-kernel` is deprecated and not supported by all Dom0 kernels. - respectively. - * `verbose` option can be included as a string or also as `verbose=` - -+### cpuid (x86) -+> `= List of comma separated booleans` -+ -+This option allows for fine tuning of the facilities Xen will use, after -+accounting for hardware capabilities as enumerated via CPUID. -+ -+Currently accepted: -+ -+The Speculation Control hardware features `ibrsb`, `stibp`, `ibpb` are used by -+default if avaiable. They can be ignored, e.g. `no-ibrsb`, at which point Xen -+won't use them itself, and won't offer them to guests. -+ - ### cpuid\_mask\_cpu (AMD only) - > `= fam_0f_rev_c | fam_0f_rev_d | fam_0f_rev_e | fam_0f_rev_f | fam_0f_rev_g | fam_10_rev_b | fam_10_rev_c | fam_11_rev_b` - -diff --git a/xen/arch/x86/cpuid.c b/xen/arch/x86/cpuid.c -index 5ee82d39d7..2ef71d218e 100644 ---- a/xen/arch/x86/cpuid.c -+++ b/xen/arch/x86/cpuid.c -@@ -18,6 +18,41 @@ static const uint32_t hvm_shadow_featuremask[] = INIT_HVM_SHADOW_FEATURES; - static const uint32_t hvm_hap_featuremask[] = INIT_HVM_HAP_FEATURES; - static const uint32_t deep_features[] = INIT_DEEP_FEATURES; - -+static int __init parse_xen_cpuid(const char *s) -+{ -+ const char *ss; -+ int val, rc = 0; -+ -+ do { -+ ss = strchr(s, ','); -+ if ( !ss ) -+ ss = strchr(s, '\0'); -+ -+ if ( (val = parse_boolean("ibpb", s, ss)) >= 0 ) -+ { -+ if ( !val ) -+ setup_clear_cpu_cap(X86_FEATURE_IBPB); -+ } -+ else if ( (val = parse_boolean("ibrsb", s, ss)) >= 0 ) -+ { -+ if ( !val ) -+ setup_clear_cpu_cap(X86_FEATURE_IBRSB); -+ } -+ else if ( (val = parse_boolean("stibp", s, ss)) >= 0 ) -+ { -+ if ( !val ) -+ setup_clear_cpu_cap(X86_FEATURE_STIBP); -+ } -+ else -+ rc = -EINVAL; -+ -+ s = ss + 1; -+ } while ( *ss ); -+ -+ return rc; -+} -+custom_param("cpuid", parse_xen_cpuid); -+ - #define EMPTY_LEAF ((struct cpuid_leaf){}) - static void zero_leaves(struct cpuid_leaf *l, - unsigned int first, unsigned int last) -diff --git a/xen/common/kernel.c b/xen/common/kernel.c -index 8d137c58fb..5766a0f784 100644 ---- a/xen/common/kernel.c -+++ b/xen/common/kernel.c -@@ -244,6 +244,33 @@ int parse_bool(const char *s, const char *e) - return -1; - } - -+int parse_boolean(const char *name, const char *s, const char *e) -+{ -+ size_t slen, nlen; -+ int val = !!strncmp(s, "no-", 3); -+ -+ if ( !val ) -+ s += 3; -+ -+ slen = e ? 
({ ASSERT(e >= s); e - s; }) : strlen(s); -+ nlen = strlen(name); -+ -+ /* Does s now start with name? */ -+ if ( slen < nlen || strncmp(s, name, nlen) ) -+ return -1; -+ -+ /* Exact, unadorned name? Result depends on the 'no-' prefix. */ -+ if ( slen == nlen ) -+ return val; -+ -+ /* =$SOMETHING? Defer to the regular boolean parsing. */ -+ if ( s[nlen] == '=' ) -+ return parse_bool(&s[nlen + 1], e); -+ -+ /* Unrecognised. Give up. */ -+ return -1; -+} -+ - unsigned int tainted; - - /** -diff --git a/xen/include/xen/lib.h b/xen/include/xen/lib.h -index ed00ae1379..1d9771340c 100644 ---- a/xen/include/xen/lib.h -+++ b/xen/include/xen/lib.h -@@ -74,6 +74,13 @@ void cmdline_parse(const char *cmdline); - int runtime_parse(const char *line); - int parse_bool(const char *s, const char *e); - -+/** -+ * Given a specific name, parses a string of the form: -+ * [no-]$NAME[=...] -+ * returning 0 or 1 for a recognised boolean, or -1 for an error. -+ */ -+int parse_boolean(const char *name, const char *s, const char *e); -+ - /*#define DEBUG_TRACE_DUMP*/ - #ifdef DEBUG_TRACE_DUMP - extern void debugtrace_dump(void); --- -2.14.3 - - -From 641c11ef293c7f3a58c1856138835c06e09d6b07 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Thu, 8 Feb 2018 11:47:41 +0100 -Subject: [PATCH 39/49] x86/cpuid: Handling of IBRS/IBPB, STIBP and IBRS for - guests - -Intel specifies IBRS/IBPB (combined, in a single bit) and STIBP as a separate -bit. AMD specifies IBPB alone in a 3rd bit. - -AMD's IBPB is a subset of Intel's combined IBRS/IBPB. For performance -reasons, administrators might wish to express "IBPB only" even on Intel -hardware, so we allow the AMD bit to be used for this purpose. - -The behaviour of STIBP is more complicated. - -It is our current understanding that STIBP will be advertised on HT-capable -hardware irrespective of whether HT is enabled, but not advertised on -HT-incapable hardware. However, for ease of virtualisation, STIBP's -functionality is ignored rather than reserved by microcode/hardware on -HT-incapable hardware. - -For guest safety, we treat STIBP as special, always override the toolstack -choice, and always advertise STIBP if IBRS is available. This removes the -corner case where STIBP is not advertised, but the guest is running on -HT-capable hardware where it does matter. - -Finally as a bugfix, update the libxc CPUID logic to understand the e8b -feature leaf, which has the side effect of also offering CLZERO to guests on -applicable hardware. - -Signed-off-by: Andrew Cooper -Acked-by: Wei Liu -Reviewed-by: Jan Beulich -master commit: d297b56682e730d598e2529cc6998151d3b6f6f8 -master date: 2018-01-26 14:10:21 +0000 ---- - tools/libxc/xc_cpuid_x86.c | 4 +++- - xen/arch/x86/cpuid.c | 28 ++++++++++++++++++++++++++++ - xen/include/public/arch-x86/cpufeatureset.h | 2 +- - 3 files changed, 32 insertions(+), 2 deletions(-) - -diff --git a/tools/libxc/xc_cpuid_x86.c b/tools/libxc/xc_cpuid_x86.c -index 25b922ea21..9fa2f7c360 100644 ---- a/tools/libxc/xc_cpuid_x86.c -+++ b/tools/libxc/xc_cpuid_x86.c -@@ -465,7 +465,9 @@ static void xc_cpuid_hvm_policy(xc_interface *xch, - - case 0x80000008: - regs[0] &= 0x0000ffffu; -- regs[1] = regs[3] = 0; -+ regs[1] = info->featureset[featureword_of(X86_FEATURE_CLZERO)]; -+ /* regs[2] handled in the per-vendor logic. 
*/ -+ regs[3] = 0; - break; - - case 0x00000002: /* Intel cache info (dumped by AMD policy) */ -diff --git a/xen/arch/x86/cpuid.c b/xen/arch/x86/cpuid.c -index 2ef71d218e..b3c9ac6c48 100644 ---- a/xen/arch/x86/cpuid.c -+++ b/xen/arch/x86/cpuid.c -@@ -383,6 +383,16 @@ static void __init calculate_pv_max_policy(void) - /* Unconditionally claim to be able to set the hypervisor bit. */ - __set_bit(X86_FEATURE_HYPERVISOR, pv_featureset); - -+ /* On hardware with IBRS/IBPB support, there are further adjustments. */ -+ if ( test_bit(X86_FEATURE_IBRSB, pv_featureset) ) -+ { -+ /* Offer STIBP unconditionally. It is a nop on non-HT hardware. */ -+ __set_bit(X86_FEATURE_STIBP, pv_featureset); -+ -+ /* AMD's IBPB is a subset of IBRS/IBPB. */ -+ __set_bit(X86_FEATURE_IBPB, pv_featureset); -+ } -+ - sanitise_featureset(pv_featureset); - cpuid_featureset_to_policy(pv_featureset, p); - recalculate_xstate(p); -@@ -440,6 +450,16 @@ static void __init calculate_hvm_max_policy(void) - __clear_bit(X86_FEATURE_XSAVES, hvm_featureset); - } - -+ /* On hardware with IBRS/IBPB support, there are further adjustments. */ -+ if ( test_bit(X86_FEATURE_IBRSB, hvm_featureset) ) -+ { -+ /* Offer STIBP unconditionally. It is a nop on non-HT hardware. */ -+ __set_bit(X86_FEATURE_STIBP, hvm_featureset); -+ -+ /* AMD's IBPB is a subset of IBRS/IBPB. */ -+ __set_bit(X86_FEATURE_IBPB, hvm_featureset); -+ } -+ - sanitise_featureset(hvm_featureset); - cpuid_featureset_to_policy(hvm_featureset, p); - recalculate_xstate(p); -@@ -581,6 +601,14 @@ void recalculate_cpuid_policy(struct domain *d) - recalculate_xstate(p); - recalculate_misc(p); - -+ /* -+ * Override STIBP to match IBRS. Guests can safely use STIBP -+ * functionality on non-HT hardware, but can't necesserily protect -+ * themselves from SP2/Spectre/Branch Target Injection if STIBP is hidden -+ * on HT-capable hardware. -+ */ -+ p->feat.stibp = p->feat.ibrsb; -+ - for ( i = 0; i < ARRAY_SIZE(p->cache.raw); ++i ) - { - if ( p->cache.subleaf[i].type >= 1 && -diff --git a/xen/include/public/arch-x86/cpufeatureset.h b/xen/include/public/arch-x86/cpufeatureset.h -index e148755a66..0f21fed161 100644 ---- a/xen/include/public/arch-x86/cpufeatureset.h -+++ b/xen/include/public/arch-x86/cpufeatureset.h -@@ -243,7 +243,7 @@ XEN_CPUFEATURE(IBPB, 8*32+12) /* IBPB support only (no IBRS, used by - XEN_CPUFEATURE(AVX512_4VNNIW, 9*32+ 2) /*A AVX512 Neural Network Instructions */ - XEN_CPUFEATURE(AVX512_4FMAPS, 9*32+ 3) /*A AVX512 Multiply Accumulation Single Precision */ - XEN_CPUFEATURE(IBRSB, 9*32+26) /* IBRS and IBPB support (used by Intel) */ --XEN_CPUFEATURE(STIBP, 9*32+27) /* STIBP */ -+XEN_CPUFEATURE(STIBP, 9*32+27) /*! STIBP */ - - #endif /* XEN_CPUFEATURE */ - --- -2.14.3 - - -From 79891ef9442acb998f354b969e7302d81245ab0b Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Thu, 8 Feb 2018 11:48:22 +0100 -Subject: [PATCH 40/49] x86/msr: Emulation of MSR_{SPEC_CTRL,PRED_CMD} for - guests - -As per the spec currently available here: - -https://software.intel.com/sites/default/files/managed/c5/63/336996-Speculative-Execution-Side-Channel-Mitigations.pdf - -MSR_ARCH_CAPABILITIES will only come into existence on new hardware, but is -implemented as a straight #GP for now to avoid being leaky when new hardware -arrives. 
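
Condensed, the architectural behaviour guest_wrmsr() implements below is
(sketch of the new cases only; cp is the domain's CPUID policy, vp the vCPU's
MSR state):

    case MSR_SPEC_CTRL:
        if ( !cp->feat.ibrsb )
            goto gp_fault;                       /* MSR not enumerated. */
        if ( val & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP) )
            goto gp_fault;                       /* Reserved bit set. */
        vp->spec_ctrl.raw = val;                 /* STIBP is ignored if absent. */
        break;

    case MSR_PRED_CMD:
        if ( !cp->feat.ibrsb && !cp->extd.ibpb )
            goto gp_fault;                       /* MSR not enumerated. */
        if ( v == curr && (val & PRED_CMD_IBPB) )
            wrmsrl(MSR_PRED_CMD, PRED_CMD_IBPB); /* Write-only; nothing latched. */
        break;

    case MSR_ARCH_CAPABILITIES:
        goto gp_fault;                           /* Not implemented yet. */

Reads of MSR_SPEC_CTRL return vp->spec_ctrl.raw; reads of MSR_PRED_CMD and
MSR_ARCH_CAPABILITIES take a #GP.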
- -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich -master commit: ea58a679a6190e714a592f1369b660769a48a80c -master date: 2018-01-26 14:10:21 +0000 ---- - xen/arch/x86/msr.c | 45 +++++++++++++++++++++++++++++++++++++++++ - xen/include/asm-x86/msr-index.h | 2 ++ - xen/include/asm-x86/msr.h | 10 +++++++++ - 3 files changed, 57 insertions(+) - -diff --git a/xen/arch/x86/msr.c b/xen/arch/x86/msr.c -index 187f8623a5..7875d9c1e0 100644 ---- a/xen/arch/x86/msr.c -+++ b/xen/arch/x86/msr.c -@@ -120,11 +120,22 @@ int init_vcpu_msr_policy(struct vcpu *v) - - int guest_rdmsr(const struct vcpu *v, uint32_t msr, uint64_t *val) - { -+ const struct cpuid_policy *cp = v->domain->arch.cpuid; - const struct msr_domain_policy *dp = v->domain->arch.msr; - const struct msr_vcpu_policy *vp = v->arch.msr; - - switch ( msr ) - { -+ case MSR_PRED_CMD: -+ /* Write-only */ -+ goto gp_fault; -+ -+ case MSR_SPEC_CTRL: -+ if ( !cp->feat.ibrsb ) -+ goto gp_fault; -+ *val = vp->spec_ctrl.raw; -+ break; -+ - case MSR_INTEL_PLATFORM_INFO: - if ( !dp->plaform_info.available ) - goto gp_fault; -@@ -132,6 +143,10 @@ int guest_rdmsr(const struct vcpu *v, uint32_t msr, uint64_t *val) - _MSR_PLATFORM_INFO_CPUID_FAULTING; - break; - -+ case MSR_ARCH_CAPABILITIES: -+ /* Not implemented yet. */ -+ goto gp_fault; -+ - case MSR_INTEL_MISC_FEATURES_ENABLES: - if ( !vp->misc_features_enables.available ) - goto gp_fault; -@@ -153,14 +168,44 @@ int guest_wrmsr(struct vcpu *v, uint32_t msr, uint64_t val) - { - const struct vcpu *curr = current; - struct domain *d = v->domain; -+ const struct cpuid_policy *cp = d->arch.cpuid; - struct msr_domain_policy *dp = d->arch.msr; - struct msr_vcpu_policy *vp = v->arch.msr; - - switch ( msr ) - { - case MSR_INTEL_PLATFORM_INFO: -+ case MSR_ARCH_CAPABILITIES: -+ /* Read-only */ - goto gp_fault; - -+ case MSR_SPEC_CTRL: -+ if ( !cp->feat.ibrsb ) -+ goto gp_fault; /* MSR available? */ -+ -+ /* -+ * Note: SPEC_CTRL_STIBP is specified as safe to use (i.e. ignored) -+ * when STIBP isn't enumerated in hardware. -+ */ -+ -+ if ( val & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP) ) -+ goto gp_fault; /* Rsvd bit set? */ -+ -+ vp->spec_ctrl.raw = val; -+ break; -+ -+ case MSR_PRED_CMD: -+ if ( !cp->feat.ibrsb && !cp->extd.ibpb ) -+ goto gp_fault; /* MSR available? */ -+ -+ /* -+ * The only defined behaviour is when writing PRED_CMD_IBPB. In -+ * practice, real hardware accepts any value without faulting. -+ */ -+ if ( v == curr && (val & PRED_CMD_IBPB) ) -+ wrmsrl(MSR_PRED_CMD, PRED_CMD_IBPB); -+ break; -+ - case MSR_INTEL_MISC_FEATURES_ENABLES: - { - uint64_t rsvd = ~0ull; -diff --git a/xen/include/asm-x86/msr-index.h b/xen/include/asm-x86/msr-index.h -index 11c43fa83e..9a5bdcbd8b 100644 ---- a/xen/include/asm-x86/msr-index.h -+++ b/xen/include/asm-x86/msr-index.h -@@ -39,6 +39,8 @@ - #define MSR_PRED_CMD 0x00000049 - #define PRED_CMD_IBPB (_AC(1, ULL) << 0) - -+#define MSR_ARCH_CAPABILITIES 0x0000010a -+ - /* Intel MSRs. 
Some also available on other CPUs */ - #define MSR_IA32_PERFCTR0 0x000000c1 - #define MSR_IA32_A_PERFCTR0 0x000004c1 -diff --git a/xen/include/asm-x86/msr.h b/xen/include/asm-x86/msr.h -index 41732a4c93..20ba47e905 100644 ---- a/xen/include/asm-x86/msr.h -+++ b/xen/include/asm-x86/msr.h -@@ -215,6 +215,16 @@ struct msr_domain_policy - /* MSR policy object for per-vCPU MSRs */ - struct msr_vcpu_policy - { -+ /* 0x00000048 - MSR_SPEC_CTRL */ -+ struct { -+ /* -+ * Only the bottom two bits are defined, so no need to waste space -+ * with uint64_t at the moment, but use uint32_t for the convenience -+ * of the assembly code. -+ */ -+ uint32_t raw; -+ } spec_ctrl; -+ - /* 0x00000140 MSR_INTEL_MISC_FEATURES_ENABLES */ - struct { - bool available; /* This MSR is non-architectural */ --- -2.14.3 - - -From 8baba874d6c76c1d6dd69b1d9aa06abdc344a1f5 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Thu, 8 Feb 2018 11:49:02 +0100 -Subject: [PATCH 41/49] x86/migrate: Move MSR_SPEC_CTRL on migrate - -Signed-off-by: Andrew Cooper -Reviewed-by: Wei Liu -Reviewed-by: Jan Beulich -master commit: 0cf2a4eb769302b7d7d7835540e7b2f15006df30 -master date: 2018-01-26 14:10:21 +0000 ---- - xen/arch/x86/domctl.c | 2 ++ - xen/arch/x86/hvm/hvm.c | 2 ++ - 2 files changed, 4 insertions(+) - -diff --git a/xen/arch/x86/domctl.c b/xen/arch/x86/domctl.c -index e61201267b..c8446ac0d3 100644 ---- a/xen/arch/x86/domctl.c -+++ b/xen/arch/x86/domctl.c -@@ -1290,6 +1290,7 @@ long arch_do_domctl( - struct xen_domctl_vcpu_msr msr; - struct vcpu *v; - static const uint32_t msrs_to_send[] = { -+ MSR_SPEC_CTRL, - MSR_INTEL_MISC_FEATURES_ENABLES, - }; - uint32_t nr_msrs = ARRAY_SIZE(msrs_to_send); -@@ -1416,6 +1417,7 @@ long arch_do_domctl( - - switch ( msr.index ) - { -+ case MSR_SPEC_CTRL: - case MSR_INTEL_MISC_FEATURES_ENABLES: - if ( guest_wrmsr(v, msr.index, msr.value) != X86EMUL_OKAY ) - break; -diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c -index 2a3dd4ee91..2e212f6f80 100644 ---- a/xen/arch/x86/hvm/hvm.c -+++ b/xen/arch/x86/hvm/hvm.c -@@ -1323,6 +1323,7 @@ static int hvm_load_cpu_xsave_states(struct domain *d, hvm_domain_context_t *h) - - #define HVM_CPU_MSR_SIZE(cnt) offsetof(struct hvm_msr, msr[cnt]) - static const uint32_t msrs_to_send[] = { -+ MSR_SPEC_CTRL, - MSR_INTEL_MISC_FEATURES_ENABLES, - }; - static unsigned int __read_mostly msr_count_max = ARRAY_SIZE(msrs_to_send); -@@ -1458,6 +1459,7 @@ static int hvm_load_cpu_msrs(struct domain *d, hvm_domain_context_t *h) - { - int rc; - -+ case MSR_SPEC_CTRL: - case MSR_INTEL_MISC_FEATURES_ENABLES: - rc = guest_wrmsr(v, ctxt->msr[i].index, ctxt->msr[i].val); - --- -2.14.3 - - -From 92efbe865813d84873a0e7262b1fa414842306b6 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Thu, 8 Feb 2018 11:49:32 +0100 -Subject: [PATCH 42/49] x86/hvm: Permit guests direct access to - MSR_{SPEC_CTRL,PRED_CMD} - -For performance reasons, HVM guests should have direct access to these MSRs -when possible. 
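
The decision rule, condensed from the vmx_cpuid_policy_changed() hunk below
(the SVM side makes the equivalent adjustment for MSR_PRED_CMD via
svm_intercept_msr(); cp is the guest's CPUID policy):

    if ( cp->feat.ibrsb )                   /* Guest told about SPEC_CTRL? */
        vmx_clear_msr_intercept(v, MSR_SPEC_CTRL, VMX_MSR_RW);
    else
        vmx_set_msr_intercept(v, MSR_SPEC_CTRL, VMX_MSR_RW);

    if ( cp->feat.ibrsb || cp->extd.ibpb )  /* Guest told about PRED_CMD? */
        vmx_clear_msr_intercept(v, MSR_PRED_CMD, VMX_MSR_RW);
    else
        vmx_set_msr_intercept(v, MSR_PRED_CMD, VMX_MSR_RW);

The domctl changes exist so that a later toolstack update to the IBRSB/IBPB
policy bits re-runs cpuid_policy_changed() and keeps the interception bitmaps
in sync with the new policy.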
- -Signed-off-by: Andrew Cooper -Reviewed-by: Boris Ostrovsky -Reviewed-by: Jan Beulich -Reviewed-by: Kevin Tian -master commit: 5a2fe171144ebcc908ea1fca45058d6010f6a286 -master date: 2018-01-26 14:10:21 +0000 ---- - xen/arch/x86/domctl.c | 19 +++++++++++++++++++ - xen/arch/x86/hvm/svm/svm.c | 5 +++++ - xen/arch/x86/hvm/vmx/vmx.c | 17 +++++++++++++++++ - 3 files changed, 41 insertions(+) - -diff --git a/xen/arch/x86/domctl.c b/xen/arch/x86/domctl.c -index c8446ac0d3..2e3b6275e0 100644 ---- a/xen/arch/x86/domctl.c -+++ b/xen/arch/x86/domctl.c -@@ -53,6 +53,7 @@ static int update_domain_cpuid_info(struct domain *d, - struct cpuid_policy *p = d->arch.cpuid; - const struct cpuid_leaf leaf = { ctl->eax, ctl->ebx, ctl->ecx, ctl->edx }; - int old_vendor = p->x86_vendor; -+ unsigned int old_7d0 = p->feat.raw[0].d, old_e8b = p->extd.raw[8].b; - bool call_policy_changed = false; /* Avoid for_each_vcpu() unnecessarily */ - - /* -@@ -218,6 +219,14 @@ static int update_domain_cpuid_info(struct domain *d, - - d->arch.pv_domain.cpuidmasks->_7ab0 = mask; - } -+ -+ /* -+ * If the IBRS/IBPB policy has changed, we need to recalculate the MSR -+ * interception bitmaps. -+ */ -+ call_policy_changed = (is_hvm_domain(d) && -+ ((old_7d0 ^ p->feat.raw[0].d) & -+ cpufeat_mask(X86_FEATURE_IBRSB))); - break; - - case 0xa: -@@ -292,6 +301,16 @@ static int update_domain_cpuid_info(struct domain *d, - d->arch.pv_domain.cpuidmasks->e1cd = mask; - } - break; -+ -+ case 0x80000008: -+ /* -+ * If the IBPB policy has changed, we need to recalculate the MSR -+ * interception bitmaps. -+ */ -+ call_policy_changed = (is_hvm_domain(d) && -+ ((old_e8b ^ p->extd.raw[8].b) & -+ cpufeat_mask(X86_FEATURE_IBPB))); -+ break; - } - - if ( call_policy_changed ) -diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c -index 975a2d80cb..e978268fb5 100644 ---- a/xen/arch/x86/hvm/svm/svm.c -+++ b/xen/arch/x86/hvm/svm/svm.c -@@ -617,6 +617,7 @@ static void svm_cpuid_policy_changed(struct vcpu *v) - { - struct arch_svm_struct *arch_svm = &v->arch.hvm_svm; - struct vmcb_struct *vmcb = arch_svm->vmcb; -+ const struct cpuid_policy *cp = v->domain->arch.cpuid; - u32 bitmap = vmcb_get_exception_intercepts(vmcb); - - if ( opt_hvm_fep || -@@ -626,6 +627,10 @@ static void svm_cpuid_policy_changed(struct vcpu *v) - bitmap &= ~(1U << TRAP_invalid_op); - - vmcb_set_exception_intercepts(vmcb, bitmap); -+ -+ /* Give access to MSR_PRED_CMD if the guest has been told about it. */ -+ svm_intercept_msr(v, MSR_PRED_CMD, -+ cp->extd.ibpb ? MSR_INTERCEPT_NONE : MSR_INTERCEPT_RW); - } - - static void svm_sync_vmcb(struct vcpu *v) -diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c -index 4221fb8c56..9416ad5df2 100644 ---- a/xen/arch/x86/hvm/vmx/vmx.c -+++ b/xen/arch/x86/hvm/vmx/vmx.c -@@ -656,6 +656,8 @@ void vmx_update_exception_bitmap(struct vcpu *v) - - static void vmx_cpuid_policy_changed(struct vcpu *v) - { -+ const struct cpuid_policy *cp = v->domain->arch.cpuid; -+ - if ( opt_hvm_fep || - (v->domain->arch.cpuid->x86_vendor != boot_cpu_data.x86_vendor) ) - v->arch.hvm_vmx.exception_bitmap |= (1U << TRAP_invalid_op); -@@ -665,6 +667,21 @@ static void vmx_cpuid_policy_changed(struct vcpu *v) - vmx_vmcs_enter(v); - vmx_update_exception_bitmap(v); - vmx_vmcs_exit(v); -+ -+ /* -+ * We can safely pass MSR_SPEC_CTRL through to the guest, even if STIBP -+ * isn't enumerated in hardware, as SPEC_CTRL_STIBP is ignored. 
-+ */ -+ if ( cp->feat.ibrsb ) -+ vmx_clear_msr_intercept(v, MSR_SPEC_CTRL, VMX_MSR_RW); -+ else -+ vmx_set_msr_intercept(v, MSR_SPEC_CTRL, VMX_MSR_RW); -+ -+ /* MSR_PRED_CMD is safe to pass through if the guest knows about it. */ -+ if ( cp->feat.ibrsb || cp->extd.ibpb ) -+ vmx_clear_msr_intercept(v, MSR_PRED_CMD, VMX_MSR_RW); -+ else -+ vmx_set_msr_intercept(v, MSR_PRED_CMD, VMX_MSR_RW); - } - - int vmx_guest_x86_mode(struct vcpu *v) --- -2.14.3 - - -From a695f8dce7c3f137f61c8c8a880b24b1b4cf319c Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Thu, 8 Feb 2018 11:50:40 +0100 -Subject: [PATCH 43/49] x86/entry: Organise the use of MSR_SPEC_CTRL at each - entry/exit point - -We need to be able to either set or clear IBRS in Xen context, as well as -restore appropriate guest values in guest context. See the documentation in -asm-x86/spec_ctrl_asm.h for details. - -With the contemporary microcode, writes to %cr3 are slower when SPEC_CTRL.IBRS -is set. Therefore, the positioning of SPEC_CTRL_{ENTRY/EXIT}* is important. - -Ideally, the IBRS_SET/IBRS_CLEAR hunks might be positioned either side of the -%cr3 change, but that is rather more complicated to arrange, and could still -result in a guest controlled value in SPEC_CTRL during the %cr3 change, -negating the saving if the guest chose to have IBRS set. - -Therefore, we optimise for the pre-Skylake case (being far more common in the -field than Skylake and later, at the moment), where we have a Xen-preferred -value of IBRS clear when switching %cr3. - -There is a semi-unrelated bugfix, where various asm_defn.h macros have a -hidden dependency on PAGE_SIZE, which results in an assembler error if used in -a .macro definition. - -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich -master commit: 5e7962901131186d3514528ed57c7a9901a15a3e -master date: 2018-01-26 14:10:21 +0000 ---- - xen/arch/x86/hvm/svm/entry.S | 11 +- - xen/arch/x86/hvm/vmx/entry.S | 19 +++ - xen/arch/x86/setup.c | 1 + - xen/arch/x86/smpboot.c | 2 + - xen/arch/x86/x86_64/asm-offsets.c | 6 + - xen/arch/x86/x86_64/compat/entry.S | 14 +++ - xen/arch/x86/x86_64/entry.S | 48 +++++++- - xen/include/asm-x86/asm_defns.h | 3 + - xen/include/asm-x86/current.h | 6 + - xen/include/asm-x86/nops.h | 6 + - xen/include/asm-x86/spec_ctrl.h | 9 ++ - xen/include/asm-x86/spec_ctrl_asm.h | 225 ++++++++++++++++++++++++++++++++++++ - 12 files changed, 344 insertions(+), 6 deletions(-) - create mode 100644 xen/include/asm-x86/spec_ctrl_asm.h - -diff --git a/xen/arch/x86/hvm/svm/entry.S b/xen/arch/x86/hvm/svm/entry.S -index df86da0a81..bf092fe071 100644 ---- a/xen/arch/x86/hvm/svm/entry.S -+++ b/xen/arch/x86/hvm/svm/entry.S -@@ -79,6 +79,12 @@ UNLIKELY_END(svm_trace) - or $X86_EFLAGS_MBS,%rax - mov %rax,VMCB_rflags(%rcx) - -+ mov VCPU_arch_msr(%rbx), %rax -+ mov VCPUMSR_spec_ctrl_raw(%rax), %eax -+ -+ /* WARNING! `ret`, `call *`, `jmp *` not safe beyond this point. */ -+ SPEC_CTRL_EXIT_TO_GUEST /* Req: a=spec_ctrl %rsp=regs/cpuinfo, Clob: cd */ -+ - pop %r15 - pop %r14 - pop %r13 -@@ -101,8 +107,11 @@ UNLIKELY_END(svm_trace) - SAVE_ALL - - GET_CURRENT(bx) -- mov VCPU_svm_vmcb(%rbx),%rcx - -+ SPEC_CTRL_ENTRY_FROM_VMEXIT /* Req: b=curr %rsp=regs/cpuinfo, Clob: acd */ -+ /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. 
*/ -+ -+ mov VCPU_svm_vmcb(%rbx),%rcx - movb $0,VCPU_svm_vmcb_in_sync(%rbx) - mov VMCB_rax(%rcx),%rax - mov %rax,UREGS_rax(%rsp) -diff --git a/xen/arch/x86/hvm/vmx/entry.S b/xen/arch/x86/hvm/vmx/entry.S -index b2f98be7f5..e750544b4b 100644 ---- a/xen/arch/x86/hvm/vmx/entry.S -+++ b/xen/arch/x86/hvm/vmx/entry.S -@@ -38,6 +38,9 @@ ENTRY(vmx_asm_vmexit_handler) - movb $1,VCPU_vmx_launched(%rbx) - mov %rax,VCPU_hvm_guest_cr2(%rbx) - -+ SPEC_CTRL_ENTRY_FROM_VMEXIT /* Req: b=curr %rsp=regs/cpuinfo, Clob: acd */ -+ /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */ -+ - mov %rsp,%rdi - call vmx_vmexit_handler - -@@ -68,6 +71,13 @@ UNLIKELY_END(realmode) - call vmx_vmenter_helper - test %al, %al - jz .Lvmx_vmentry_restart -+ -+ mov VCPU_arch_msr(%rbx), %rax -+ mov VCPUMSR_spec_ctrl_raw(%rax), %eax -+ -+ /* WARNING! `ret`, `call *`, `jmp *` not safe beyond this point. */ -+ SPEC_CTRL_EXIT_TO_GUEST /* Req: a=spec_ctrl %rsp=regs/cpuinfo, Clob: cd */ -+ - mov VCPU_hvm_guest_cr2(%rbx),%rax - - pop %r15 -@@ -99,6 +109,15 @@ UNLIKELY_END(realmode) - .Lvmx_vmentry_fail: - sti - SAVE_ALL -+ -+ /* -+ * PV variant needed here as no guest code has executed (so -+ * MSR_SPEC_CTRL can't have changed value), and NMIs/MCEs are liable -+ * to hit (in which case the HVM variant might corrupt things). -+ */ -+ SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo Clob: acd */ -+ /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */ -+ - call vmx_vmentry_failure - BUG /* vmx_vmentry_failure() shouldn't return. */ - -diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c -index 470427bc64..b2aa281d72 100644 ---- a/xen/arch/x86/setup.c -+++ b/xen/arch/x86/setup.c -@@ -668,6 +668,7 @@ void __init noreturn __start_xen(unsigned long mbi_p) - set_processor_id(0); - set_current(INVALID_VCPU); /* debug sanity. 
*/ - idle_vcpu[0] = current; -+ init_shadow_spec_ctrl_state(); - - percpu_init_areas(); - -diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c -index edf607f5a2..005287c65c 100644 ---- a/xen/arch/x86/smpboot.c -+++ b/xen/arch/x86/smpboot.c -@@ -40,6 +40,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -308,6 +309,7 @@ void start_secondary(void *unused) - set_current(idle_vcpu[cpu]); - this_cpu(curr_vcpu) = idle_vcpu[cpu]; - rdmsrl(MSR_EFER, this_cpu(efer)); -+ init_shadow_spec_ctrl_state(); - - /* - * Just as during early bootstrap, it is convenient here to disable -diff --git a/xen/arch/x86/x86_64/asm-offsets.c b/xen/arch/x86/x86_64/asm-offsets.c -index b1a4310974..17f1d77320 100644 ---- a/xen/arch/x86/x86_64/asm-offsets.c -+++ b/xen/arch/x86/x86_64/asm-offsets.c -@@ -88,6 +88,7 @@ void __dummy__(void) - OFFSET(VCPU_kernel_ss, struct vcpu, arch.pv_vcpu.kernel_ss); - OFFSET(VCPU_iopl, struct vcpu, arch.pv_vcpu.iopl); - OFFSET(VCPU_guest_context_flags, struct vcpu, arch.vgc_flags); -+ OFFSET(VCPU_arch_msr, struct vcpu, arch.msr); - OFFSET(VCPU_nmi_pending, struct vcpu, nmi_pending); - OFFSET(VCPU_mce_pending, struct vcpu, mce_pending); - OFFSET(VCPU_nmi_old_mask, struct vcpu, nmi_state.old_mask); -@@ -139,6 +140,8 @@ void __dummy__(void) - OFFSET(CPUINFO_cr4, struct cpu_info, cr4); - OFFSET(CPUINFO_xen_cr3, struct cpu_info, xen_cr3); - OFFSET(CPUINFO_pv_cr3, struct cpu_info, pv_cr3); -+ OFFSET(CPUINFO_shadow_spec_ctrl, struct cpu_info, shadow_spec_ctrl); -+ OFFSET(CPUINFO_use_shadow_spec_ctrl, struct cpu_info, use_shadow_spec_ctrl); - DEFINE(CPUINFO_sizeof, sizeof(struct cpu_info)); - BLANK(); - -@@ -154,6 +157,9 @@ void __dummy__(void) - OFFSET(TRAPBOUNCE_eip, struct trap_bounce, eip); - BLANK(); - -+ OFFSET(VCPUMSR_spec_ctrl_raw, struct msr_vcpu_policy, spec_ctrl.raw); -+ BLANK(); -+ - #ifdef CONFIG_PERF_COUNTERS - DEFINE(ASM_PERFC_exceptions, PERFC_exceptions); - BLANK(); -diff --git a/xen/arch/x86/x86_64/compat/entry.S b/xen/arch/x86/x86_64/compat/entry.S -index e668f00c36..4190c733a3 100644 ---- a/xen/arch/x86/x86_64/compat/entry.S -+++ b/xen/arch/x86/x86_64/compat/entry.S -@@ -18,6 +18,10 @@ ENTRY(entry_int82) - pushq $0 - movl $HYPERCALL_VECTOR, 4(%rsp) - SAVE_ALL compat=1 /* DPL1 gate, restricted to 32bit PV guests only. */ -+ -+ SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, Clob: acd */ -+ /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */ -+ - CR4_PV32_RESTORE - - GET_CURRENT(bx) -@@ -142,6 +146,13 @@ ENTRY(compat_restore_all_guest) - .popsection - or $X86_EFLAGS_IF,%r11 - mov %r11d,UREGS_eflags(%rsp) -+ -+ mov VCPU_arch_msr(%rbx), %rax -+ mov VCPUMSR_spec_ctrl_raw(%rax), %eax -+ -+ /* WARNING! `ret`, `call *`, `jmp *` not safe beyond this point. */ -+ SPEC_CTRL_EXIT_TO_GUEST /* Req: a=spec_ctrl %rsp=regs/cpuinfo, Clob: cd */ -+ - RESTORE_ALL adj=8 compat=1 - .Lft0: iretq - _ASM_PRE_EXTABLE(.Lft0, handle_exception) -@@ -200,6 +211,9 @@ ENTRY(cstar_enter) - movl $TRAP_syscall, 4(%rsp) - SAVE_ALL - -+ SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, Clob: acd */ -+ /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. 
*/ -+ - GET_STACK_END(bx) - mov STACK_CPUINFO_FIELD(xen_cr3)(%rbx), %rcx - neg %rcx -diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S -index 710c0616ba..73bd7ca2ad 100644 ---- a/xen/arch/x86/x86_64/entry.S -+++ b/xen/arch/x86/x86_64/entry.S -@@ -38,6 +38,10 @@ ENTRY(switch_to_kernel) - restore_all_guest: - ASSERT_INTERRUPTS_DISABLED - -+ /* Stash guest SPEC_CTRL value while we can read struct vcpu. */ -+ mov VCPU_arch_msr(%rbx), %rdx -+ mov VCPUMSR_spec_ctrl_raw(%rdx), %r15d -+ - /* Copy guest mappings and switch to per-CPU root page table. */ - mov %cr3, %r9 - GET_STACK_END(dx) -@@ -65,6 +69,12 @@ restore_all_guest: - write_cr3 rax, rdi, rsi - .Lrag_keep_cr3: - -+ /* Restore stashed SPEC_CTRL value. */ -+ mov %r15d, %eax -+ -+ /* WARNING! `ret`, `call *`, `jmp *` not safe beyond this point. */ -+ SPEC_CTRL_EXIT_TO_GUEST /* Req: a=spec_ctrl %rsp=regs/cpuinfo, Clob: cd */ -+ - RESTORE_ALL - testw $TRAP_syscall,4(%rsp) - jz iret_exit_to_guest -@@ -103,9 +113,9 @@ restore_all_xen: - * Check whether we need to switch to the per-CPU page tables, in - * case we return to late PV exit code (from an NMI or #MC). - */ -- GET_STACK_END(ax) -- mov STACK_CPUINFO_FIELD(xen_cr3)(%rax), %rdx -- mov STACK_CPUINFO_FIELD(pv_cr3)(%rax), %rax -+ GET_STACK_END(bx) -+ mov STACK_CPUINFO_FIELD(xen_cr3)(%rbx), %rdx -+ mov STACK_CPUINFO_FIELD(pv_cr3)(%rbx), %rax - test %rdx, %rdx - /* - * Ideally the condition would be "nsz", but such doesn't exist, -@@ -115,6 +125,9 @@ UNLIKELY_START(g, exit_cr3) - write_cr3 rax, rdi, rsi - UNLIKELY_END(exit_cr3) - -+ /* WARNING! `ret`, `call *`, `jmp *` not safe beyond this point. */ -+ SPEC_CTRL_EXIT_TO_XEN /* Req: %rbx=end, Clob: acd */ -+ - RESTORE_ALL adj=8 - iretq - -@@ -145,6 +158,9 @@ ENTRY(lstar_enter) - movl $TRAP_syscall, 4(%rsp) - SAVE_ALL - -+ SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, Clob: acd */ -+ /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */ -+ - GET_STACK_END(bx) - mov STACK_CPUINFO_FIELD(xen_cr3)(%rbx), %rcx - neg %rcx -@@ -248,6 +264,9 @@ GLOBAL(sysenter_eflags_saved) - movl $TRAP_syscall, 4(%rsp) - SAVE_ALL - -+ SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, Clob: acd */ -+ /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */ -+ - GET_STACK_END(bx) - mov STACK_CPUINFO_FIELD(xen_cr3)(%rbx), %rcx - neg %rcx -@@ -294,6 +313,9 @@ ENTRY(int80_direct_trap) - movl $0x80, 4(%rsp) - SAVE_ALL - -+ SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, Clob: acd */ -+ /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */ -+ - GET_STACK_END(bx) - mov STACK_CPUINFO_FIELD(xen_cr3)(%rbx), %rcx - neg %rcx -@@ -469,6 +491,10 @@ ENTRY(common_interrupt) - SAVE_ALL CLAC - - GET_STACK_END(14) -+ -+ SPEC_CTRL_ENTRY_FROM_INTR /* Req: %rsp=regs, %r14=end, Clob: acd */ -+ /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */ -+ - mov STACK_CPUINFO_FIELD(xen_cr3)(%r14), %rcx - mov %rcx, %r15 - neg %rcx -@@ -507,6 +533,10 @@ GLOBAL(handle_exception) - SAVE_ALL CLAC - - GET_STACK_END(14) -+ -+ SPEC_CTRL_ENTRY_FROM_INTR /* Req: %rsp=regs, %r14=end, Clob: acd */ -+ /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. 
*/ -+ - mov STACK_CPUINFO_FIELD(xen_cr3)(%r14), %rcx - mov %rcx, %r15 - neg %rcx -@@ -700,8 +730,12 @@ ENTRY(double_fault) - /* Set AC to reduce chance of further SMAP faults */ - SAVE_ALL STAC - -- GET_STACK_END(bx) -- mov STACK_CPUINFO_FIELD(xen_cr3)(%rbx), %rbx -+ GET_STACK_END(14) -+ -+ SPEC_CTRL_ENTRY_FROM_INTR /* Req: %rsp=regs, %r14=end, Clob: acd */ -+ /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */ -+ -+ mov STACK_CPUINFO_FIELD(xen_cr3)(%r14), %rbx - test %rbx, %rbx - jz .Ldblf_cr3_okay - jns .Ldblf_cr3_load -@@ -730,6 +764,10 @@ handle_ist_exception: - SAVE_ALL CLAC - - GET_STACK_END(14) -+ -+ SPEC_CTRL_ENTRY_FROM_INTR /* Req: %rsp=regs, %r14=end, Clob: acd */ -+ /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */ -+ - mov STACK_CPUINFO_FIELD(xen_cr3)(%r14), %rcx - mov %rcx, %r15 - neg %rcx -diff --git a/xen/include/asm-x86/asm_defns.h b/xen/include/asm-x86/asm_defns.h -index 61b6d35bff..ba96b0e823 100644 ---- a/xen/include/asm-x86/asm_defns.h -+++ b/xen/include/asm-x86/asm_defns.h -@@ -7,6 +7,7 @@ - #include - #endif - #include -+#include - #include - #include - #include -@@ -374,4 +375,6 @@ static always_inline void stac(void) - 4: .p2align 2 ; \ - .popsection - -+#include -+ - #endif /* __X86_ASM_DEFNS_H__ */ -diff --git a/xen/include/asm-x86/current.h b/xen/include/asm-x86/current.h -index b929c48c85..1009d05e24 100644 ---- a/xen/include/asm-x86/current.h -+++ b/xen/include/asm-x86/current.h -@@ -53,6 +53,12 @@ struct cpu_info { - */ - unsigned long xen_cr3; - unsigned long pv_cr3; -+ -+ /* See asm-x86/spec_ctrl_asm.h for usage. */ -+ unsigned int shadow_spec_ctrl; -+ bool use_shadow_spec_ctrl; -+ -+ unsigned long __pad; - /* get_stack_bottom() must be 16-byte aligned */ - }; - -diff --git a/xen/include/asm-x86/nops.h b/xen/include/asm-x86/nops.h -index 9a6a4e1164..18cb718bac 100644 ---- a/xen/include/asm-x86/nops.h -+++ b/xen/include/asm-x86/nops.h -@@ -61,6 +61,12 @@ - #define ASM_NOP7 _ASM_MK_NOP(K8_NOP7) - #define ASM_NOP8 _ASM_MK_NOP(K8_NOP8) - -+#define ASM_NOP17 ASM_NOP8; ASM_NOP7; ASM_NOP2 -+#define ASM_NOP21 ASM_NOP8; ASM_NOP8; ASM_NOP5 -+#define ASM_NOP24 ASM_NOP8; ASM_NOP8; ASM_NOP8 -+#define ASM_NOP29 ASM_NOP8; ASM_NOP8; ASM_NOP8; ASM_NOP5 -+#define ASM_NOP32 ASM_NOP8; ASM_NOP8; ASM_NOP8; ASM_NOP8 -+ - #define ASM_NOP_MAX 8 - - #endif /* __X86_ASM_NOPS_H__ */ -diff --git a/xen/include/asm-x86/spec_ctrl.h b/xen/include/asm-x86/spec_ctrl.h -index e088a551da..b451250282 100644 ---- a/xen/include/asm-x86/spec_ctrl.h -+++ b/xen/include/asm-x86/spec_ctrl.h -@@ -20,8 +20,17 @@ - #ifndef __X86_SPEC_CTRL_H__ - #define __X86_SPEC_CTRL_H__ - -+#include -+ - void init_speculation_mitigations(void); - -+static inline void init_shadow_spec_ctrl_state(void) -+{ -+ struct cpu_info *info = get_cpu_info(); -+ -+ info->shadow_spec_ctrl = info->use_shadow_spec_ctrl = 0; -+} -+ - #endif /* !__X86_SPEC_CTRL_H__ */ - - /* -diff --git a/xen/include/asm-x86/spec_ctrl_asm.h b/xen/include/asm-x86/spec_ctrl_asm.h -new file mode 100644 -index 0000000000..ba5557436d ---- /dev/null -+++ b/xen/include/asm-x86/spec_ctrl_asm.h -@@ -0,0 +1,225 @@ -+/****************************************************************************** -+ * include/asm-x86/spec_ctrl.h -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. 
-+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; If not, see . -+ * -+ * Copyright (c) 2017-2018 Citrix Systems Ltd. -+ */ -+ -+#ifndef __X86_SPEC_CTRL_ASM_H__ -+#define __X86_SPEC_CTRL_ASM_H__ -+ -+#ifdef __ASSEMBLY__ -+#include -+ -+/* -+ * Saving and restoring MSR_SPEC_CTRL state is a little tricky. -+ * -+ * We want the guests choice of SPEC_CTRL while in guest context, and Xen's -+ * choice (set or clear, depending on the hardware) while running in Xen -+ * context. Therefore, a simplistic algorithm is: -+ * -+ * - Set/clear IBRS on entry to Xen -+ * - Set the guests' choice on exit to guest -+ * - Leave SPEC_CTRL unchanged on exit to xen -+ * -+ * There are two complicating factors: -+ * 1) HVM guests can have direct access to the MSR, so it can change -+ * behind Xen's back. -+ * 2) An NMI or MCE can interrupt at any point, including early in the entry -+ * path, or late in the exit path after restoring the guest value. This -+ * will corrupt the guest value. -+ * -+ * Factor 1 is dealt with by relying on NMIs/MCEs being blocked immediately -+ * after VMEXIT. The VMEXIT-specific code reads MSR_SPEC_CTRL and updates -+ * current before loading Xen's MSR_SPEC_CTRL setting. -+ * -+ * Factor 2 is harder. We maintain a shadow_spec_ctrl value, and -+ * use_shadow_spec_ctrl boolean per cpu. The synchronous use is: -+ * -+ * 1) Store guest value in shadow_spec_ctrl -+ * 2) Set use_shadow_spec_ctrl boolean -+ * 3) Load guest value into MSR_SPEC_CTRL -+ * 4) Exit to guest -+ * 5) Entry from guest -+ * 6) Clear use_shadow_spec_ctrl boolean -+ * 7) Load Xen's value into MSR_SPEC_CTRL -+ * -+ * The asynchronous use for interrupts/exceptions is: -+ * - Set/clear IBRS on entry to Xen -+ * - On exit to Xen, check use_shadow_spec_ctrl -+ * - If set, load shadow_spec_ctrl -+ * -+ * Therefore, an interrupt/exception which hits the synchronous path between -+ * steps 2 and 6 will restore the shadow value rather than leaving Xen's value -+ * loaded and corrupting the value used in guest context. -+ * -+ * The following ASM fragments implement this algorithm. See their local -+ * comments for further details. -+ * - SPEC_CTRL_ENTRY_FROM_VMEXIT -+ * - SPEC_CTRL_ENTRY_FROM_PV -+ * - SPEC_CTRL_ENTRY_FROM_INTR -+ * - SPEC_CTRL_EXIT_TO_XEN -+ * - SPEC_CTRL_EXIT_TO_GUEST -+ */ -+ -+.macro DO_SPEC_CTRL_ENTRY_FROM_VMEXIT ibrs_val:req -+/* -+ * Requires %rbx=current, %rsp=regs/cpuinfo -+ * Clobbers %rax, %rcx, %rdx -+ * -+ * The common case is that a guest has direct access to MSR_SPEC_CTRL, at -+ * which point we need to save the guest value before setting IBRS for Xen. -+ * Unilaterally saving the guest value is shorter and faster than checking. -+ */ -+ mov $MSR_SPEC_CTRL, %ecx -+ rdmsr -+ -+ /* Stash the value from hardware. */ -+ mov VCPU_arch_msr(%rbx), %rdx -+ mov %eax, VCPUMSR_spec_ctrl_raw(%rdx) -+ xor %edx, %edx -+ -+ /* Clear SPEC_CTRL shadowing *before* loading Xen's value. */ -+ movb %dl, CPUINFO_use_shadow_spec_ctrl(%rsp) -+ -+ /* Load Xen's intended value. 
*/ -+ mov $\ibrs_val, %eax -+ wrmsr -+.endm -+ -+.macro DO_SPEC_CTRL_ENTRY maybexen:req ibrs_val:req -+/* -+ * Requires %rsp=regs (also cpuinfo if !maybexen) -+ * Requires %r14=stack_end (if maybexen) -+ * Clobbers %rax, %rcx, %rdx -+ * -+ * PV guests can't update MSR_SPEC_CTRL behind Xen's back, so no need to read -+ * it back. Entries from guest context need to clear SPEC_CTRL shadowing, -+ * while entries from Xen must leave shadowing in its current state. -+ */ -+ mov $MSR_SPEC_CTRL, %ecx -+ xor %edx, %edx -+ -+ /* -+ * Clear SPEC_CTRL shadowing *before* loading Xen's value. If entering -+ * from a possibly-xen context, %rsp doesn't necessarily alias the cpuinfo -+ * block so calculate the position directly. -+ */ -+ .if \maybexen -+ /* Branchless `if ( !xen ) clear_shadowing` */ -+ testb $3, UREGS_cs(%rsp) -+ setz %al -+ and %al, STACK_CPUINFO_FIELD(use_shadow_spec_ctrl)(%r14) -+ .else -+ movb %dl, CPUINFO_use_shadow_spec_ctrl(%rsp) -+ .endif -+ -+ /* Load Xen's intended value. */ -+ mov $\ibrs_val, %eax -+ wrmsr -+.endm -+ -+.macro DO_SPEC_CTRL_EXIT_TO_XEN -+/* -+ * Requires %rbx=stack_end -+ * Clobbers %rax, %rcx, %rdx -+ * -+ * When returning to Xen context, look to see whether SPEC_CTRL shadowing is -+ * in effect, and reload the shadow value. This covers race conditions which -+ * exist with an NMI/MCE/etc hitting late in the return-to-guest path. -+ */ -+ xor %edx, %edx -+ -+ cmpb %dl, STACK_CPUINFO_FIELD(use_shadow_spec_ctrl)(%rbx) -+ je .L\@_skip -+ -+ mov STACK_CPUINFO_FIELD(shadow_spec_ctrl)(%rbx), %eax -+ mov $MSR_SPEC_CTRL, %ecx -+ wrmsr -+ -+.L\@_skip: -+.endm -+ -+.macro DO_SPEC_CTRL_EXIT_TO_GUEST -+/* -+ * Requires %eax=spec_ctrl, %rsp=regs/cpuinfo -+ * Clobbers %rcx, %rdx -+ * -+ * When returning to guest context, set up SPEC_CTRL shadowing and load the -+ * guest value. -+ */ -+ /* Set up shadow value *before* enabling shadowing. */ -+ mov %eax, CPUINFO_shadow_spec_ctrl(%rsp) -+ -+ /* Set SPEC_CTRL shadowing *before* loading the guest value. */ -+ movb $1, CPUINFO_use_shadow_spec_ctrl(%rsp) -+ -+ mov $MSR_SPEC_CTRL, %ecx -+ xor %edx, %edx -+ wrmsr -+.endm -+ -+/* Use after a VMEXIT from an HVM guest. */ -+#define SPEC_CTRL_ENTRY_FROM_VMEXIT \ -+ ALTERNATIVE_2 __stringify(ASM_NOP32), \ -+ __stringify(DO_SPEC_CTRL_ENTRY_FROM_VMEXIT \ -+ ibrs_val=SPEC_CTRL_IBRS), \ -+ X86_FEATURE_XEN_IBRS_SET, \ -+ __stringify(DO_SPEC_CTRL_ENTRY_FROM_VMEXIT \ -+ ibrs_val=0), \ -+ X86_FEATURE_XEN_IBRS_CLEAR -+ -+/* Use after an entry from PV context (syscall/sysenter/int80/int82/etc). */ -+#define SPEC_CTRL_ENTRY_FROM_PV \ -+ ALTERNATIVE_2 __stringify(ASM_NOP21), \ -+ __stringify(DO_SPEC_CTRL_ENTRY maybexen=0 \ -+ ibrs_val=SPEC_CTRL_IBRS), \ -+ X86_FEATURE_XEN_IBRS_SET, \ -+ __stringify(DO_SPEC_CTRL_ENTRY maybexen=0 ibrs_val=0), \ -+ X86_FEATURE_XEN_IBRS_CLEAR -+ -+/* Use in interrupt/exception context. May interrupt Xen or PV context. */ -+#define SPEC_CTRL_ENTRY_FROM_INTR \ -+ ALTERNATIVE_2 __stringify(ASM_NOP29), \ -+ __stringify(DO_SPEC_CTRL_ENTRY maybexen=1 \ -+ ibrs_val=SPEC_CTRL_IBRS), \ -+ X86_FEATURE_XEN_IBRS_SET, \ -+ __stringify(DO_SPEC_CTRL_ENTRY maybexen=1 ibrs_val=0), \ -+ X86_FEATURE_XEN_IBRS_CLEAR -+ -+/* Use when exiting to Xen context. */ -+#define SPEC_CTRL_EXIT_TO_XEN \ -+ ALTERNATIVE_2 __stringify(ASM_NOP17), \ -+ DO_SPEC_CTRL_EXIT_TO_XEN, X86_FEATURE_XEN_IBRS_SET, \ -+ DO_SPEC_CTRL_EXIT_TO_XEN, X86_FEATURE_XEN_IBRS_CLEAR -+ -+/* Use when exiting to guest context. 
*/ -+#define SPEC_CTRL_EXIT_TO_GUEST \ -+ ALTERNATIVE_2 __stringify(ASM_NOP24), \ -+ DO_SPEC_CTRL_EXIT_TO_GUEST, X86_FEATURE_XEN_IBRS_SET, \ -+ DO_SPEC_CTRL_EXIT_TO_GUEST, X86_FEATURE_XEN_IBRS_CLEAR -+ -+#endif /* __ASSEMBLY__ */ -+#endif /* !__X86_SPEC_CTRL_ASM_H__ */ -+ -+/* -+ * Local variables: -+ * mode: C -+ * c-file-style: "BSD" -+ * c-basic-offset: 4 -+ * tab-width: 4 -+ * indent-tabs-mode: nil -+ * End: -+ */ --- -2.14.3 - - -From ce7d7c01685569d9ff1f971c0f0622573bfe8bf3 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Thu, 8 Feb 2018 11:51:38 +0100 -Subject: [PATCH 44/49] x86/entry: Organise the clobbering of the RSB/RAS on - entry to Xen - -ret instructions are speculated directly to values recorded in the Return -Stack Buffer/Return Address Stack, as there is no uncertainty in well-formed -code. Guests can take advantage of this in two ways: - - 1) If they can find a path in Xen which executes more ret instructions than - call instructions. (At least one in the waitqueue infrastructure, - probably others.) - - 2) Use the fact that the RSB/RAS in hardware is actually a circular stack - without a concept of empty. (When it logically empties, stale values - will start being used.) - -To mitigate, overwrite the RSB on entry to Xen with gadgets which will capture -and contain rogue speculation. - -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich -master commit: e6c0128e9ab25bf66df11377a33ee5584d7f99e3 -master date: 2018-01-26 14:10:21 +0000 ---- - xen/include/asm-x86/cpufeatures.h | 2 ++ - xen/include/asm-x86/nops.h | 1 + - xen/include/asm-x86/spec_ctrl_asm.h | 44 +++++++++++++++++++++++++++++++++++++ - 3 files changed, 47 insertions(+) - -diff --git a/xen/include/asm-x86/cpufeatures.h b/xen/include/asm-x86/cpufeatures.h -index dd2388f393..b5dae12bfa 100644 ---- a/xen/include/asm-x86/cpufeatures.h -+++ b/xen/include/asm-x86/cpufeatures.h -@@ -28,3 +28,5 @@ XEN_CPUFEATURE(IND_THUNK_JMP, (FSCAPINTS+0)*32+14) /* Use IND_THUNK_JMP */ - XEN_CPUFEATURE(XEN_IBPB, (FSCAPINTS+0)*32+15) /* IBRSB || IBPB */ - XEN_CPUFEATURE(XEN_IBRS_SET, (FSCAPINTS+0)*32+16) /* IBRSB && IRBS set in Xen */ - XEN_CPUFEATURE(XEN_IBRS_CLEAR, (FSCAPINTS+0)*32+17) /* IBRSB && IBRS clear in Xen */ -+XEN_CPUFEATURE(RSB_NATIVE, (FSCAPINTS+0)*32+18) /* RSB overwrite needed for native */ -+XEN_CPUFEATURE(RSB_VMEXIT, (FSCAPINTS+0)*32+19) /* RSB overwrite needed for vmexit */ -diff --git a/xen/include/asm-x86/nops.h b/xen/include/asm-x86/nops.h -index 18cb718bac..37f9819e82 100644 ---- a/xen/include/asm-x86/nops.h -+++ b/xen/include/asm-x86/nops.h -@@ -66,6 +66,7 @@ - #define ASM_NOP24 ASM_NOP8; ASM_NOP8; ASM_NOP8 - #define ASM_NOP29 ASM_NOP8; ASM_NOP8; ASM_NOP8; ASM_NOP5 - #define ASM_NOP32 ASM_NOP8; ASM_NOP8; ASM_NOP8; ASM_NOP8 -+#define ASM_NOP40 ASM_NOP8; ASM_NOP8; ASM_NOP8; ASM_NOP8; ASM_NOP8 - - #define ASM_NOP_MAX 8 - -diff --git a/xen/include/asm-x86/spec_ctrl_asm.h b/xen/include/asm-x86/spec_ctrl_asm.h -index ba5557436d..e27ea2b1e6 100644 ---- a/xen/include/asm-x86/spec_ctrl_asm.h -+++ b/xen/include/asm-x86/spec_ctrl_asm.h -@@ -74,6 +74,44 @@ - * - SPEC_CTRL_EXIT_TO_GUEST - */ - -+.macro DO_OVERWRITE_RSB -+/* -+ * Requires nothing -+ * Clobbers %rax, %rcx -+ * -+ * Requires 256 bytes of stack space, but %rsp has no net change. Based on -+ * Google's performance numbers, the loop is unrolled to 16 iterations and two -+ * calls per iteration. -+ * -+ * The call filling the RSB needs a nonzero displacement. 
A nop would do, but
-+ * we use "1: pause; lfence; jmp 1b" to safely contain any ret-based
-+ * speculation, even if the loop is speculatively executed prematurely.
-+ *
-+ * %rsp is preserved by using an extra GPR because a) we've got plenty spare,
-+ * b) the two movs are shorter to encode than `add $32*8, %rsp`, and c) can be
-+ * optimised with mov-elimination in modern cores.
-+ */
-+ mov $16, %ecx /* 16 iterations, two calls per loop */
-+ mov %rsp, %rax /* Store the current %rsp */
-+
-+.L\@_fill_rsb_loop:
-+
-+ .irp n, 1, 2 /* Unrolled twice. */
-+ call .L\@_insert_rsb_entry_\n /* Create an RSB entry. */
-+
-+.L\@_capture_speculation_\n:
-+ pause
-+ lfence
-+ jmp .L\@_capture_speculation_\n /* Capture rogue speculation. */
-+
-+.L\@_insert_rsb_entry_\n:
-+ .endr
-+
-+ sub $1, %ecx
-+ jnz .L\@_fill_rsb_loop
-+ mov %rax, %rsp /* Restore old %rsp */
-+.endm
-+
- .macro DO_SPEC_CTRL_ENTRY_FROM_VMEXIT ibrs_val:req
- /*
- * Requires %rbx=current, %rsp=regs/cpuinfo
-@@ -173,6 +211,8 @@
- 
- /* Use after a VMEXIT from an HVM guest. */
- #define SPEC_CTRL_ENTRY_FROM_VMEXIT \
-+ ALTERNATIVE __stringify(ASM_NOP40), \
-+ DO_OVERWRITE_RSB, X86_FEATURE_RSB_VMEXIT; \
- ALTERNATIVE_2 __stringify(ASM_NOP32), \
- __stringify(DO_SPEC_CTRL_ENTRY_FROM_VMEXIT \
- ibrs_val=SPEC_CTRL_IBRS), \
-@@ -183,6 +223,8 @@
- 
- /* Use after an entry from PV context (syscall/sysenter/int80/int82/etc). */
- #define SPEC_CTRL_ENTRY_FROM_PV \
-+ ALTERNATIVE __stringify(ASM_NOP40), \
-+ DO_OVERWRITE_RSB, X86_FEATURE_RSB_NATIVE; \
- ALTERNATIVE_2 __stringify(ASM_NOP21), \
- __stringify(DO_SPEC_CTRL_ENTRY maybexen=0 \
- ibrs_val=SPEC_CTRL_IBRS), \
-@@ -192,6 +234,8 @@
- 
- /* Use in interrupt/exception context. May interrupt Xen or PV context. */
- #define SPEC_CTRL_ENTRY_FROM_INTR \
-+ ALTERNATIVE __stringify(ASM_NOP40), \
-+ DO_OVERWRITE_RSB, X86_FEATURE_RSB_NATIVE; \
- ALTERNATIVE_2 __stringify(ASM_NOP29), \
- __stringify(DO_SPEC_CTRL_ENTRY maybexen=1 \
- ibrs_val=SPEC_CTRL_IBRS), \
---
-2.14.3
-
-
-From fc81946ceaae2c27fce2ba0f3f29fa9df3975951 Mon Sep 17 00:00:00 2001
-From: Andrew Cooper
-Date: Thu, 8 Feb 2018 11:52:28 +0100
-Subject: [PATCH 45/49] x86/entry: Avoid using alternatives in NMI/#MC paths
-
-This patch is deliberately arranged to be easy to revert if/when alternatives
-patching becomes NMI/#MC safe.
-
-For safety, there must be a dispatch serialising instruction in (what is
-logically) DO_SPEC_CTRL_ENTRY so that, in the case that Xen needs IBRS set in
-context, an attacker can't speculate around the WRMSR and reach an indirect
-branch within the speculation window.
-
-Using conditionals opens this attack vector up, so the else clause gets an
-LFENCE to force the pipeline to catch up before continuing. This also covers
-the safety of the RSB conditional, as execution is guaranteed to either hit
-the WRMSR or LFENCE.
-
-One downside of not using alternatives is that there is unconditionally an
-LFENCE in the IST path in cases where we are not using the features from
-IBRS-capable microcode.
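For illustration, the IST-entry behaviour described above amounts to the following C-style sketch (it mirrors the SPEC_CTRL_ENTRY_FROM_INTR_IST macro added below; overwrite_rsb() and lfence() are invented stand-ins for the corresponding assembly sequences, not real Xen helpers):

    /* Sketch only: the conditional IST-entry logic, with its two
     * dispatch-serialising endpoints (the WRMSR or the LFENCE). */
    static void ist_entry_mitigations(struct cpu_info *info)
    {
        unsigned int bti = info->bti_ist_info;

        if ( bti & BTI_IST_RSB )
            overwrite_rsb();               /* Stuff the RSB/RAS. */

        if ( bti & BTI_IST_WRMSR )
            /* WRMSR serialises: speculation cannot run past it. */
            wrmsrl(MSR_SPEC_CTRL, bti & BTI_IST_IBRS);
        else
            /* No WRMSR on this path, so serialise explicitly. */
            lfence();
    }

Either way, execution hits a serialising instruction before reaching an indirect branch, which is the property the text above relies on.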
- -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich -master commit: 3fffaf9c13e9502f09ad4ab1aac3f8b7b9398f6f -master date: 2018-01-26 14:10:21 +0000 ---- - xen/arch/x86/spec_ctrl.c | 8 +++++ - xen/arch/x86/x86_64/asm-offsets.c | 1 + - xen/arch/x86/x86_64/entry.S | 6 ++-- - xen/include/asm-x86/current.h | 1 + - xen/include/asm-x86/spec_ctrl.h | 1 + - xen/include/asm-x86/spec_ctrl_asm.h | 67 +++++++++++++++++++++++++++++++++++++ - 6 files changed, 81 insertions(+), 3 deletions(-) - -diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c -index 89e7287e43..cc1c972845 100644 ---- a/xen/arch/x86/spec_ctrl.c -+++ b/xen/arch/x86/spec_ctrl.c -@@ -20,8 +20,10 @@ - #include - #include - -+#include - #include - #include -+#include - - static enum ind_thunk { - THUNK_DEFAULT, /* Decide which thunk to use at boot time. */ -@@ -150,6 +152,12 @@ void __init init_speculation_mitigations(void) - print_details(thunk); - } - -+static void __init __maybe_unused build_assertions(void) -+{ -+ /* The optimised assembly relies on this alias. */ -+ BUILD_BUG_ON(BTI_IST_IBRS != SPEC_CTRL_IBRS); -+} -+ - /* - * Local variables: - * mode: C -diff --git a/xen/arch/x86/x86_64/asm-offsets.c b/xen/arch/x86/x86_64/asm-offsets.c -index 17f1d77320..51be528f89 100644 ---- a/xen/arch/x86/x86_64/asm-offsets.c -+++ b/xen/arch/x86/x86_64/asm-offsets.c -@@ -142,6 +142,7 @@ void __dummy__(void) - OFFSET(CPUINFO_pv_cr3, struct cpu_info, pv_cr3); - OFFSET(CPUINFO_shadow_spec_ctrl, struct cpu_info, shadow_spec_ctrl); - OFFSET(CPUINFO_use_shadow_spec_ctrl, struct cpu_info, use_shadow_spec_ctrl); -+ OFFSET(CPUINFO_bti_ist_info, struct cpu_info, bti_ist_info); - DEFINE(CPUINFO_sizeof, sizeof(struct cpu_info)); - BLANK(); - -diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S -index 73bd7ca2ad..a5a6702468 100644 ---- a/xen/arch/x86/x86_64/entry.S -+++ b/xen/arch/x86/x86_64/entry.S -@@ -126,7 +126,7 @@ UNLIKELY_START(g, exit_cr3) - UNLIKELY_END(exit_cr3) - - /* WARNING! `ret`, `call *`, `jmp *` not safe beyond this point. */ -- SPEC_CTRL_EXIT_TO_XEN /* Req: %rbx=end, Clob: acd */ -+ SPEC_CTRL_EXIT_TO_XEN_IST /* Req: %rbx=end, Clob: acd */ - - RESTORE_ALL adj=8 - iretq -@@ -732,7 +732,7 @@ ENTRY(double_fault) - - GET_STACK_END(14) - -- SPEC_CTRL_ENTRY_FROM_INTR /* Req: %rsp=regs, %r14=end, Clob: acd */ -+ SPEC_CTRL_ENTRY_FROM_INTR_IST /* Req: %rsp=regs, %r14=end, Clob: acd */ - /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */ - - mov STACK_CPUINFO_FIELD(xen_cr3)(%r14), %rbx -@@ -765,7 +765,7 @@ handle_ist_exception: - - GET_STACK_END(14) - -- SPEC_CTRL_ENTRY_FROM_INTR /* Req: %rsp=regs, %r14=end, Clob: acd */ -+ SPEC_CTRL_ENTRY_FROM_INTR_IST /* Req: %rsp=regs, %r14=end, Clob: acd */ - /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */ - - mov STACK_CPUINFO_FIELD(xen_cr3)(%r14), %rcx -diff --git a/xen/include/asm-x86/current.h b/xen/include/asm-x86/current.h -index 1009d05e24..4678a0fcf5 100644 ---- a/xen/include/asm-x86/current.h -+++ b/xen/include/asm-x86/current.h -@@ -57,6 +57,7 @@ struct cpu_info { - /* See asm-x86/spec_ctrl_asm.h for usage. 
*/ - unsigned int shadow_spec_ctrl; - bool use_shadow_spec_ctrl; -+ uint8_t bti_ist_info; - - unsigned long __pad; - /* get_stack_bottom() must be 16-byte aligned */ -diff --git a/xen/include/asm-x86/spec_ctrl.h b/xen/include/asm-x86/spec_ctrl.h -index b451250282..c454b02b66 100644 ---- a/xen/include/asm-x86/spec_ctrl.h -+++ b/xen/include/asm-x86/spec_ctrl.h -@@ -29,6 +29,7 @@ static inline void init_shadow_spec_ctrl_state(void) - struct cpu_info *info = get_cpu_info(); - - info->shadow_spec_ctrl = info->use_shadow_spec_ctrl = 0; -+ info->bti_ist_info = 0; - } - - #endif /* !__X86_SPEC_CTRL_H__ */ -diff --git a/xen/include/asm-x86/spec_ctrl_asm.h b/xen/include/asm-x86/spec_ctrl_asm.h -index e27ea2b1e6..814f53dffc 100644 ---- a/xen/include/asm-x86/spec_ctrl_asm.h -+++ b/xen/include/asm-x86/spec_ctrl_asm.h -@@ -20,6 +20,11 @@ - #ifndef __X86_SPEC_CTRL_ASM_H__ - #define __X86_SPEC_CTRL_ASM_H__ - -+/* Encoding of the bottom bits in cpuinfo.bti_ist_info */ -+#define BTI_IST_IBRS (1 << 0) -+#define BTI_IST_WRMSR (1 << 1) -+#define BTI_IST_RSB (1 << 2) -+ - #ifdef __ASSEMBLY__ - #include - -@@ -255,6 +260,68 @@ - DO_SPEC_CTRL_EXIT_TO_GUEST, X86_FEATURE_XEN_IBRS_SET, \ - DO_SPEC_CTRL_EXIT_TO_GUEST, X86_FEATURE_XEN_IBRS_CLEAR - -+/* TODO: Drop these when the alternatives infrastructure is NMI/#MC safe. */ -+.macro SPEC_CTRL_ENTRY_FROM_INTR_IST -+/* -+ * Requires %rsp=regs, %r14=stack_end -+ * Clobbers %rax, %rcx, %rdx -+ * -+ * This is logical merge of DO_OVERWRITE_RSB and DO_SPEC_CTRL_ENTRY -+ * maybexen=1, but with conditionals rather than alternatives. -+ */ -+ movzbl STACK_CPUINFO_FIELD(bti_ist_info)(%r14), %eax -+ -+ testb $BTI_IST_RSB, %al -+ jz .L\@_skip_rsb -+ -+ DO_OVERWRITE_RSB -+ -+.L\@_skip_rsb: -+ -+ testb $BTI_IST_WRMSR, %al -+ jz .L\@_skip_wrmsr -+ -+ xor %edx, %edx -+ testb $3, UREGS_cs(%rsp) -+ setz %dl -+ and %dl, STACK_CPUINFO_FIELD(use_shadow_spec_ctrl)(%r14) -+ -+.L\@_entry_from_xen: -+ /* -+ * Load Xen's intended value. SPEC_CTRL_IBRS vs 0 is encoded in the -+ * bottom bit of bti_ist_info, via a deliberate alias with BTI_IST_IBRS. -+ */ -+ mov $MSR_SPEC_CTRL, %ecx -+ and $BTI_IST_IBRS, %eax -+ wrmsr -+ -+ /* Opencoded UNLIKELY_START() with no condition. */ -+UNLIKELY_DISPATCH_LABEL(\@_serialise): -+ .subsection 1 -+ /* -+ * In the case that we might need to set SPEC_CTRL.IBRS for safety, we -+ * need to ensure that an attacker can't poison the `jz .L\@_skip_wrmsr` -+ * to speculate around the WRMSR. As a result, we need a dispatch -+ * serialising instruction in the else clause. -+ */ -+.L\@_skip_wrmsr: -+ lfence -+ UNLIKELY_END(\@_serialise) -+.endm -+ -+.macro SPEC_CTRL_EXIT_TO_XEN_IST -+/* -+ * Requires %rbx=stack_end -+ * Clobbers %rax, %rcx, %rdx -+ */ -+ testb $BTI_IST_WRMSR, STACK_CPUINFO_FIELD(bti_ist_info)(%rbx) -+ jz .L\@_skip -+ -+ DO_SPEC_CTRL_EXIT_TO_XEN -+ -+.L\@_skip: -+.endm -+ - #endif /* __ASSEMBLY__ */ - #endif /* !__X86_SPEC_CTRL_ASM_H__ */ - --- -2.14.3 - - -From bc0e599a83d17f06ec7da1708721cede2df8274e Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Thu, 8 Feb 2018 11:53:10 +0100 -Subject: [PATCH 46/49] x86/boot: Calculate the most appropriate BTI mitigation - to use - -See the logic and comments in init_speculation_mitigations() for further -details. 
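In outline, the choice init_speculation_mitigations() ends up making looks roughly like this (a simplified sketch of the hunks below; the real code also handles CONFIG_INDIRECT_THUNK being compiled out, and separately sets up the RSB and IST defaults):

    /* Simplified sketch of the thunk/IBRS selection added by this patch. */
    if ( opt_thunk != THUNK_DEFAULT || opt_ibrs != -1 )
    {
        thunk = opt_thunk;               /* Obey the admin's bti= settings. */
        ibrs = !!opt_ibrs;
    }
    else if ( cpu_has_lfence_dispatch )
        thunk = THUNK_LFENCE;            /* AMD: lfence is dispatch serialising. */
    else if ( retpoline_safe() )
        thunk = THUNK_RETPOLINE;         /* Known retpoline-safe Intel parts. */
    else if ( boot_cpu_has(X86_FEATURE_IBRSB) )
        ibrs = true;                     /* Retpoline unsafe: fall back to IBRS. */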
-
-There are two controls for RSB overwriting, because in principle there are
-cases where it might be safe to forego rsb_native (Off the top of my head,
-SMEP active, no 32bit PV guests at all, no use of vmevent/paging subsystems
-for HVM guests, but I make no guarantees that this list of restrictions is
-exhaustive).
-
-Signed-off-by: Andrew Cooper
-Reviewed-by: Jan Beulich
-
-x86/spec_ctrl: Fix determination of when to use IBRS
-
-The original version of this logic was:
-
- /*
- * On Intel hardware, we'd like to use retpoline in preference to
- * IBRS, but only if it is safe on this hardware.
- */
- else if ( boot_cpu_has(X86_FEATURE_IBRSB) )
- {
- if ( retpoline_safe() )
- thunk = THUNK_RETPOLINE;
- else
- ibrs = true;
- }
-
-but it was changed by a request during review. Sadly, the result is buggy as
-it breaks the later fallback logic by allowing IBRS to appear as available
-when in fact it isn't.
-
-This in practice means that on retpoline-unsafe hardware without IBRS, we
-select THUNK_JMP despite intending to select THUNK_RETPOLINE.
-
-Reported-by: Zhenzhong Duan
-Signed-off-by: Andrew Cooper
-Reviewed-by: Jan Beulich
-master commit: 2713715305ca516f698d58cec5e0b322c3b2c4eb
-master date: 2018-01-26 14:10:21 +0000
-master commit: 30cbd0c83ef3d0edac2d5bcc41a9a2b7a843ae58
-master date: 2018-02-06 18:32:58 +0000
----
- docs/misc/xen-command-line.markdown | 10 ++-
- xen/arch/x86/spec_ctrl.c | 141 +++++++++++++++++++++++++++++++++++-
- xen/include/asm-x86/spec_ctrl.h | 4 +-
- 3 files changed, 149 insertions(+), 6 deletions(-)
-
-diff --git a/docs/misc/xen-command-line.markdown b/docs/misc/xen-command-line.markdown
-index 2d95759568..a751a392ac 100644
---- a/docs/misc/xen-command-line.markdown
-+++ b/docs/misc/xen-command-line.markdown
-@@ -246,7 +246,7 @@ enough. Setting this to a high value may cause boot failure, particularly if
- the NMI watchdog is also enabled.
- 
- ### bti (x86)
--> `= List of [ thunk=retpoline|lfence|jmp ]`
-+> `= List of [ thunk=retpoline|lfence|jmp, ibrs=, rsb_{vmexit,native}= ]`
- 
- Branch Target Injection controls. By default, Xen will pick the most
- appropriate BTI mitigations based on compiled in support, loaded microcode,
-@@ -261,6 +261,14 @@ locations. The default thunk is `retpoline` (generally preferred for Intel
- hardware), with the alternatives being `jmp` (a `jmp *%reg` gadget, minimal
- overhead), and `lfence` (an `lfence; jmp *%reg` gadget, preferred for AMD).
- 
-+On hardware supporting IBRS, the `ibrs=` option can be used to force or
-+prevent Xen using the feature itself. If Xen is not using IBRS itself,
-+functionality is still set up so IBRS can be virtualised for guests.
-+
-+The `rsb_vmexit=` and `rsb_native=` options can be used to fine tune when the
-+RSB gets overwritten. There are individual controls for an entry from HVM
-+context, and an entry from a native (PV or Xen) context.
-+ - ### xenheap\_megabytes (arm32) - > `= ` - -diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c -index cc1c972845..8aefe29968 100644 ---- a/xen/arch/x86/spec_ctrl.c -+++ b/xen/arch/x86/spec_ctrl.c -@@ -20,6 +20,7 @@ - #include - #include - -+#include - #include - #include - #include -@@ -33,11 +34,15 @@ static enum ind_thunk { - THUNK_LFENCE, - THUNK_JMP, - } opt_thunk __initdata = THUNK_DEFAULT; -+static int8_t __initdata opt_ibrs = -1; -+static bool __initdata opt_rsb_native = true; -+static bool __initdata opt_rsb_vmexit = true; -+uint8_t __read_mostly default_bti_ist_info; - - static int __init parse_bti(const char *s) - { - const char *ss; -- int rc = 0; -+ int val, rc = 0; - - do { - ss = strchr(s, ','); -@@ -57,6 +62,12 @@ static int __init parse_bti(const char *s) - else - rc = -EINVAL; - } -+ else if ( (val = parse_boolean("ibrs", s, ss)) >= 0 ) -+ opt_ibrs = val; -+ else if ( (val = parse_boolean("rsb_native", s, ss)) >= 0 ) -+ opt_rsb_native = val; -+ else if ( (val = parse_boolean("rsb_vmexit", s, ss)) >= 0 ) -+ opt_rsb_vmexit = val; - else - rc = -EINVAL; - -@@ -93,24 +104,84 @@ static void __init print_details(enum ind_thunk thunk) - printk(XENLOG_DEBUG " Compiled-in support: INDIRECT_THUNK\n"); - - printk(XENLOG_INFO -- "BTI mitigations: Thunk %s\n", -+ "BTI mitigations: Thunk %s, Others:%s%s%s\n", - thunk == THUNK_NONE ? "N/A" : - thunk == THUNK_RETPOLINE ? "RETPOLINE" : - thunk == THUNK_LFENCE ? "LFENCE" : -- thunk == THUNK_JMP ? "JMP" : "?"); -+ thunk == THUNK_JMP ? "JMP" : "?", -+ boot_cpu_has(X86_FEATURE_XEN_IBRS_SET) ? " IBRS+" : -+ boot_cpu_has(X86_FEATURE_XEN_IBRS_CLEAR) ? " IBRS-" : "", -+ boot_cpu_has(X86_FEATURE_RSB_NATIVE) ? " RSB_NATIVE" : "", -+ boot_cpu_has(X86_FEATURE_RSB_VMEXIT) ? " RSB_VMEXIT" : ""); -+} -+ -+/* Calculate whether Retpoline is known-safe on this CPU. */ -+static bool __init retpoline_safe(void) -+{ -+ unsigned int ucode_rev = this_cpu(ucode_cpu_info).cpu_sig.rev; -+ -+ if ( boot_cpu_data.x86_vendor == X86_VENDOR_AMD ) -+ return true; -+ -+ if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL || -+ boot_cpu_data.x86 != 6 ) -+ return false; -+ -+ switch ( boot_cpu_data.x86_model ) -+ { -+ case 0x17: /* Penryn */ -+ case 0x1d: /* Dunnington */ -+ case 0x1e: /* Nehalem */ -+ case 0x1f: /* Auburndale / Havendale */ -+ case 0x1a: /* Nehalem EP */ -+ case 0x2e: /* Nehalem EX */ -+ case 0x25: /* Westmere */ -+ case 0x2c: /* Westmere EP */ -+ case 0x2f: /* Westmere EX */ -+ case 0x2a: /* SandyBridge */ -+ case 0x2d: /* SandyBridge EP/EX */ -+ case 0x3a: /* IvyBridge */ -+ case 0x3e: /* IvyBridge EP/EX */ -+ case 0x3c: /* Haswell */ -+ case 0x3f: /* Haswell EX/EP */ -+ case 0x45: /* Haswell D */ -+ case 0x46: /* Haswell H */ -+ return true; -+ -+ /* -+ * Broadwell processors are retpoline-safe after specific microcode -+ * versions. -+ */ -+ case 0x3d: /* Broadwell */ -+ return ucode_rev >= 0x28; -+ case 0x47: /* Broadwell H */ -+ return ucode_rev >= 0x1b; -+ case 0x4f: /* Broadwell EP/EX */ -+ return ucode_rev >= 0xb000025; -+ case 0x56: /* Broadwell D */ -+ return false; /* TBD. */ -+ -+ /* -+ * Skylake and later processors are not retpoline-safe. -+ */ -+ default: -+ return false; -+ } - } - - void __init init_speculation_mitigations(void) - { - enum ind_thunk thunk = THUNK_DEFAULT; -+ bool ibrs = false; - - /* - * Has the user specified any custom BTI mitigations? If so, follow their - * instructions exactly and disable all heuristics. 
- */ -- if ( opt_thunk != THUNK_DEFAULT ) -+ if ( opt_thunk != THUNK_DEFAULT || opt_ibrs != -1 ) - { - thunk = opt_thunk; -+ ibrs = !!opt_ibrs; - } - else - { -@@ -126,7 +197,18 @@ void __init init_speculation_mitigations(void) - */ - if ( cpu_has_lfence_dispatch ) - thunk = THUNK_LFENCE; -+ /* -+ * On Intel hardware, we'd like to use retpoline in preference to -+ * IBRS, but only if it is safe on this hardware. -+ */ -+ else if ( retpoline_safe() ) -+ thunk = THUNK_RETPOLINE; -+ else if ( boot_cpu_has(X86_FEATURE_IBRSB) ) -+ ibrs = true; - } -+ /* Without compiler thunk support, use IBRS if available. */ -+ else if ( boot_cpu_has(X86_FEATURE_IBRSB) ) -+ ibrs = true; - } - - /* -@@ -136,6 +218,13 @@ void __init init_speculation_mitigations(void) - if ( !IS_ENABLED(CONFIG_INDIRECT_THUNK) ) - thunk = THUNK_NONE; - -+ /* -+ * If IBRS is in use and thunks are compiled in, there is no point -+ * suffering extra overhead. Switch to the least-overhead thunk. -+ */ -+ if ( ibrs && thunk == THUNK_DEFAULT ) -+ thunk = THUNK_JMP; -+ - /* - * If there are still no thunk preferences, the compiled default is - * actually retpoline, and it is better than nothing. -@@ -149,6 +238,50 @@ void __init init_speculation_mitigations(void) - else if ( thunk == THUNK_JMP ) - setup_force_cpu_cap(X86_FEATURE_IND_THUNK_JMP); - -+ if ( boot_cpu_has(X86_FEATURE_IBRSB) ) -+ { -+ /* -+ * Even if we've chosen to not have IBRS set in Xen context, we still -+ * need the IBRS entry/exit logic to virtualise IBRS support for -+ * guests. -+ */ -+ if ( ibrs ) -+ setup_force_cpu_cap(X86_FEATURE_XEN_IBRS_SET); -+ else -+ setup_force_cpu_cap(X86_FEATURE_XEN_IBRS_CLEAR); -+ -+ default_bti_ist_info |= BTI_IST_WRMSR | ibrs; -+ } -+ -+ /* -+ * PV guests can poison the RSB to any virtual address from which -+ * they can execute a call instruction. This is necessarily outside -+ * of the Xen supervisor mappings. -+ * -+ * With SMEP enabled, the processor won't speculate into user mappings. -+ * Therefore, in this case, we don't need to worry about poisoned entries -+ * from 64bit PV guests. -+ * -+ * 32bit PV guest kernels run in ring 1, so use supervisor mappings. -+ * If a processors speculates to 32bit PV guest kernel mappings, it is -+ * speculating in 64bit supervisor mode, and can leak data. -+ */ -+ if ( opt_rsb_native ) -+ { -+ setup_force_cpu_cap(X86_FEATURE_RSB_NATIVE); -+ default_bti_ist_info |= BTI_IST_RSB; -+ } -+ -+ /* -+ * HVM guests can always poison the RSB to point at Xen supervisor -+ * mappings. -+ */ -+ if ( opt_rsb_vmexit ) -+ setup_force_cpu_cap(X86_FEATURE_RSB_VMEXIT); -+ -+ /* (Re)init BSP state now that default_bti_ist_info has been calculated. 
*/ -+ init_shadow_spec_ctrl_state(); -+ - print_details(thunk); - } - -diff --git a/xen/include/asm-x86/spec_ctrl.h b/xen/include/asm-x86/spec_ctrl.h -index c454b02b66..6120e4f561 100644 ---- a/xen/include/asm-x86/spec_ctrl.h -+++ b/xen/include/asm-x86/spec_ctrl.h -@@ -24,12 +24,14 @@ - - void init_speculation_mitigations(void); - -+extern uint8_t default_bti_ist_info; -+ - static inline void init_shadow_spec_ctrl_state(void) - { - struct cpu_info *info = get_cpu_info(); - - info->shadow_spec_ctrl = info->use_shadow_spec_ctrl = 0; -- info->bti_ist_info = 0; -+ info->bti_ist_info = default_bti_ist_info; - } - - #endif /* !__X86_SPEC_CTRL_H__ */ --- -2.14.3 - - -From db12743f2d24fc59d5b9cefc15eb3d56cdaf549d Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Thu, 8 Feb 2018 11:53:40 +0100 -Subject: [PATCH 47/49] x86/ctxt: Issue a speculation barrier between vcpu - contexts - -Issuing an IBPB command flushes the Branch Target Buffer, so that any poison -left by one vcpu won't remain when beginning to execute the next. - -The cost of IBPB is substantial, and skipped on transition to idle, as Xen's -idle code is robust already. All transitions into vcpu context are fully -serialising in practice (and under consideration for being retroactively -declared architecturally serialising), so a cunning attacker cannot use SP1 to -try and skip the flush. - -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich -master commit: a2ed643ed783020f885035432e9c0919756921d1 -master date: 2018-01-26 14:10:21 +0000 ---- - docs/misc/xen-command-line.markdown | 5 ++++- - xen/arch/x86/domain.c | 29 +++++++++++++++++++++++++++++ - xen/arch/x86/spec_ctrl.c | 10 +++++++++- - xen/include/asm-x86/spec_ctrl.h | 1 + - 4 files changed, 43 insertions(+), 2 deletions(-) - -diff --git a/docs/misc/xen-command-line.markdown b/docs/misc/xen-command-line.markdown -index a751a392ac..e9b65ac555 100644 ---- a/docs/misc/xen-command-line.markdown -+++ b/docs/misc/xen-command-line.markdown -@@ -246,7 +246,7 @@ enough. Setting this to a high value may cause boot failure, particularly if - the NMI watchdog is also enabled. - - ### bti (x86) --> `= List of [ thunk=retpoline|lfence|jmp, ibrs=, rsb_{vmexit,native}= ]` -+> `= List of [ thunk=retpoline|lfence|jmp, ibrs=, ibpb=, rsb_{vmexit,native}= ]` - - Branch Target Injection controls. By default, Xen will pick the most - appropriate BTI mitigations based on compiled in support, loaded microcode, -@@ -265,6 +265,9 @@ On hardware supporting IBRS, the `ibrs=` option can be used to force or - prevent Xen using the feature itself. If Xen is not using IBRS itself, - functionality is still set up so IBRS can be virtualised for guests. - -+On hardware supporting IBPB, the `ibpb=` option can be used to prevent Xen -+from issuing Branch Prediction Barriers on vcpu context switches. -+ - The `rsb_vmexit=` and `rsb_native=` options can be used to fine tune when the - RSB gets overwritten. There are individual controls for an entry from HVM - context, and an entry from a native (PV or Xen) context. 
-diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c -index aaa2b28413..8e936c8547 100644 ---- a/xen/arch/x86/domain.c -+++ b/xen/arch/x86/domain.c -@@ -65,6 +65,7 @@ - #include - #include - #include -+#include - - DEFINE_PER_CPU(struct vcpu *, curr_vcpu); - -@@ -1739,6 +1740,34 @@ void context_switch(struct vcpu *prev, struct vcpu *next) - } - - ctxt_switch_levelling(next); -+ -+ if ( opt_ibpb && !is_idle_domain(nextd) ) -+ { -+ static DEFINE_PER_CPU(unsigned int, last); -+ unsigned int *last_id = &this_cpu(last); -+ -+ /* -+ * Squash the domid and vcpu id together for comparison -+ * efficiency. We could in principle stash and compare the struct -+ * vcpu pointer, but this risks a false alias if a domain has died -+ * and the same 4k page gets reused for a new vcpu. -+ */ -+ unsigned int next_id = (((unsigned int)nextd->domain_id << 16) | -+ (uint16_t)next->vcpu_id); -+ BUILD_BUG_ON(MAX_VIRT_CPUS > 0xffff); -+ -+ /* -+ * When scheduling from a vcpu, to idle, and back to the same vcpu -+ * (which might be common in a lightly loaded system, or when -+ * using vcpu pinning), there is no need to issue IBPB, as we are -+ * returning to the same security context. -+ */ -+ if ( *last_id != next_id ) -+ { -+ wrmsrl(MSR_PRED_CMD, PRED_CMD_IBPB); -+ *last_id = next_id; -+ } -+ } - } - - context_saved(prev); -diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c -index 8aefe29968..8ad992a700 100644 ---- a/xen/arch/x86/spec_ctrl.c -+++ b/xen/arch/x86/spec_ctrl.c -@@ -37,6 +37,7 @@ static enum ind_thunk { - static int8_t __initdata opt_ibrs = -1; - static bool __initdata opt_rsb_native = true; - static bool __initdata opt_rsb_vmexit = true; -+bool __read_mostly opt_ibpb = true; - uint8_t __read_mostly default_bti_ist_info; - - static int __init parse_bti(const char *s) -@@ -64,6 +65,8 @@ static int __init parse_bti(const char *s) - } - else if ( (val = parse_boolean("ibrs", s, ss)) >= 0 ) - opt_ibrs = val; -+ else if ( (val = parse_boolean("ibpb", s, ss)) >= 0 ) -+ opt_ibpb = val; - else if ( (val = parse_boolean("rsb_native", s, ss)) >= 0 ) - opt_rsb_native = val; - else if ( (val = parse_boolean("rsb_vmexit", s, ss)) >= 0 ) -@@ -104,13 +107,14 @@ static void __init print_details(enum ind_thunk thunk) - printk(XENLOG_DEBUG " Compiled-in support: INDIRECT_THUNK\n"); - - printk(XENLOG_INFO -- "BTI mitigations: Thunk %s, Others:%s%s%s\n", -+ "BTI mitigations: Thunk %s, Others:%s%s%s%s\n", - thunk == THUNK_NONE ? "N/A" : - thunk == THUNK_RETPOLINE ? "RETPOLINE" : - thunk == THUNK_LFENCE ? "LFENCE" : - thunk == THUNK_JMP ? "JMP" : "?", - boot_cpu_has(X86_FEATURE_XEN_IBRS_SET) ? " IBRS+" : - boot_cpu_has(X86_FEATURE_XEN_IBRS_CLEAR) ? " IBRS-" : "", -+ opt_ibpb ? " IBPB" : "", - boot_cpu_has(X86_FEATURE_RSB_NATIVE) ? " RSB_NATIVE" : "", - boot_cpu_has(X86_FEATURE_RSB_VMEXIT) ? " RSB_VMEXIT" : ""); - } -@@ -279,6 +283,10 @@ void __init init_speculation_mitigations(void) - if ( opt_rsb_vmexit ) - setup_force_cpu_cap(X86_FEATURE_RSB_VMEXIT); - -+ /* Check we have hardware IBPB support before using it... */ -+ if ( !boot_cpu_has(X86_FEATURE_IBRSB) && !boot_cpu_has(X86_FEATURE_IBPB) ) -+ opt_ibpb = false; -+ - /* (Re)init BSP state now that default_bti_ist_info has been calculated. 
*/ - init_shadow_spec_ctrl_state(); - -diff --git a/xen/include/asm-x86/spec_ctrl.h b/xen/include/asm-x86/spec_ctrl.h -index 6120e4f561..e328b0f509 100644 ---- a/xen/include/asm-x86/spec_ctrl.h -+++ b/xen/include/asm-x86/spec_ctrl.h -@@ -24,6 +24,7 @@ - - void init_speculation_mitigations(void); - -+extern bool opt_ibpb; - extern uint8_t default_bti_ist_info; - - static inline void init_shadow_spec_ctrl_state(void) --- -2.14.3 - - -From 5644514050b9ae7d75cdd95fd07912b9930cae08 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Thu, 8 Feb 2018 11:54:12 +0100 -Subject: [PATCH 48/49] x86/cpuid: Offer Indirect Branch Controls to guests - -With all infrastructure in place, it is now safe to let guests see and use -these features. - -Signed-off-by: Andrew Cooper -Acked-by: Jan Beulich -Acked-by: Wei Liu -master commit: 67c6838ddacfa646f9d1ae802bd0f16a935665b8 -master date: 2018-01-26 14:10:21 +0000 ---- - xen/include/public/arch-x86/cpufeatureset.h | 6 +++--- - 1 file changed, 3 insertions(+), 3 deletions(-) - -diff --git a/xen/include/public/arch-x86/cpufeatureset.h b/xen/include/public/arch-x86/cpufeatureset.h -index 0f21fed161..fa81af14b7 100644 ---- a/xen/include/public/arch-x86/cpufeatureset.h -+++ b/xen/include/public/arch-x86/cpufeatureset.h -@@ -237,13 +237,13 @@ XEN_CPUFEATURE(EFRO, 7*32+10) /* APERF/MPERF Read Only interface */ - - /* AMD-defined CPU features, CPUID level 0x80000008.ebx, word 8 */ - XEN_CPUFEATURE(CLZERO, 8*32+ 0) /*A CLZERO instruction */ --XEN_CPUFEATURE(IBPB, 8*32+12) /* IBPB support only (no IBRS, used by AMD) */ -+XEN_CPUFEATURE(IBPB, 8*32+12) /*A IBPB support only (no IBRS, used by AMD) */ - - /* Intel-defined CPU features, CPUID level 0x00000007:0.edx, word 9 */ - XEN_CPUFEATURE(AVX512_4VNNIW, 9*32+ 2) /*A AVX512 Neural Network Instructions */ - XEN_CPUFEATURE(AVX512_4FMAPS, 9*32+ 3) /*A AVX512 Multiply Accumulation Single Precision */ --XEN_CPUFEATURE(IBRSB, 9*32+26) /* IBRS and IBPB support (used by Intel) */ --XEN_CPUFEATURE(STIBP, 9*32+27) /*! STIBP */ -+XEN_CPUFEATURE(IBRSB, 9*32+26) /*A IBRS and IBPB support (used by Intel) */ -+XEN_CPUFEATURE(STIBP, 9*32+27) /*A! STIBP */ - - #endif /* XEN_CPUFEATURE */ - --- -2.14.3 - - -From 3181472a5ca45ae5e77abbcf024d025d9ba79ced Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Thu, 8 Feb 2018 11:54:52 +0100 -Subject: [PATCH 49/49] x86/idle: Clear SPEC_CTRL while idle - -On contemporary hardware, setting IBRS/STIBP has a performance impact on -adjacent hyperthreads. It is therefore recommended to clear the setting -before becoming idle, to avoid an idle core preventing adjacent userspace -execution from running at full performance. - -Care must be taken to ensure there are no ret or indirect branch instructions -between spec_ctrl_{enter,exit}_idle() invocations, which are forced always -inline. Care must also be taken to avoid using spec_ctrl_enter_idle() between -flushing caches and becoming idle, in cases where that matters. 
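The intended calling pattern is the one the default_idle() hunk below adopts, sketched here for clarity (cpu_is_haltable() and safe_halt() are pre-existing Xen helpers):

    struct cpu_info *info = get_cpu_info();

    local_irq_disable();
    if ( cpu_is_haltable(smp_processor_id()) )
    {
        spec_ctrl_enter_idle(info);   /* Clear SPEC_CTRL; no ret / indirect branch */
        safe_halt();                  /* from here ...                             */
        spec_ctrl_exit_idle(info);    /* ... until Xen's value is restored.        */
    }
    else
        local_irq_enable();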
- -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich -master commit: 4c7e478d597b0346eef3a256cfd6794ac778b608 -master date: 2018-01-26 14:10:21 +0000 ---- - xen/arch/x86/acpi/cpu_idle.c | 21 +++++++++++++++++++++ - xen/arch/x86/cpu/mwait-idle.c | 7 +++++++ - xen/arch/x86/domain.c | 8 ++++++++ - xen/include/asm-x86/spec_ctrl.h | 34 ++++++++++++++++++++++++++++++++++ - 4 files changed, 70 insertions(+) - -diff --git a/xen/arch/x86/acpi/cpu_idle.c b/xen/arch/x86/acpi/cpu_idle.c -index b605a87083..5feda704c6 100644 ---- a/xen/arch/x86/acpi/cpu_idle.c -+++ b/xen/arch/x86/acpi/cpu_idle.c -@@ -55,6 +55,7 @@ - #include - #include - #include -+#include - - /*#define DEBUG_PM_CX*/ - -@@ -417,8 +418,14 @@ void mwait_idle_with_hints(unsigned int eax, unsigned int ecx) - */ - if ( (expires > NOW() || expires == 0) && !softirq_pending(cpu) ) - { -+ struct cpu_info *info = get_cpu_info(); -+ - cpumask_set_cpu(cpu, &cpuidle_mwait_flags); -+ -+ spec_ctrl_enter_idle(info); - __mwait(eax, ecx); -+ spec_ctrl_exit_idle(info); -+ - cpumask_clear_cpu(cpu, &cpuidle_mwait_flags); - } - -@@ -433,6 +440,8 @@ static void acpi_processor_ffh_cstate_enter(struct acpi_processor_cx *cx) - - static void acpi_idle_do_entry(struct acpi_processor_cx *cx) - { -+ struct cpu_info *info = get_cpu_info(); -+ - switch ( cx->entry_method ) - { - case ACPI_CSTATE_EM_FFH: -@@ -440,15 +449,19 @@ static void acpi_idle_do_entry(struct acpi_processor_cx *cx) - acpi_processor_ffh_cstate_enter(cx); - return; - case ACPI_CSTATE_EM_SYSIO: -+ spec_ctrl_enter_idle(info); - /* IO port based C-state */ - inb(cx->address); - /* Dummy wait op - must do something useless after P_LVL2 read - because chipsets cannot guarantee that STPCLK# signal - gets asserted in time to freeze execution properly. */ - inl(pmtmr_ioport); -+ spec_ctrl_exit_idle(info); - return; - case ACPI_CSTATE_EM_HALT: -+ spec_ctrl_enter_idle(info); - safe_halt(); -+ spec_ctrl_exit_idle(info); - local_irq_disable(); - return; - } -@@ -576,7 +589,13 @@ static void acpi_processor_idle(void) - if ( pm_idle_save ) - pm_idle_save(); - else -+ { -+ struct cpu_info *info = get_cpu_info(); -+ -+ spec_ctrl_enter_idle(info); - safe_halt(); -+ spec_ctrl_exit_idle(info); -+ } - return; - } - -@@ -755,6 +774,7 @@ void acpi_dead_idle(void) - * Otherwise, CPU may still hold dirty data, breaking cache coherency, - * leading to strange errors. 
- */ -+ spec_ctrl_enter_idle(get_cpu_info()); - wbinvd(); - - while ( 1 ) -@@ -784,6 +804,7 @@ void acpi_dead_idle(void) - u32 address = cx->address; - u32 pmtmr_ioport_local = pmtmr_ioport; - -+ spec_ctrl_enter_idle(get_cpu_info()); - wbinvd(); - - while ( 1 ) -diff --git a/xen/arch/x86/cpu/mwait-idle.c b/xen/arch/x86/cpu/mwait-idle.c -index 762dff1cba..e357f29208 100644 ---- a/xen/arch/x86/cpu/mwait-idle.c -+++ b/xen/arch/x86/cpu/mwait-idle.c -@@ -58,6 +58,7 @@ - #include - #include - #include -+#include - #include - - #define MWAIT_IDLE_VERSION "0.4.1" -@@ -736,7 +737,13 @@ static void mwait_idle(void) - if (pm_idle_save) - pm_idle_save(); - else -+ { -+ struct cpu_info *info = get_cpu_info(); -+ -+ spec_ctrl_enter_idle(info); - safe_halt(); -+ spec_ctrl_exit_idle(info); -+ } - return; - } - -diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c -index 8e936c8547..7e10a2a07d 100644 ---- a/xen/arch/x86/domain.c -+++ b/xen/arch/x86/domain.c -@@ -55,6 +55,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -75,9 +76,15 @@ void (*dead_idle) (void) __read_mostly = default_dead_idle; - - static void default_idle(void) - { -+ struct cpu_info *info = get_cpu_info(); -+ - local_irq_disable(); - if ( cpu_is_haltable(smp_processor_id()) ) -+ { -+ spec_ctrl_enter_idle(info); - safe_halt(); -+ spec_ctrl_exit_idle(info); -+ } - else - local_irq_enable(); - } -@@ -89,6 +96,7 @@ void default_dead_idle(void) - * held by the CPUs spinning here indefinitely, and get discarded by - * a subsequent INIT. - */ -+ spec_ctrl_enter_idle(get_cpu_info()); - wbinvd(); - for ( ; ; ) - halt(); -diff --git a/xen/include/asm-x86/spec_ctrl.h b/xen/include/asm-x86/spec_ctrl.h -index e328b0f509..5ab4ff3f68 100644 ---- a/xen/include/asm-x86/spec_ctrl.h -+++ b/xen/include/asm-x86/spec_ctrl.h -@@ -20,7 +20,9 @@ - #ifndef __X86_SPEC_CTRL_H__ - #define __X86_SPEC_CTRL_H__ - -+#include - #include -+#include - - void init_speculation_mitigations(void); - -@@ -35,6 +37,38 @@ static inline void init_shadow_spec_ctrl_state(void) - info->bti_ist_info = default_bti_ist_info; - } - -+/* WARNING! `ret`, `call *`, `jmp *` not safe after this call. */ -+static always_inline void spec_ctrl_enter_idle(struct cpu_info *info) -+{ -+ uint32_t val = 0; -+ -+ /* -+ * Latch the new shadow value, then enable shadowing, then update the MSR. -+ * There are no SMP issues here; only local processor ordering concerns. -+ */ -+ info->shadow_spec_ctrl = val; -+ barrier(); -+ info->use_shadow_spec_ctrl = true; -+ barrier(); -+ asm volatile ( ALTERNATIVE(ASM_NOP3, "wrmsr", X86_FEATURE_XEN_IBRS_SET) -+ :: "a" (val), "c" (MSR_SPEC_CTRL), "d" (0) : "memory" ); -+} -+ -+/* WARNING! `ret`, `call *`, `jmp *` not safe before this call. */ -+static always_inline void spec_ctrl_exit_idle(struct cpu_info *info) -+{ -+ uint32_t val = SPEC_CTRL_IBRS; -+ -+ /* -+ * Disable shadowing before updating the MSR. There are no SMP issues -+ * here; only local processor ordering concerns. 
-+ */ -+ info->use_shadow_spec_ctrl = false; -+ barrier(); -+ asm volatile ( ALTERNATIVE(ASM_NOP3, "wrmsr", X86_FEATURE_XEN_IBRS_SET) -+ :: "a" (val), "c" (MSR_SPEC_CTRL), "d" (0) : "memory" ); -+} -+ - #endif /* !__X86_SPEC_CTRL_H__ */ - - /* --- -2.14.3 - diff --git a/xen.xsa254.pti.patch b/xen.xsa254.pti.patch deleted file mode 100644 index 611a881..0000000 --- a/xen.xsa254.pti.patch +++ /dev/null @@ -1,1410 +0,0 @@ -From 910dd005da20f27f3415b7eccdf436874989506b Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Wed, 17 Jan 2018 16:54:44 +0100 -Subject: [PATCH 1/5] x86/entry: Remove support for partial cpu_user_regs - frames - -Save all GPRs on entry to Xen. - -The entry_int82() path is via a DPL1 gate, only usable by 32bit PV guests, so -can get away with only saving the 32bit registers. All other entrypoints can -be reached from 32 or 64bit contexts. - -This is part of XSA-254. - -Signed-off-by: Andrew Cooper -Reviewed-by: Wei Liu -Acked-by: Jan Beulich -master commit: f9eb74789af77e985ae653193f3622263499f674 -master date: 2018-01-05 19:57:07 +0000 ---- - tools/tests/x86_emulator/x86-emulate.c | 1 - - xen/arch/x86/pv/domain.c | 1 - - xen/arch/x86/pv/emul-priv-op.c | 2 - - xen/arch/x86/x86_64/compat/entry.S | 7 ++- - xen/arch/x86/x86_64/entry.S | 12 ++-- - xen/arch/x86/x86_64/traps.c | 13 ++-- - xen/arch/x86/x86_emulate.c | 1 - - xen/arch/x86/x86_emulate/x86_emulate.c | 8 +-- - xen/common/wait.c | 1 - - xen/include/asm-x86/asm_defns.h | 105 +++------------------------------ - 10 files changed, 26 insertions(+), 125 deletions(-) - -diff --git a/tools/tests/x86_emulator/x86-emulate.c b/tools/tests/x86_emulator/x86-emulate.c -index 975ddc7e53..9056610907 100644 ---- a/tools/tests/x86_emulator/x86-emulate.c -+++ b/tools/tests/x86_emulator/x86-emulate.c -@@ -3,7 +3,6 @@ - #include - - #define cpu_has_amd_erratum(nr) 0 --#define mark_regs_dirty(r) ((void)(r)) - #define cpu_has_mpx false - #define read_bndcfgu() 0 - #define xstate_set_init(what) -diff --git a/xen/arch/x86/pv/domain.c b/xen/arch/x86/pv/domain.c -index 2234128bb3..74e9e667d2 100644 ---- a/xen/arch/x86/pv/domain.c -+++ b/xen/arch/x86/pv/domain.c -@@ -20,7 +20,6 @@ - static void noreturn continue_nonidle_domain(struct vcpu *v) - { - check_wakeup_from_wait(); -- mark_regs_dirty(guest_cpu_user_regs()); - reset_stack_and_jump(ret_from_intr); - } - -diff --git a/xen/arch/x86/pv/emul-priv-op.c b/xen/arch/x86/pv/emul-priv-op.c -index 2f9264548a..5f23c2cfbf 100644 ---- a/xen/arch/x86/pv/emul-priv-op.c -+++ b/xen/arch/x86/pv/emul-priv-op.c -@@ -337,7 +337,6 @@ static int read_io(unsigned int port, unsigned int bytes, - io_emul_stub_t *io_emul = - io_emul_stub_setup(poc, ctxt->opcode, port, bytes); - -- mark_regs_dirty(ctxt->regs); - io_emul(ctxt->regs); - return X86EMUL_DONE; - } -@@ -436,7 +435,6 @@ static int write_io(unsigned int port, unsigned int bytes, - io_emul_stub_t *io_emul = - io_emul_stub_setup(poc, ctxt->opcode, port, bytes); - -- mark_regs_dirty(ctxt->regs); - io_emul(ctxt->regs); - if ( (bytes == 1) && pv_post_outb_hook ) - pv_post_outb_hook(port, val); -diff --git a/xen/arch/x86/x86_64/compat/entry.S b/xen/arch/x86/x86_64/compat/entry.S -index ba6e941837..3fea54ee9d 100644 ---- a/xen/arch/x86/x86_64/compat/entry.S -+++ b/xen/arch/x86/x86_64/compat/entry.S -@@ -16,7 +16,8 @@ - ENTRY(entry_int82) - ASM_CLAC - pushq $0 -- SAVE_VOLATILE type=HYPERCALL_VECTOR compat=1 -+ movl $HYPERCALL_VECTOR, 4(%rsp) -+ SAVE_ALL compat=1 /* DPL1 gate, restricted to 32bit PV guests only. 
*/ - CR4_PV32_RESTORE - - GET_CURRENT(bx) -@@ -60,7 +61,6 @@ compat_test_guest_events: - /* %rbx: struct vcpu */ - compat_process_softirqs: - sti -- andl $~TRAP_regs_partial,UREGS_entry_vector(%rsp) - call do_softirq - jmp compat_test_all_events - -@@ -197,7 +197,8 @@ ENTRY(cstar_enter) - pushq $FLAT_USER_CS32 - pushq %rcx - pushq $0 -- SAVE_VOLATILE TRAP_syscall -+ movl $TRAP_syscall, 4(%rsp) -+ SAVE_ALL - GET_CURRENT(bx) - movq VCPU_domain(%rbx),%rcx - cmpb $0,DOMAIN_is_32bit_pv(%rcx) -diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S -index 6066ed8b18..1dd9ccf6a2 100644 ---- a/xen/arch/x86/x86_64/entry.S -+++ b/xen/arch/x86/x86_64/entry.S -@@ -98,7 +98,8 @@ ENTRY(lstar_enter) - pushq $FLAT_KERNEL_CS64 - pushq %rcx - pushq $0 -- SAVE_VOLATILE TRAP_syscall -+ movl $TRAP_syscall, 4(%rsp) -+ SAVE_ALL - GET_CURRENT(bx) - testb $TF_kernel_mode,VCPU_thread_flags(%rbx) - jz switch_to_kernel -@@ -140,7 +141,6 @@ test_guest_events: - /* %rbx: struct vcpu */ - process_softirqs: - sti -- SAVE_PRESERVED - call do_softirq - jmp test_all_events - -@@ -190,7 +190,8 @@ GLOBAL(sysenter_eflags_saved) - pushq $3 /* ring 3 null cs */ - pushq $0 /* null rip */ - pushq $0 -- SAVE_VOLATILE TRAP_syscall -+ movl $TRAP_syscall, 4(%rsp) -+ SAVE_ALL - GET_CURRENT(bx) - cmpb $0,VCPU_sysenter_disables_events(%rbx) - movq VCPU_sysenter_addr(%rbx),%rax -@@ -207,7 +208,6 @@ UNLIKELY_END(sysenter_nt_set) - leal (,%rcx,TBF_INTERRUPT),%ecx - UNLIKELY_START(z, sysenter_gpf) - movq VCPU_trap_ctxt(%rbx),%rsi -- SAVE_PRESERVED - movl $TRAP_gp_fault,UREGS_entry_vector(%rsp) - movl %eax,TRAPBOUNCE_error_code(%rdx) - movq TRAP_gp_fault * TRAPINFO_sizeof + TRAPINFO_eip(%rsi),%rax -@@ -225,7 +225,8 @@ UNLIKELY_END(sysenter_gpf) - ENTRY(int80_direct_trap) - ASM_CLAC - pushq $0 -- SAVE_VOLATILE 0x80 -+ movl $0x80, 4(%rsp) -+ SAVE_ALL - - cmpb $0,untrusted_msi(%rip) - UNLIKELY_START(ne, msi_check) -@@ -253,7 +254,6 @@ int80_slow_path: - * IDT entry with DPL==0. - */ - movl $((0x80 << 3) | X86_XEC_IDT),UREGS_error_code(%rsp) -- SAVE_PRESERVED - movl $TRAP_gp_fault,UREGS_entry_vector(%rsp) - /* A GPF wouldn't have incremented the instruction pointer. 
*/ - subq $2,UREGS_rip(%rsp) -diff --git a/xen/arch/x86/x86_64/traps.c b/xen/arch/x86/x86_64/traps.c -index 2a326be58e..3652f5ff21 100644 ---- a/xen/arch/x86/x86_64/traps.c -+++ b/xen/arch/x86/x86_64/traps.c -@@ -80,15 +80,10 @@ static void _show_registers( - regs->rbp, regs->rsp, regs->r8); - printk("r9: %016lx r10: %016lx r11: %016lx\n", - regs->r9, regs->r10, regs->r11); -- if ( !(regs->entry_vector & TRAP_regs_partial) ) -- { -- printk("r12: %016lx r13: %016lx r14: %016lx\n", -- regs->r12, regs->r13, regs->r14); -- printk("r15: %016lx cr0: %016lx cr4: %016lx\n", -- regs->r15, crs[0], crs[4]); -- } -- else -- printk("cr0: %016lx cr4: %016lx\n", crs[0], crs[4]); -+ printk("r12: %016lx r13: %016lx r14: %016lx\n", -+ regs->r12, regs->r13, regs->r14); -+ printk("r15: %016lx cr0: %016lx cr4: %016lx\n", -+ regs->r15, crs[0], crs[4]); - printk("cr3: %016lx cr2: %016lx\n", crs[3], crs[2]); - printk("fsb: %016lx gsb: %016lx gss: %016lx\n", - crs[5], crs[6], crs[7]); -diff --git a/xen/arch/x86/x86_emulate.c b/xen/arch/x86/x86_emulate.c -index cc334ca8f9..c7ba221d11 100644 ---- a/xen/arch/x86/x86_emulate.c -+++ b/xen/arch/x86/x86_emulate.c -@@ -11,7 +11,6 @@ - - #include - #include --#include /* mark_regs_dirty() */ - #include /* current_cpu_info */ - #include - #include /* cpu_has_amd_erratum() */ -diff --git a/xen/arch/x86/x86_emulate/x86_emulate.c b/xen/arch/x86/x86_emulate/x86_emulate.c -index 54a275664a..820495fb9c 100644 ---- a/xen/arch/x86/x86_emulate/x86_emulate.c -+++ b/xen/arch/x86/x86_emulate/x86_emulate.c -@@ -1956,10 +1956,10 @@ decode_register( - case 9: p = ®s->r9; break; - case 10: p = ®s->r10; break; - case 11: p = ®s->r11; break; -- case 12: mark_regs_dirty(regs); p = ®s->r12; break; -- case 13: mark_regs_dirty(regs); p = ®s->r13; break; -- case 14: mark_regs_dirty(regs); p = ®s->r14; break; -- case 15: mark_regs_dirty(regs); p = ®s->r15; break; -+ case 12: p = ®s->r12; break; -+ case 13: p = ®s->r13; break; -+ case 14: p = ®s->r14; break; -+ case 15: p = ®s->r15; break; - #endif - default: BUG(); p = NULL; break; - } -diff --git a/xen/common/wait.c b/xen/common/wait.c -index 9490a17dc2..c5fc094e2c 100644 ---- a/xen/common/wait.c -+++ b/xen/common/wait.c -@@ -127,7 +127,6 @@ static void __prepare_to_wait(struct waitqueue_vcpu *wqv) - unsigned long dummy; - u32 entry_vector = cpu_info->guest_cpu_user_regs.entry_vector; - -- cpu_info->guest_cpu_user_regs.entry_vector &= ~TRAP_regs_partial; - ASSERT(wqv->esp == 0); - - /* Save current VCPU affinity; force wakeup on *this* CPU only. */ -diff --git a/xen/include/asm-x86/asm_defns.h b/xen/include/asm-x86/asm_defns.h -index 388fc93b9d..98192eb4e6 100644 ---- a/xen/include/asm-x86/asm_defns.h -+++ b/xen/include/asm-x86/asm_defns.h -@@ -17,15 +17,6 @@ - void ret_from_intr(void); - #endif - --#ifdef CONFIG_FRAME_POINTER --/* Indicate special exception stack frame by inverting the frame pointer. */ --#define SETUP_EXCEPTION_FRAME_POINTER(offs) \ -- leaq offs(%rsp),%rbp; \ -- notq %rbp --#else --#define SETUP_EXCEPTION_FRAME_POINTER(offs) --#endif -- - #ifndef NDEBUG - #define ASSERT_INTERRUPT_STATUS(x, msg) \ - pushf; \ -@@ -42,31 +33,6 @@ void ret_from_intr(void); - #define ASSERT_INTERRUPTS_DISABLED \ - ASSERT_INTERRUPT_STATUS(z, "INTERRUPTS DISABLED") - --/* -- * This flag is set in an exception frame when registers R12-R15 did not get -- * saved. 
-- */ --#define _TRAP_regs_partial 16 --#define TRAP_regs_partial (1 << _TRAP_regs_partial) --/* -- * This flag gets set in an exception frame when registers R12-R15 possibly -- * get modified from their originally saved values and hence need to be -- * restored even if the normal call flow would restore register values. -- * -- * The flag being set implies _TRAP_regs_partial to be unset. Restoring -- * R12-R15 thus is -- * - required when this flag is set, -- * - safe when _TRAP_regs_partial is unset. -- */ --#define _TRAP_regs_dirty 17 --#define TRAP_regs_dirty (1 << _TRAP_regs_dirty) -- --#define mark_regs_dirty(r) ({ \ -- struct cpu_user_regs *r__ = (r); \ -- ASSERT(!((r__)->entry_vector & TRAP_regs_partial)); \ -- r__->entry_vector |= TRAP_regs_dirty; \ --}) -- - #ifdef __ASSEMBLY__ - # define _ASM_EX(p) p-. - #else -@@ -236,7 +202,7 @@ static always_inline void stac(void) - #endif - - #ifdef __ASSEMBLY__ --.macro SAVE_ALL op -+.macro SAVE_ALL op, compat=0 - .ifeqs "\op", "CLAC" - ASM_CLAC - .else -@@ -255,40 +221,6 @@ static always_inline void stac(void) - movq %rdx,UREGS_rdx(%rsp) - movq %rcx,UREGS_rcx(%rsp) - movq %rax,UREGS_rax(%rsp) -- movq %r8,UREGS_r8(%rsp) -- movq %r9,UREGS_r9(%rsp) -- movq %r10,UREGS_r10(%rsp) -- movq %r11,UREGS_r11(%rsp) -- movq %rbx,UREGS_rbx(%rsp) -- movq %rbp,UREGS_rbp(%rsp) -- SETUP_EXCEPTION_FRAME_POINTER(UREGS_rbp) -- movq %r12,UREGS_r12(%rsp) -- movq %r13,UREGS_r13(%rsp) -- movq %r14,UREGS_r14(%rsp) -- movq %r15,UREGS_r15(%rsp) --.endm -- --/* -- * Save all registers not preserved by C code or used in entry/exit code. Mark -- * the frame as partial. -- * -- * @type: exception type -- * @compat: R8-R15 don't need saving, and the frame nevertheless is complete -- */ --.macro SAVE_VOLATILE type compat=0 --.if \compat -- movl $\type,UREGS_entry_vector-UREGS_error_code(%rsp) --.else -- movl $\type|TRAP_regs_partial,\ -- UREGS_entry_vector-UREGS_error_code(%rsp) --.endif -- addq $-(UREGS_error_code-UREGS_r15),%rsp -- cld -- movq %rdi,UREGS_rdi(%rsp) -- movq %rsi,UREGS_rsi(%rsp) -- movq %rdx,UREGS_rdx(%rsp) -- movq %rcx,UREGS_rcx(%rsp) -- movq %rax,UREGS_rax(%rsp) - .if !\compat - movq %r8,UREGS_r8(%rsp) - movq %r9,UREGS_r9(%rsp) -@@ -297,20 +229,17 @@ static always_inline void stac(void) - .endif - movq %rbx,UREGS_rbx(%rsp) - movq %rbp,UREGS_rbp(%rsp) -- SETUP_EXCEPTION_FRAME_POINTER(UREGS_rbp) --.endm -- --/* -- * Complete a frame potentially only partially saved. -- */ --.macro SAVE_PRESERVED -- btrl $_TRAP_regs_partial,UREGS_entry_vector(%rsp) -- jnc 987f -+#ifdef CONFIG_FRAME_POINTER -+/* Indicate special exception stack frame by inverting the frame pointer. 
*/ -+ leaq UREGS_rbp(%rsp), %rbp -+ notq %rbp -+#endif -+.if !\compat - movq %r12,UREGS_r12(%rsp) - movq %r13,UREGS_r13(%rsp) - movq %r14,UREGS_r14(%rsp) - movq %r15,UREGS_r15(%rsp) --987: -+.endif - .endm - - #define LOAD_ONE_REG(reg, compat) \ -@@ -330,7 +259,6 @@ static always_inline void stac(void) - */ - .macro RESTORE_ALL adj=0 compat=0 - .if !\compat -- testl $TRAP_regs_dirty,UREGS_entry_vector(%rsp) - movq UREGS_r11(%rsp),%r11 - movq UREGS_r10(%rsp),%r10 - movq UREGS_r9(%rsp),%r9 -@@ -347,33 +275,16 @@ static always_inline void stac(void) - LOAD_ONE_REG(si, \compat) - LOAD_ONE_REG(di, \compat) - .if !\compat -- jz 987f - movq UREGS_r15(%rsp),%r15 - movq UREGS_r14(%rsp),%r14 - movq UREGS_r13(%rsp),%r13 - movq UREGS_r12(%rsp),%r12 --#ifndef NDEBUG -- .subsection 1 --987: testl $TRAP_regs_partial,UREGS_entry_vector(%rsp) -- jnz 987f -- cmpq UREGS_r15(%rsp),%r15 -- jne 789f -- cmpq UREGS_r14(%rsp),%r14 -- jne 789f -- cmpq UREGS_r13(%rsp),%r13 -- jne 789f -- cmpq UREGS_r12(%rsp),%r12 -- je 987f --789: BUG /* Corruption of partial register state. */ -- .subsection 0 --#endif - .else - xor %r15, %r15 - xor %r14, %r14 - xor %r13, %r13 - xor %r12, %r12 - .endif --987: - LOAD_ONE_REG(bp, \compat) - LOAD_ONE_REG(bx, \compat) - subq $-(UREGS_error_code-UREGS_r15+\adj), %rsp --- -2.14.3 - - -From 57dc197cf0d36c56ba1d9d32c6a1454bb52605bb Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Wed, 17 Jan 2018 16:56:03 +0100 -Subject: [PATCH 3/5] x86/mm: Always set _PAGE_ACCESSED on L4e updates - -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich -master commit: bd61fe94bee0556bc2f64999a4a8315b93f90f21 -master date: 2018-01-15 13:53:16 +0000 ---- - xen/arch/x86/pv/mm.h | 18 +++++++++++++++--- - 1 file changed, 15 insertions(+), 3 deletions(-) - -diff --git a/xen/arch/x86/pv/mm.h b/xen/arch/x86/pv/mm.h -index 7502d533c6..976209ba4c 100644 ---- a/xen/arch/x86/pv/mm.h -+++ b/xen/arch/x86/pv/mm.h -@@ -144,9 +144,21 @@ static inline l3_pgentry_t unadjust_guest_l3e(l3_pgentry_t l3e, - static inline l4_pgentry_t adjust_guest_l4e(l4_pgentry_t l4e, - const struct domain *d) - { -- if ( likely(l4e_get_flags(l4e) & _PAGE_PRESENT) && -- likely(!is_pv_32bit_domain(d)) ) -- l4e_add_flags(l4e, _PAGE_USER); -+ /* -+ * When shadowing an L4 behind the guests back (e.g. for per-pcpu -+ * purposes), we cannot efficiently sync access bit updates from hardware -+ * (on the shadow tables) back into the guest view. -+ * -+ * We therefore unconditionally set _PAGE_ACCESSED even in the guests -+ * view. This will appear to the guest as a CPU which proactively pulls -+ * all valid L4e's into its TLB, which is compatible with the x86 ABI. -+ * -+ * At the time of writing, all PV guests set the access bit anyway, so -+ * this is no actual change in their behaviour. -+ */ -+ if ( likely(l4e_get_flags(l4e) & _PAGE_PRESENT) ) -+ l4e_add_flags(l4e, (_PAGE_ACCESSED | -+ (is_pv_32bit_domain(d) ? 0 : _PAGE_USER))); - - return l4e; - } --- -2.14.3 - - -From 234f481337ea1a93db968d614649a6bdfdc8418a Mon Sep 17 00:00:00 2001 -From: Jan Beulich -Date: Wed, 17 Jan 2018 16:56:57 +0100 -Subject: [PATCH 4/5] x86: Meltdown band-aid against malicious 64-bit PV guests - -This is a very simplistic change limiting the amount of memory a running -64-bit PV guest has mapped (and hence available for attacking): Only the -mappings of stack, IDT, and TSS are being cloned from the direct map -into per-CPU page tables. Guest controlled parts of the page tables are -being copied into those per-CPU page tables upon entry into the guest. 
-Cross-vCPU synchronization of top level page table entry changes is -being effected by forcing other active vCPU-s of the guest into the -hypervisor. - -The change to context_switch() isn't strictly necessary, but there's no -reason to keep switching page tables once a PV guest is being scheduled -out. - -This isn't providing full isolation yet, but it should be covering all -pieces of information exposure of which would otherwise require an XSA. - -There is certainly much room for improvement, especially of performance, -here - first and foremost suppressing all the negative effects on AMD -systems. But in the interest of backportability (including to really old -hypervisors, which may not even have alternative patching) any such is -being left out here. - -Signed-off-by: Jan Beulich -Reviewed-by: Andrew Cooper -master commit: 5784de3e2067ed73efc2fe42e62831e8ae7f46c4 -master date: 2018-01-16 17:49:03 +0100 ---- - xen/arch/x86/domain.c | 5 + - xen/arch/x86/mm.c | 21 ++++ - xen/arch/x86/smpboot.c | 198 +++++++++++++++++++++++++++++++++++++ - xen/arch/x86/x86_64/asm-offsets.c | 2 + - xen/arch/x86/x86_64/compat/entry.S | 11 +++ - xen/arch/x86/x86_64/entry.S | 149 +++++++++++++++++++++++++++- - xen/include/asm-x86/asm_defns.h | 30 ++++++ - xen/include/asm-x86/current.h | 12 +++ - xen/include/asm-x86/processor.h | 1 + - xen/include/asm-x86/x86_64/page.h | 5 +- - 10 files changed, 428 insertions(+), 6 deletions(-) - -diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c -index b44c95b493..f4a3d7445b 100644 ---- a/xen/arch/x86/domain.c -+++ b/xen/arch/x86/domain.c -@@ -1507,6 +1507,9 @@ void paravirt_ctxt_switch_to(struct vcpu *v) - { - unsigned long cr4; - -+ this_cpu(root_pgt)[root_table_offset(PERDOMAIN_VIRT_START)] = -+ l4e_from_page(v->domain->arch.perdomain_l3_pg, __PAGE_HYPERVISOR_RW); -+ - cr4 = pv_guest_cr4_to_real_cr4(v); - if ( unlikely(cr4 != read_cr4()) ) - write_cr4(cr4); -@@ -1676,6 +1679,8 @@ void context_switch(struct vcpu *prev, struct vcpu *next) - - ASSERT(local_irq_is_enabled()); - -+ get_cpu_info()->xen_cr3 = 0; -+ - cpumask_copy(&dirty_mask, next->vcpu_dirty_cpumask); - /* Allow at most one CPU at a time to be dirty. */ - ASSERT(cpumask_weight(&dirty_mask) <= 1); -diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c -index a7a76a71db..6c7d12034b 100644 ---- a/xen/arch/x86/mm.c -+++ b/xen/arch/x86/mm.c -@@ -3509,6 +3509,7 @@ long do_mmu_update( - struct vcpu *curr = current, *v = curr; - struct domain *d = v->domain, *pt_owner = d, *pg_owner; - mfn_t map_mfn = INVALID_MFN; -+ bool sync_guest = false; - uint32_t xsm_needed = 0; - uint32_t xsm_checked = 0; - int rc = put_old_guest_table(curr); -@@ -3663,6 +3664,8 @@ long do_mmu_update( - case PGT_l4_page_table: - rc = mod_l4_entry(va, l4e_from_intpte(req.val), mfn, - cmd == MMU_PT_UPDATE_PRESERVE_AD, v); -+ if ( !rc ) -+ sync_guest = true; - break; - case PGT_writable_page: - perfc_incr(writable_mmu_updates); -@@ -3765,6 +3768,24 @@ long do_mmu_update( - if ( va ) - unmap_domain_page(va); - -+ if ( sync_guest ) -+ { -+ /* -+ * Force other vCPU-s of the affected guest to pick up L4 entry -+ * changes (if any). Issue a flush IPI with empty operation mask to -+ * facilitate this (including ourselves waiting for the IPI to -+ * actually have arrived). Utilize the fact that FLUSH_VA_VALID is -+ * meaningless without FLUSH_CACHE, but will allow to pass the no-op -+ * check in flush_area_mask(). 
-+ */ -+ unsigned int cpu = smp_processor_id(); -+ cpumask_t *mask = per_cpu(scratch_cpumask, cpu); -+ -+ cpumask_andnot(mask, pt_owner->domain_dirty_cpumask, cpumask_of(cpu)); -+ if ( !cpumask_empty(mask) ) -+ flush_area_mask(mask, ZERO_BLOCK_PTR, FLUSH_VA_VALID); -+ } -+ - perfc_add(num_page_updates, i); - - out: -diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c -index 1609b627ae..b1fbb57a81 100644 ---- a/xen/arch/x86/smpboot.c -+++ b/xen/arch/x86/smpboot.c -@@ -327,6 +327,9 @@ void start_secondary(void *unused) - */ - spin_debug_disable(); - -+ get_cpu_info()->xen_cr3 = 0; -+ get_cpu_info()->pv_cr3 = __pa(this_cpu(root_pgt)); -+ - load_system_tables(); - - /* Full exception support from here on in. */ -@@ -635,6 +638,187 @@ void cpu_exit_clear(unsigned int cpu) - set_cpu_state(CPU_STATE_DEAD); - } - -+static int clone_mapping(const void *ptr, root_pgentry_t *rpt) -+{ -+ unsigned long linear = (unsigned long)ptr, pfn; -+ unsigned int flags; -+ l3_pgentry_t *pl3e = l4e_to_l3e(idle_pg_table[root_table_offset(linear)]) + -+ l3_table_offset(linear); -+ l2_pgentry_t *pl2e; -+ l1_pgentry_t *pl1e; -+ -+ if ( linear < DIRECTMAP_VIRT_START ) -+ return 0; -+ -+ flags = l3e_get_flags(*pl3e); -+ ASSERT(flags & _PAGE_PRESENT); -+ if ( flags & _PAGE_PSE ) -+ { -+ pfn = (l3e_get_pfn(*pl3e) & ~((1UL << (2 * PAGETABLE_ORDER)) - 1)) | -+ (PFN_DOWN(linear) & ((1UL << (2 * PAGETABLE_ORDER)) - 1)); -+ flags &= ~_PAGE_PSE; -+ } -+ else -+ { -+ pl2e = l3e_to_l2e(*pl3e) + l2_table_offset(linear); -+ flags = l2e_get_flags(*pl2e); -+ ASSERT(flags & _PAGE_PRESENT); -+ if ( flags & _PAGE_PSE ) -+ { -+ pfn = (l2e_get_pfn(*pl2e) & ~((1UL << PAGETABLE_ORDER) - 1)) | -+ (PFN_DOWN(linear) & ((1UL << PAGETABLE_ORDER) - 1)); -+ flags &= ~_PAGE_PSE; -+ } -+ else -+ { -+ pl1e = l2e_to_l1e(*pl2e) + l1_table_offset(linear); -+ flags = l1e_get_flags(*pl1e); -+ if ( !(flags & _PAGE_PRESENT) ) -+ return 0; -+ pfn = l1e_get_pfn(*pl1e); -+ } -+ } -+ -+ if ( !(root_get_flags(rpt[root_table_offset(linear)]) & _PAGE_PRESENT) ) -+ { -+ pl3e = alloc_xen_pagetable(); -+ if ( !pl3e ) -+ return -ENOMEM; -+ clear_page(pl3e); -+ l4e_write(&rpt[root_table_offset(linear)], -+ l4e_from_paddr(__pa(pl3e), __PAGE_HYPERVISOR)); -+ } -+ else -+ pl3e = l4e_to_l3e(rpt[root_table_offset(linear)]); -+ -+ pl3e += l3_table_offset(linear); -+ -+ if ( !(l3e_get_flags(*pl3e) & _PAGE_PRESENT) ) -+ { -+ pl2e = alloc_xen_pagetable(); -+ if ( !pl2e ) -+ return -ENOMEM; -+ clear_page(pl2e); -+ l3e_write(pl3e, l3e_from_paddr(__pa(pl2e), __PAGE_HYPERVISOR)); -+ } -+ else -+ { -+ ASSERT(!(l3e_get_flags(*pl3e) & _PAGE_PSE)); -+ pl2e = l3e_to_l2e(*pl3e); -+ } -+ -+ pl2e += l2_table_offset(linear); -+ -+ if ( !(l2e_get_flags(*pl2e) & _PAGE_PRESENT) ) -+ { -+ pl1e = alloc_xen_pagetable(); -+ if ( !pl1e ) -+ return -ENOMEM; -+ clear_page(pl1e); -+ l2e_write(pl2e, l2e_from_paddr(__pa(pl1e), __PAGE_HYPERVISOR)); -+ } -+ else -+ { -+ ASSERT(!(l2e_get_flags(*pl2e) & _PAGE_PSE)); -+ pl1e = l2e_to_l1e(*pl2e); -+ } -+ -+ pl1e += l1_table_offset(linear); -+ -+ if ( l1e_get_flags(*pl1e) & _PAGE_PRESENT ) -+ { -+ ASSERT(l1e_get_pfn(*pl1e) == pfn); -+ ASSERT(l1e_get_flags(*pl1e) == flags); -+ } -+ else -+ l1e_write(pl1e, l1e_from_pfn(pfn, flags)); -+ -+ return 0; -+} -+ -+DEFINE_PER_CPU(root_pgentry_t *, root_pgt); -+ -+static int setup_cpu_root_pgt(unsigned int cpu) -+{ -+ root_pgentry_t *rpt = alloc_xen_pagetable(); -+ unsigned int off; -+ int rc; -+ -+ if ( !rpt ) -+ return -ENOMEM; -+ -+ clear_page(rpt); -+ per_cpu(root_pgt, cpu) = rpt; -+ -+ 
rpt[root_table_offset(RO_MPT_VIRT_START)] = -+ idle_pg_table[root_table_offset(RO_MPT_VIRT_START)]; -+ /* SH_LINEAR_PT inserted together with guest mappings. */ -+ /* PERDOMAIN inserted during context switch. */ -+ rpt[root_table_offset(XEN_VIRT_START)] = -+ idle_pg_table[root_table_offset(XEN_VIRT_START)]; -+ -+ /* Install direct map page table entries for stack, IDT, and TSS. */ -+ for ( off = rc = 0; !rc && off < STACK_SIZE; off += PAGE_SIZE ) -+ rc = clone_mapping(__va(__pa(stack_base[cpu])) + off, rpt); -+ -+ if ( !rc ) -+ rc = clone_mapping(idt_tables[cpu], rpt); -+ if ( !rc ) -+ rc = clone_mapping(&per_cpu(init_tss, cpu), rpt); -+ -+ return rc; -+} -+ -+static void cleanup_cpu_root_pgt(unsigned int cpu) -+{ -+ root_pgentry_t *rpt = per_cpu(root_pgt, cpu); -+ unsigned int r; -+ -+ if ( !rpt ) -+ return; -+ -+ per_cpu(root_pgt, cpu) = NULL; -+ -+ for ( r = root_table_offset(DIRECTMAP_VIRT_START); -+ r < root_table_offset(HYPERVISOR_VIRT_END); ++r ) -+ { -+ l3_pgentry_t *l3t; -+ unsigned int i3; -+ -+ if ( !(root_get_flags(rpt[r]) & _PAGE_PRESENT) ) -+ continue; -+ -+ l3t = l4e_to_l3e(rpt[r]); -+ -+ for ( i3 = 0; i3 < L3_PAGETABLE_ENTRIES; ++i3 ) -+ { -+ l2_pgentry_t *l2t; -+ unsigned int i2; -+ -+ if ( !(l3e_get_flags(l3t[i3]) & _PAGE_PRESENT) ) -+ continue; -+ -+ ASSERT(!(l3e_get_flags(l3t[i3]) & _PAGE_PSE)); -+ l2t = l3e_to_l2e(l3t[i3]); -+ -+ for ( i2 = 0; i2 < L2_PAGETABLE_ENTRIES; ++i2 ) -+ { -+ if ( !(l2e_get_flags(l2t[i2]) & _PAGE_PRESENT) ) -+ continue; -+ -+ ASSERT(!(l2e_get_flags(l2t[i2]) & _PAGE_PSE)); -+ free_xen_pagetable(l2e_to_l1e(l2t[i2])); -+ } -+ -+ free_xen_pagetable(l2t); -+ } -+ -+ free_xen_pagetable(l3t); -+ } -+ -+ free_xen_pagetable(rpt); -+} -+ - static void cpu_smpboot_free(unsigned int cpu) - { - unsigned int order, socket = cpu_to_socket(cpu); -@@ -673,6 +857,8 @@ static void cpu_smpboot_free(unsigned int cpu) - free_domheap_page(mfn_to_page(mfn)); - } - -+ cleanup_cpu_root_pgt(cpu); -+ - order = get_order_from_pages(NR_RESERVED_GDT_PAGES); - free_xenheap_pages(per_cpu(gdt_table, cpu), order); - -@@ -728,6 +914,9 @@ static int cpu_smpboot_alloc(unsigned int cpu) - set_ist(&idt_tables[cpu][TRAP_nmi], IST_NONE); - set_ist(&idt_tables[cpu][TRAP_machine_check], IST_NONE); - -+ if ( setup_cpu_root_pgt(cpu) ) -+ goto oom; -+ - for ( stub_page = 0, i = cpu & ~(STUBS_PER_PAGE - 1); - i < nr_cpu_ids && i <= (cpu | (STUBS_PER_PAGE - 1)); ++i ) - if ( cpu_online(i) && cpu_to_node(i) == node ) -@@ -783,6 +972,8 @@ static struct notifier_block cpu_smpboot_nfb = { - - void __init smp_prepare_cpus(unsigned int max_cpus) - { -+ int rc; -+ - register_cpu_notifier(&cpu_smpboot_nfb); - - mtrr_aps_sync_begin(); -@@ -796,6 +987,11 @@ void __init smp_prepare_cpus(unsigned int max_cpus) - - stack_base[0] = stack_start; - -+ rc = setup_cpu_root_pgt(0); -+ if ( rc ) -+ panic("Error %d setting up PV root page table\n", rc); -+ get_cpu_info()->pv_cr3 = __pa(per_cpu(root_pgt, 0)); -+ - set_nr_sockets(); - - socket_cpumask = xzalloc_array(cpumask_t *, nr_sockets); -@@ -864,6 +1060,8 @@ void __init smp_prepare_boot_cpu(void) - #if NR_CPUS > 2 * BITS_PER_LONG - per_cpu(scratch_cpumask, cpu) = &scratch_cpu0mask; - #endif -+ -+ get_cpu_info()->xen_cr3 = 0; - } - - static void -diff --git a/xen/arch/x86/x86_64/asm-offsets.c b/xen/arch/x86/x86_64/asm-offsets.c -index e136af6b99..b1a4310974 100644 ---- a/xen/arch/x86/x86_64/asm-offsets.c -+++ b/xen/arch/x86/x86_64/asm-offsets.c -@@ -137,6 +137,8 @@ void __dummy__(void) - OFFSET(CPUINFO_processor_id, struct cpu_info, processor_id); - 
OFFSET(CPUINFO_current_vcpu, struct cpu_info, current_vcpu); - OFFSET(CPUINFO_cr4, struct cpu_info, cr4); -+ OFFSET(CPUINFO_xen_cr3, struct cpu_info, xen_cr3); -+ OFFSET(CPUINFO_pv_cr3, struct cpu_info, pv_cr3); - DEFINE(CPUINFO_sizeof, sizeof(struct cpu_info)); - BLANK(); - -diff --git a/xen/arch/x86/x86_64/compat/entry.S b/xen/arch/x86/x86_64/compat/entry.S -index 3fea54ee9d..e668f00c36 100644 ---- a/xen/arch/x86/x86_64/compat/entry.S -+++ b/xen/arch/x86/x86_64/compat/entry.S -@@ -199,6 +199,17 @@ ENTRY(cstar_enter) - pushq $0 - movl $TRAP_syscall, 4(%rsp) - SAVE_ALL -+ -+ GET_STACK_END(bx) -+ mov STACK_CPUINFO_FIELD(xen_cr3)(%rbx), %rcx -+ neg %rcx -+ jz .Lcstar_cr3_okay -+ mov %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%rbx) -+ neg %rcx -+ write_cr3 rcx, rdi, rsi -+ movq $0, STACK_CPUINFO_FIELD(xen_cr3)(%rbx) -+.Lcstar_cr3_okay: -+ - GET_CURRENT(bx) - movq VCPU_domain(%rbx),%rcx - cmpb $0,DOMAIN_is_32bit_pv(%rcx) -diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S -index 1dd9ccf6a2..fc38874b1f 100644 ---- a/xen/arch/x86/x86_64/entry.S -+++ b/xen/arch/x86/x86_64/entry.S -@@ -37,6 +37,32 @@ ENTRY(switch_to_kernel) - /* %rbx: struct vcpu, interrupts disabled */ - restore_all_guest: - ASSERT_INTERRUPTS_DISABLED -+ -+ /* Copy guest mappings and switch to per-CPU root page table. */ -+ mov %cr3, %r9 -+ GET_STACK_END(dx) -+ mov STACK_CPUINFO_FIELD(pv_cr3)(%rdx), %rdi -+ movabs $PADDR_MASK & PAGE_MASK, %rsi -+ movabs $DIRECTMAP_VIRT_START, %rcx -+ mov %rdi, %rax -+ and %rsi, %rdi -+ and %r9, %rsi -+ add %rcx, %rdi -+ add %rcx, %rsi -+ mov $ROOT_PAGETABLE_FIRST_XEN_SLOT, %ecx -+ mov root_table_offset(SH_LINEAR_PT_VIRT_START)*8(%rsi), %r8 -+ mov %r8, root_table_offset(SH_LINEAR_PT_VIRT_START)*8(%rdi) -+ rep movsq -+ mov $ROOT_PAGETABLE_ENTRIES - \ -+ ROOT_PAGETABLE_LAST_XEN_SLOT - 1, %ecx -+ sub $(ROOT_PAGETABLE_FIRST_XEN_SLOT - \ -+ ROOT_PAGETABLE_LAST_XEN_SLOT - 1) * 8, %rsi -+ sub $(ROOT_PAGETABLE_FIRST_XEN_SLOT - \ -+ ROOT_PAGETABLE_LAST_XEN_SLOT - 1) * 8, %rdi -+ rep movsq -+ mov %r9, STACK_CPUINFO_FIELD(xen_cr3)(%rdx) -+ write_cr3 rax, rdi, rsi -+ - RESTORE_ALL - testw $TRAP_syscall,4(%rsp) - jz iret_exit_to_guest -@@ -71,6 +97,22 @@ iret_exit_to_guest: - ALIGN - /* No special register assumptions. */ - restore_all_xen: -+ /* -+ * Check whether we need to switch to the per-CPU page tables, in -+ * case we return to late PV exit code (from an NMI or #MC). -+ */ -+ GET_STACK_END(ax) -+ mov STACK_CPUINFO_FIELD(xen_cr3)(%rax), %rdx -+ mov STACK_CPUINFO_FIELD(pv_cr3)(%rax), %rax -+ test %rdx, %rdx -+ /* -+ * Ideally the condition would be "nsz", but such doesn't exist, -+ * so "g" will have to do. 
-+ */ -+UNLIKELY_START(g, exit_cr3) -+ write_cr3 rax, rdi, rsi -+UNLIKELY_END(exit_cr3) -+ - RESTORE_ALL adj=8 - iretq - -@@ -100,7 +142,18 @@ ENTRY(lstar_enter) - pushq $0 - movl $TRAP_syscall, 4(%rsp) - SAVE_ALL -- GET_CURRENT(bx) -+ -+ GET_STACK_END(bx) -+ mov STACK_CPUINFO_FIELD(xen_cr3)(%rbx), %rcx -+ neg %rcx -+ jz .Llstar_cr3_okay -+ mov %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%rbx) -+ neg %rcx -+ write_cr3 rcx, rdi, rsi -+ movq $0, STACK_CPUINFO_FIELD(xen_cr3)(%rbx) -+.Llstar_cr3_okay: -+ -+ __GET_CURRENT(bx) - testb $TF_kernel_mode,VCPU_thread_flags(%rbx) - jz switch_to_kernel - -@@ -192,7 +245,18 @@ GLOBAL(sysenter_eflags_saved) - pushq $0 - movl $TRAP_syscall, 4(%rsp) - SAVE_ALL -- GET_CURRENT(bx) -+ -+ GET_STACK_END(bx) -+ mov STACK_CPUINFO_FIELD(xen_cr3)(%rbx), %rcx -+ neg %rcx -+ jz .Lsyse_cr3_okay -+ mov %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%rbx) -+ neg %rcx -+ write_cr3 rcx, rdi, rsi -+ movq $0, STACK_CPUINFO_FIELD(xen_cr3)(%rbx) -+.Lsyse_cr3_okay: -+ -+ __GET_CURRENT(bx) - cmpb $0,VCPU_sysenter_disables_events(%rbx) - movq VCPU_sysenter_addr(%rbx),%rax - setne %cl -@@ -228,13 +292,23 @@ ENTRY(int80_direct_trap) - movl $0x80, 4(%rsp) - SAVE_ALL - -+ GET_STACK_END(bx) -+ mov STACK_CPUINFO_FIELD(xen_cr3)(%rbx), %rcx -+ neg %rcx -+ jz .Lint80_cr3_okay -+ mov %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%rbx) -+ neg %rcx -+ write_cr3 rcx, rdi, rsi -+ movq $0, STACK_CPUINFO_FIELD(xen_cr3)(%rbx) -+.Lint80_cr3_okay: -+ - cmpb $0,untrusted_msi(%rip) - UNLIKELY_START(ne, msi_check) - movl $0x80,%edi - call check_for_unexpected_msi - UNLIKELY_END(msi_check) - -- GET_CURRENT(bx) -+ __GET_CURRENT(bx) - - /* Check that the callback is non-null. */ - leaq VCPU_int80_bounce(%rbx),%rdx -@@ -391,9 +465,27 @@ ENTRY(dom_crash_sync_extable) - - ENTRY(common_interrupt) - SAVE_ALL CLAC -+ -+ GET_STACK_END(14) -+ mov STACK_CPUINFO_FIELD(xen_cr3)(%r14), %rcx -+ mov %rcx, %r15 -+ neg %rcx -+ jz .Lintr_cr3_okay -+ jns .Lintr_cr3_load -+ mov %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%r14) -+ neg %rcx -+.Lintr_cr3_load: -+ write_cr3 rcx, rdi, rsi -+ xor %ecx, %ecx -+ mov %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%r14) -+ testb $3, UREGS_cs(%rsp) -+ cmovnz %rcx, %r15 -+.Lintr_cr3_okay: -+ - CR4_PV32_RESTORE - movq %rsp,%rdi - callq do_IRQ -+ mov %r15, STACK_CPUINFO_FIELD(xen_cr3)(%r14) - jmp ret_from_intr - - /* No special register assumptions. */ -@@ -411,6 +503,23 @@ ENTRY(page_fault) - /* No special register assumptions. */ - GLOBAL(handle_exception) - SAVE_ALL CLAC -+ -+ GET_STACK_END(14) -+ mov STACK_CPUINFO_FIELD(xen_cr3)(%r14), %rcx -+ mov %rcx, %r15 -+ neg %rcx -+ jz .Lxcpt_cr3_okay -+ jns .Lxcpt_cr3_load -+ mov %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%r14) -+ neg %rcx -+.Lxcpt_cr3_load: -+ write_cr3 rcx, rdi, rsi -+ xor %ecx, %ecx -+ mov %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%r14) -+ testb $3, UREGS_cs(%rsp) -+ cmovnz %rcx, %r15 -+.Lxcpt_cr3_okay: -+ - handle_exception_saved: - GET_CURRENT(bx) - testb $X86_EFLAGS_IF>>8,UREGS_eflags+1(%rsp) -@@ -475,6 +584,7 @@ handle_exception_saved: - leaq exception_table(%rip),%rdx - PERFC_INCR(exceptions, %rax, %rbx) - callq *(%rdx,%rax,8) -+ mov %r15, STACK_CPUINFO_FIELD(xen_cr3)(%r14) - testb $3,UREGS_cs(%rsp) - jz restore_all_xen - leaq VCPU_trap_bounce(%rbx),%rdx -@@ -507,6 +617,7 @@ exception_with_ints_disabled: - rep; movsq # make room for ec/ev - 1: movq UREGS_error_code(%rsp),%rax # ec/ev - movq %rax,UREGS_kernel_sizeof(%rsp) -+ mov %r15, STACK_CPUINFO_FIELD(xen_cr3)(%r14) - jmp restore_all_xen # return to fixup code - - /* No special register assumptions. 
*/ -@@ -585,6 +696,17 @@ ENTRY(double_fault) - movl $TRAP_double_fault,4(%rsp) - /* Set AC to reduce chance of further SMAP faults */ - SAVE_ALL STAC -+ -+ GET_STACK_END(bx) -+ mov STACK_CPUINFO_FIELD(xen_cr3)(%rbx), %rbx -+ test %rbx, %rbx -+ jz .Ldblf_cr3_okay -+ jns .Ldblf_cr3_load -+ neg %rbx -+.Ldblf_cr3_load: -+ write_cr3 rbx, rdi, rsi -+.Ldblf_cr3_okay: -+ - movq %rsp,%rdi - call do_double_fault - BUG /* do_double_fault() shouldn't return. */ -@@ -603,10 +725,28 @@ ENTRY(nmi) - movl $TRAP_nmi,4(%rsp) - handle_ist_exception: - SAVE_ALL CLAC -+ -+ GET_STACK_END(14) -+ mov STACK_CPUINFO_FIELD(xen_cr3)(%r14), %rcx -+ mov %rcx, %r15 -+ neg %rcx -+ jz .List_cr3_okay -+ jns .List_cr3_load -+ mov %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%r14) -+ neg %rcx -+.List_cr3_load: -+ write_cr3 rcx, rdi, rsi -+ movq $0, STACK_CPUINFO_FIELD(xen_cr3)(%r14) -+.List_cr3_okay: -+ - CR4_PV32_RESTORE - testb $3,UREGS_cs(%rsp) - jz 1f -- /* Interrupted guest context. Copy the context to stack bottom. */ -+ /* -+ * Interrupted guest context. Clear the restore value for xen_cr3 -+ * and copy the context to stack bottom. -+ */ -+ xor %r15, %r15 - GET_CPUINFO_FIELD(guest_cpu_user_regs,di) - movq %rsp,%rsi - movl $UREGS_kernel_sizeof/8,%ecx -@@ -616,6 +756,7 @@ handle_ist_exception: - movzbl UREGS_entry_vector(%rsp),%eax - leaq exception_table(%rip),%rdx - callq *(%rdx,%rax,8) -+ mov %r15, STACK_CPUINFO_FIELD(xen_cr3)(%r14) - cmpb $TRAP_nmi,UREGS_entry_vector(%rsp) - jne ret_from_intr - -diff --git a/xen/include/asm-x86/asm_defns.h b/xen/include/asm-x86/asm_defns.h -index 98192eb4e6..fb0fee9286 100644 ---- a/xen/include/asm-x86/asm_defns.h -+++ b/xen/include/asm-x86/asm_defns.h -@@ -93,9 +93,30 @@ void ret_from_intr(void); - UNLIKELY_DONE(mp, tag); \ - __UNLIKELY_END(tag) - -+ .equ .Lrax, 0 -+ .equ .Lrcx, 1 -+ .equ .Lrdx, 2 -+ .equ .Lrbx, 3 -+ .equ .Lrsp, 4 -+ .equ .Lrbp, 5 -+ .equ .Lrsi, 6 -+ .equ .Lrdi, 7 -+ .equ .Lr8, 8 -+ .equ .Lr9, 9 -+ .equ .Lr10, 10 -+ .equ .Lr11, 11 -+ .equ .Lr12, 12 -+ .equ .Lr13, 13 -+ .equ .Lr14, 14 -+ .equ .Lr15, 15 -+ - #define STACK_CPUINFO_FIELD(field) (1 - CPUINFO_sizeof + CPUINFO_##field) - #define GET_STACK_END(reg) \ -+ .if .Lr##reg > 8; \ -+ movq $STACK_SIZE-1, %r##reg; \ -+ .else; \ - movl $STACK_SIZE-1, %e##reg; \ -+ .endif; \ - orq %rsp, %r##reg - - #define GET_CPUINFO_FIELD(field, reg) \ -@@ -177,6 +198,15 @@ void ret_from_intr(void); - #define ASM_STAC ASM_AC(STAC) - #define ASM_CLAC ASM_AC(CLAC) - -+.macro write_cr3 val:req, tmp1:req, tmp2:req -+ mov %cr4, %\tmp1 -+ mov %\tmp1, %\tmp2 -+ and $~X86_CR4_PGE, %\tmp1 -+ mov %\tmp1, %cr4 -+ mov %\val, %cr3 -+ mov %\tmp2, %cr4 -+.endm -+ - #define CR4_PV32_RESTORE \ - 667: ASM_NOP5; \ - .pushsection .altinstr_replacement, "ax"; \ -diff --git a/xen/include/asm-x86/current.h b/xen/include/asm-x86/current.h -index 89849929eb..b929c48c85 100644 ---- a/xen/include/asm-x86/current.h -+++ b/xen/include/asm-x86/current.h -@@ -41,6 +41,18 @@ struct cpu_info { - struct vcpu *current_vcpu; - unsigned long per_cpu_offset; - unsigned long cr4; -+ /* -+ * Of the two following fields the latter is being set to the CR3 value -+ * to be used on the given pCPU for loading whenever 64-bit PV guest -+ * context is being entered. The value never changes once set. -+ * The former is the value to restore when re-entering Xen, if any. IOW -+ * its value being zero means there's nothing to restore. 
However, its
-+ * value can also be negative, indicating to the exit-to-Xen code that
-+ * restoring is not necessary, but allowing any nested entry code paths
-+ * to still know the value to put back into CR3.
-+ */
-+ unsigned long xen_cr3;
-+ unsigned long pv_cr3;
- /* get_stack_bottom() must be 16-byte aligned */
- };
- 
-diff --git a/xen/include/asm-x86/processor.h b/xen/include/asm-x86/processor.h
-index 41a8d8c32f..2962e83464 100644
---- a/xen/include/asm-x86/processor.h
-+++ b/xen/include/asm-x86/processor.h
-@@ -462,6 +462,7 @@ extern idt_entry_t idt_table[];
- extern idt_entry_t *idt_tables[];
- 
- DECLARE_PER_CPU(struct tss_struct, init_tss);
-+DECLARE_PER_CPU(root_pgentry_t *, root_pgt);
- 
- extern void init_int80_direct_trap(struct vcpu *v);
- 
-diff --git a/xen/include/asm-x86/x86_64/page.h b/xen/include/asm-x86/x86_64/page.h
-index 6fb7cd5553..05a0334893 100644
---- a/xen/include/asm-x86/x86_64/page.h
-+++ b/xen/include/asm-x86/x86_64/page.h
-@@ -24,8 +24,8 @@
- /* These are architectural limits. Current CPUs support only 40-bit phys. */
- #define PADDR_BITS 52
- #define VADDR_BITS 48
---#define PADDR_MASK ((1UL << PADDR_BITS)-1)
---#define VADDR_MASK ((1UL << VADDR_BITS)-1)
-+#define PADDR_MASK ((_AC(1,UL) << PADDR_BITS) - 1)
-+#define VADDR_MASK ((_AC(1,UL) << VADDR_BITS) - 1)
- 
- #define VADDR_TOP_BIT (1UL << (VADDR_BITS - 1))
- #define CANONICAL_MASK (~0UL & ~VADDR_MASK)
-@@ -107,6 +107,7 @@ typedef l4_pgentry_t root_pgentry_t;
- : (((_s) < ROOT_PAGETABLE_FIRST_XEN_SLOT) || \
- ((_s) > ROOT_PAGETABLE_LAST_XEN_SLOT)))
- 
-+#define root_table_offset l4_table_offset
- #define root_get_pfn l4e_get_pfn
- #define root_get_flags l4e_get_flags
- #define root_get_intpte l4e_get_intpte
---
-2.14.3
-
-
-From 7cccd6f748ec724cf9408cec6b3ec8e54a8a2c1f Mon Sep 17 00:00:00 2001
-From: Jan Beulich
-Date: Wed, 17 Jan 2018 16:57:33 +0100
-Subject: [PATCH 5/5] x86: allow Meltdown band-aid to be disabled
-
-First of all we don't need it on AMD systems. Additionally allow its use
-to be controlled by command line option. For best backportability, this
-intentionally doesn't use alternative instruction patching to achieve
-the intended effect - while we likely want it, this will be later
-follow-up.
-
-Signed-off-by: Jan Beulich
-Reviewed-by: Andrew Cooper
-master commit: e871e80c38547d9faefc6604532ba3e985e65873
-master date: 2018-01-16 17:50:59 +0100
----
- docs/misc/xen-command-line.markdown | 12 ++++++++++++
- xen/arch/x86/domain.c | 7 +++++--
- xen/arch/x86/mm.c | 2 +-
- xen/arch/x86/smpboot.c | 17 ++++++++++++++---
- xen/arch/x86/x86_64/entry.S | 2 ++
- 5 files changed, 34 insertions(+), 6 deletions(-)
-
-diff --git a/docs/misc/xen-command-line.markdown b/docs/misc/xen-command-line.markdown
-index 781110d4b2..49539b4d1c 100644
---- a/docs/misc/xen-command-line.markdown
-+++ b/docs/misc/xen-command-line.markdown
-@@ -1849,6 +1849,18 @@ In the case that x2apic is in use, this option switches between physical and
- clustered mode. The default, given no hint from the **FADT**, is cluster
- mode.
- 
-+### xpti
-+> `= <boolean>`
-+
-+> Default: `false` on AMD hardware
-+> Default: `true` everywhere else
-+
-+Override default selection of whether to isolate 64-bit PV guest page
-+tables.
-+
-+** WARNING: Not yet a complete isolation implementation, but better than
-+nothing.
** -+ - ### xsave - > `= ` - -diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c -index f4a3d7445b..b357b60f73 100644 ---- a/xen/arch/x86/domain.c -+++ b/xen/arch/x86/domain.c -@@ -1505,10 +1505,13 @@ void paravirt_ctxt_switch_from(struct vcpu *v) - - void paravirt_ctxt_switch_to(struct vcpu *v) - { -+ root_pgentry_t *root_pgt = this_cpu(root_pgt); - unsigned long cr4; - -- this_cpu(root_pgt)[root_table_offset(PERDOMAIN_VIRT_START)] = -- l4e_from_page(v->domain->arch.perdomain_l3_pg, __PAGE_HYPERVISOR_RW); -+ if ( root_pgt ) -+ root_pgt[root_table_offset(PERDOMAIN_VIRT_START)] = -+ l4e_from_page(v->domain->arch.perdomain_l3_pg, -+ __PAGE_HYPERVISOR_RW); - - cr4 = pv_guest_cr4_to_real_cr4(v); - if ( unlikely(cr4 != read_cr4()) ) -diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c -index 6c7d12034b..53295f85b7 100644 ---- a/xen/arch/x86/mm.c -+++ b/xen/arch/x86/mm.c -@@ -3665,7 +3665,7 @@ long do_mmu_update( - rc = mod_l4_entry(va, l4e_from_intpte(req.val), mfn, - cmd == MMU_PT_UPDATE_PRESERVE_AD, v); - if ( !rc ) -- sync_guest = true; -+ sync_guest = this_cpu(root_pgt); - break; - case PGT_writable_page: - perfc_incr(writable_mmu_updates); -diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c -index b1fbb57a81..edf607f5a2 100644 ---- a/xen/arch/x86/smpboot.c -+++ b/xen/arch/x86/smpboot.c -@@ -328,7 +328,7 @@ void start_secondary(void *unused) - spin_debug_disable(); - - get_cpu_info()->xen_cr3 = 0; -- get_cpu_info()->pv_cr3 = __pa(this_cpu(root_pgt)); -+ get_cpu_info()->pv_cr3 = this_cpu(root_pgt) ? __pa(this_cpu(root_pgt)) : 0; - - load_system_tables(); - -@@ -736,14 +736,20 @@ static int clone_mapping(const void *ptr, root_pgentry_t *rpt) - return 0; - } - -+static __read_mostly int8_t opt_xpti = -1; -+boolean_param("xpti", opt_xpti); - DEFINE_PER_CPU(root_pgentry_t *, root_pgt); - - static int setup_cpu_root_pgt(unsigned int cpu) - { -- root_pgentry_t *rpt = alloc_xen_pagetable(); -+ root_pgentry_t *rpt; - unsigned int off; - int rc; - -+ if ( !opt_xpti ) -+ return 0; -+ -+ rpt = alloc_xen_pagetable(); - if ( !rpt ) - return -ENOMEM; - -@@ -987,10 +993,14 @@ void __init smp_prepare_cpus(unsigned int max_cpus) - - stack_base[0] = stack_start; - -+ if ( opt_xpti < 0 ) -+ opt_xpti = boot_cpu_data.x86_vendor != X86_VENDOR_AMD; -+ - rc = setup_cpu_root_pgt(0); - if ( rc ) - panic("Error %d setting up PV root page table\n", rc); -- get_cpu_info()->pv_cr3 = __pa(per_cpu(root_pgt, 0)); -+ if ( per_cpu(root_pgt, 0) ) -+ get_cpu_info()->pv_cr3 = __pa(per_cpu(root_pgt, 0)); - - set_nr_sockets(); - -@@ -1062,6 +1072,7 @@ void __init smp_prepare_boot_cpu(void) - #endif - - get_cpu_info()->xen_cr3 = 0; -+ get_cpu_info()->pv_cr3 = 0; - } - - static void -diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S -index fc38874b1f..a8825c89df 100644 ---- a/xen/arch/x86/x86_64/entry.S -+++ b/xen/arch/x86/x86_64/entry.S -@@ -46,6 +46,7 @@ restore_all_guest: - movabs $DIRECTMAP_VIRT_START, %rcx - mov %rdi, %rax - and %rsi, %rdi -+ jz .Lrag_keep_cr3 - and %r9, %rsi - add %rcx, %rdi - add %rcx, %rsi -@@ -62,6 +63,7 @@ restore_all_guest: - rep movsq - mov %r9, STACK_CPUINFO_FIELD(xen_cr3)(%rdx) - write_cr3 rax, rdi, rsi -+.Lrag_keep_cr3: - - RESTORE_ALL - testw $TRAP_syscall,4(%rsp) --- -2.14.3 - -From 05eba93a0a344ec189e71722bd542cdc7949a8a5 Mon Sep 17 00:00:00 2001 -From: Wei Liu -Date: Thu, 8 Feb 2018 11:45:19 +0100 -Subject: [PATCH] x86: fix GET_STACK_END - -AIUI the purpose of having the .if directive is to make GET_STACK_END -work with any general purpose registers. 
The code as-is would produce -the wrong result for r8. Fix it. - -Signed-off-by: Wei Liu -Acked-by: Andrew Cooper -master commit: 8155476765a5bdecea1534b46562cf28e0113a9a -master date: 2018-01-25 11:34:17 +0000 ---- - xen/include/asm-x86/asm_defns.h | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/xen/include/asm-x86/asm_defns.h b/xen/include/asm-x86/asm_defns.h -index 9cfd79f..61b6d35 100644 ---- a/xen/include/asm-x86/asm_defns.h -+++ b/xen/include/asm-x86/asm_defns.h -@@ -120,7 +120,7 @@ void ret_from_intr(void); - - #define STACK_CPUINFO_FIELD(field) (1 - CPUINFO_sizeof + CPUINFO_##field) - #define GET_STACK_END(reg) \ -- .if .Lr##reg > 8; \ -+ .if .Lr##reg >= 8; \ - movq $STACK_SIZE-1, %r##reg; \ - .else; \ - movl $STACK_SIZE-1, %e##reg; \ --- -2.1.4 - diff --git a/xsa252.patch b/xsa252.patch deleted file mode 100644 index 8615928..0000000 --- a/xsa252.patch +++ /dev/null @@ -1,27 +0,0 @@ -From: Jan Beulich -Subject: memory: don't implicitly unpin for decrease-reservation - -It very likely was a mistake (copy-and-paste from domain cleanup code) -to implicitly unpin here: The caller should really unpin itself before -(or after, if they so wish) requesting the page to be removed. - -This is XSA-252. - -Reported-by: Jann Horn -Signed-off-by: Jan Beulich -Reviewed-by: Andrew Cooper - ---- a/xen/common/memory.c -+++ b/xen/common/memory.c -@@ -357,11 +357,6 @@ int guest_remove_page(struct domain *d, - - rc = guest_physmap_remove_page(d, _gfn(gmfn), mfn, 0); - --#ifdef _PGT_pinned -- if ( !rc && test_and_clear_bit(_PGT_pinned, &page->u.inuse.type_info) ) -- put_page_and_type(page); --#endif -- - /* - * With the lack of an IOMMU on some platforms, domains with DMA-capable - * device must retrieve the same pfn when the hypercall populate_physmap diff --git a/xsa253.patch b/xsa253.patch deleted file mode 100644 index 19e4269..0000000 --- a/xsa253.patch +++ /dev/null @@ -1,26 +0,0 @@ -From: Andrew Cooper -Subject: x86/msr: Free msr_vcpu_policy during vcpu destruction - -c/s 4187f79dc7 "x86/msr: introduce struct msr_vcpu_policy" introduced a -per-vcpu memory allocation, but failed to free it in the clean vcpu -destruction case. - -This is XSA-253 - -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich - -diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c -index b17468c..0ae715d 100644 ---- a/xen/arch/x86/domain.c -+++ b/xen/arch/x86/domain.c -@@ -382,6 +382,9 @@ void vcpu_destroy(struct vcpu *v) - - vcpu_destroy_fpu(v); - -+ xfree(v->arch.msr); -+ v->arch.msr = NULL; -+ - if ( !is_idle_domain(v->domain) ) - vpmu_destroy(v); - diff --git a/xsa255-1.patch b/xsa255-1.patch deleted file mode 100644 index f8bba9e..0000000 --- a/xsa255-1.patch +++ /dev/null @@ -1,133 +0,0 @@ -From: Jan Beulich -Subject: gnttab/ARM: don't corrupt shared GFN array - -... by writing status GFNs to it. Introduce a second array instead. -Also implement gnttab_status_gmfn() properly now that the information is -suitably being tracked. - -While touching it anyway, remove a misguided (but luckily benign) upper -bound check from gnttab_shared_gmfn(): We should never access beyond the -bounds of that array. - -This is part of XSA-255. - -Signed-off-by: Jan Beulich -Reviewed-by: Stefano Stabellini -Reviewed-by: Andrew Cooper ---- -v3: Don't init the ARM GFN arrays to zero anymore, use INVALID_GFN. -v2: New. 
- ---- a/xen/common/grant_table.c -+++ b/xen/common/grant_table.c -@@ -3775,6 +3775,7 @@ int gnttab_map_frame(struct domain *d, u - { - int rc = 0; - struct grant_table *gt = d->grant_table; -+ bool status = false; - - grant_write_lock(gt); - -@@ -3785,6 +3786,7 @@ int gnttab_map_frame(struct domain *d, u - (idx & XENMAPIDX_grant_table_status) ) - { - idx &= ~XENMAPIDX_grant_table_status; -+ status = true; - if ( idx < nr_status_frames(gt) ) - *mfn = _mfn(virt_to_mfn(gt->status[idx])); - else -@@ -3802,7 +3804,7 @@ int gnttab_map_frame(struct domain *d, u - } - - if ( !rc ) -- gnttab_set_frame_gfn(gt, idx, gfn); -+ gnttab_set_frame_gfn(gt, status, idx, gfn); - - grant_write_unlock(gt); - ---- a/xen/include/asm-arm/grant_table.h -+++ b/xen/include/asm-arm/grant_table.h -@@ -9,7 +9,8 @@ - #define INITIAL_NR_GRANT_FRAMES 1U - - struct grant_table_arch { -- gfn_t *gfn; -+ gfn_t *shared_gfn; -+ gfn_t *status_gfn; - }; - - void gnttab_clear_flag(unsigned long nr, uint16_t *addr); -@@ -21,7 +22,6 @@ int replace_grant_host_mapping(unsigned - unsigned long new_gpaddr, unsigned int flags); - void gnttab_mark_dirty(struct domain *d, unsigned long l); - #define gnttab_create_status_page(d, t, i) do {} while (0) --#define gnttab_status_gmfn(d, t, i) (0) - #define gnttab_release_host_mappings(domain) 1 - static inline int replace_grant_supported(void) - { -@@ -42,19 +42,35 @@ static inline unsigned int gnttab_dom0_m - - #define gnttab_init_arch(gt) \ - ({ \ -- (gt)->arch.gfn = xzalloc_array(gfn_t, (gt)->max_grant_frames); \ -- ( (gt)->arch.gfn ? 0 : -ENOMEM ); \ -+ unsigned int ngf_ = (gt)->max_grant_frames; \ -+ unsigned int nsf_ = grant_to_status_frames(ngf_); \ -+ \ -+ (gt)->arch.shared_gfn = xmalloc_array(gfn_t, ngf_); \ -+ (gt)->arch.status_gfn = xmalloc_array(gfn_t, nsf_); \ -+ if ( (gt)->arch.shared_gfn && (gt)->arch.status_gfn ) \ -+ { \ -+ while ( ngf_-- ) \ -+ (gt)->arch.shared_gfn[ngf_] = INVALID_GFN; \ -+ while ( nsf_-- ) \ -+ (gt)->arch.status_gfn[nsf_] = INVALID_GFN; \ -+ } \ -+ else \ -+ gnttab_destroy_arch(gt); \ -+ (gt)->arch.shared_gfn ? 0 : -ENOMEM; \ - }) - - #define gnttab_destroy_arch(gt) \ - do { \ -- xfree((gt)->arch.gfn); \ -- (gt)->arch.gfn = NULL; \ -+ xfree((gt)->arch.shared_gfn); \ -+ (gt)->arch.shared_gfn = NULL; \ -+ xfree((gt)->arch.status_gfn); \ -+ (gt)->arch.status_gfn = NULL; \ - } while ( 0 ) - --#define gnttab_set_frame_gfn(gt, idx, gfn) \ -+#define gnttab_set_frame_gfn(gt, st, idx, gfn) \ - do { \ -- (gt)->arch.gfn[idx] = gfn; \ -+ ((st) ? (gt)->arch.status_gfn : (gt)->arch.shared_gfn)[idx] = \ -+ (gfn); \ - } while ( 0 ) - - #define gnttab_create_shared_page(d, t, i) \ -@@ -65,8 +81,10 @@ static inline unsigned int gnttab_dom0_m - } while ( 0 ) - - #define gnttab_shared_gmfn(d, t, i) \ -- ( ((i >= nr_grant_frames(t)) && \ -- (i < (t)->max_grant_frames))? 0 : gfn_x((t)->arch.gfn[i])) -+ gfn_x(((i) >= nr_grant_frames(t)) ? INVALID_GFN : (t)->arch.shared_gfn[i]) -+ -+#define gnttab_status_gmfn(d, t, i) \ -+ gfn_x(((i) >= nr_status_frames(t)) ? 
INVALID_GFN : (t)->arch.status_gfn[i]) - - #define gnttab_need_iommu_mapping(d) \ - (is_domain_direct_mapped(d) && need_iommu(d)) ---- a/xen/include/asm-x86/grant_table.h -+++ b/xen/include/asm-x86/grant_table.h -@@ -46,7 +46,7 @@ static inline unsigned int gnttab_dom0_m - - #define gnttab_init_arch(gt) 0 - #define gnttab_destroy_arch(gt) do {} while ( 0 ) --#define gnttab_set_frame_gfn(gt, idx, gfn) do {} while ( 0 ) -+#define gnttab_set_frame_gfn(gt, st, idx, gfn) do {} while ( 0 ) - - #define gnttab_create_shared_page(d, t, i) \ - do { \ diff --git a/xsa255-2.patch b/xsa255-2.patch deleted file mode 100644 index 402b6ef..0000000 --- a/xsa255-2.patch +++ /dev/null @@ -1,167 +0,0 @@ -From: Jan Beulich -Subject: gnttab: don't blindly free status pages upon version change - -There may still be active mappings, which would trigger the respective -BUG_ON(). Split the loop into one dealing with the page attributes and -the second (when the first fully passed) freeing the pages. Return an -error if any pages still have pending references. - -This is part of XSA-255. - -Signed-off-by: Jan Beulich -Reviewed-by: Stefano Stabellini -Reviewed-by: Andrew Cooper ---- -v4: Add gprintk(XENLOG_ERR, ...) to domain_crash() invocations. -v3: Call guest_physmap_remove_page() from gnttab_map_frame(), making the - code unconditional at the same time. Re-base over changes to first - patch. -v2: Also deal with translated guests. - ---- a/xen/common/grant_table.c -+++ b/xen/common/grant_table.c -@@ -1636,23 +1636,74 @@ status_alloc_failed: - return -ENOMEM; - } - --static void -+static int - gnttab_unpopulate_status_frames(struct domain *d, struct grant_table *gt) - { -- int i; -+ unsigned int i; - - for ( i = 0; i < nr_status_frames(gt); i++ ) - { - struct page_info *pg = virt_to_page(gt->status[i]); -+ gfn_t gfn = gnttab_get_frame_gfn(gt, true, i); -+ -+ /* -+ * For translated domains, recovering from failure after partial -+ * changes were made is more complicated than it seems worth -+ * implementing at this time. Hence respective error paths below -+ * crash the domain in such a case. -+ */ -+ if ( paging_mode_translate(d) ) -+ { -+ int rc = gfn_eq(gfn, INVALID_GFN) -+ ? 
0 -+ : guest_physmap_remove_page(d, gfn, -+ _mfn(page_to_mfn(pg)), 0); -+ -+ if ( rc ) -+ { -+ gprintk(XENLOG_ERR, -+ "Could not remove status frame %u (GFN %#lx) from P2M\n", -+ i, gfn_x(gfn)); -+ domain_crash(d); -+ return rc; -+ } -+ gnttab_set_frame_gfn(gt, true, i, INVALID_GFN); -+ } - - BUG_ON(page_get_owner(pg) != d); - if ( test_and_clear_bit(_PGC_allocated, &pg->count_info) ) - put_page(pg); -- BUG_ON(pg->count_info & ~PGC_xen_heap); -+ -+ if ( pg->count_info & ~PGC_xen_heap ) -+ { -+ if ( paging_mode_translate(d) ) -+ { -+ gprintk(XENLOG_ERR, -+ "Wrong page state %#lx of status frame %u (GFN %#lx)\n", -+ pg->count_info, i, gfn_x(gfn)); -+ domain_crash(d); -+ } -+ else -+ { -+ if ( get_page(pg, d) ) -+ set_bit(_PGC_allocated, &pg->count_info); -+ while ( i-- ) -+ gnttab_create_status_page(d, gt, i); -+ } -+ return -EBUSY; -+ } -+ -+ page_set_owner(pg, NULL); -+ } -+ -+ for ( i = 0; i < nr_status_frames(gt); i++ ) -+ { - free_xenheap_page(gt->status[i]); - gt->status[i] = NULL; - } - gt->nr_status_frames = 0; -+ -+ return 0; - } - - /* -@@ -2962,8 +3013,9 @@ gnttab_set_version(XEN_GUEST_HANDLE_PARA - break; - } - -- if ( op.version < 2 && gt->gt_version == 2 ) -- gnttab_unpopulate_status_frames(currd, gt); -+ if ( op.version < 2 && gt->gt_version == 2 && -+ (res = gnttab_unpopulate_status_frames(currd, gt)) != 0 ) -+ goto out_unlock; - - /* Make sure there's no crud left over from the old version. */ - for ( i = 0; i < nr_grant_frames(gt); i++ ) -@@ -3803,6 +3855,11 @@ int gnttab_map_frame(struct domain *d, u - rc = -EINVAL; - } - -+ if ( !rc && paging_mode_translate(d) && -+ !gfn_eq(gnttab_get_frame_gfn(gt, status, idx), INVALID_GFN) ) -+ rc = guest_physmap_remove_page(d, gnttab_get_frame_gfn(gt, status, idx), -+ *mfn, 0); -+ - if ( !rc ) - gnttab_set_frame_gfn(gt, status, idx, gfn); - ---- a/xen/include/asm-arm/grant_table.h -+++ b/xen/include/asm-arm/grant_table.h -@@ -73,6 +73,11 @@ static inline unsigned int gnttab_dom0_m - (gfn); \ - } while ( 0 ) - -+#define gnttab_get_frame_gfn(gt, st, idx) ({ \ -+ _gfn((st) ? gnttab_status_gmfn(NULL, gt, idx) \ -+ : gnttab_shared_gmfn(NULL, gt, idx)); \ -+}) -+ - #define gnttab_create_shared_page(d, t, i) \ - do { \ - share_xen_page_with_guest( \ ---- a/xen/include/asm-x86/grant_table.h -+++ b/xen/include/asm-x86/grant_table.h -@@ -47,6 +47,12 @@ static inline unsigned int gnttab_dom0_m - #define gnttab_init_arch(gt) 0 - #define gnttab_destroy_arch(gt) do {} while ( 0 ) - #define gnttab_set_frame_gfn(gt, st, idx, gfn) do {} while ( 0 ) -+#define gnttab_get_frame_gfn(gt, st, idx) ({ \ -+ unsigned long mfn_ = (st) ? gnttab_status_mfn(gt, idx) \ -+ : gnttab_shared_mfn(gt, idx); \ -+ unsigned long gpfn_ = get_gpfn_from_mfn(mfn_); \ -+ VALID_M2P(gpfn_) ? 
_gfn(gpfn_) : INVALID_GFN; \ -+}) - - #define gnttab_create_shared_page(d, t, i) \ - do { \ -@@ -63,11 +69,11 @@ static inline unsigned int gnttab_dom0_m - } while ( 0 ) - - --#define gnttab_shared_mfn(d, t, i) \ -+#define gnttab_shared_mfn(t, i) \ - ((virt_to_maddr((t)->shared_raw[i]) >> PAGE_SHIFT)) - - #define gnttab_shared_gmfn(d, t, i) \ -- (mfn_to_gmfn(d, gnttab_shared_mfn(d, t, i))) -+ (mfn_to_gmfn(d, gnttab_shared_mfn(t, i))) - - - #define gnttab_status_mfn(t, i) \ diff --git a/xsa256.patch b/xsa256.patch deleted file mode 100644 index 50ff24e..0000000 --- a/xsa256.patch +++ /dev/null @@ -1,40 +0,0 @@ -From: Andrew Cooper -Subject: x86/hvm: Disallow the creation of HVM domains without Local APIC emulation - -There are multiple problems, not necesserily limited to: - - * Guests which configure event channels via hvmop_set_evtchn_upcall_vector(), - or which hit %cr8 emulation will cause Xen to fall over a NULL vlapic->regs - pointer. - - * On Intel hardware, disabling the TPR_SHADOW execution control without - reenabling CR8_{LOAD,STORE} interception means that the guests %cr8 - accesses interact with the real TPR. Amongst other things, setting the - real TPR to 0xf blocks even IPIs from interrupting this CPU. - - * On hardware which sets up the use of Interrupt Posting, including - IOMMU-Posting, guests run without the appropriate non-root configuration, - which at a minimum will result in dropped interrupts. - -Whether no-LAPIC mode is of any use at all remains to be seen. - -This is XSA-256. - -Reported-by: Ian Jackson -Reviewed-by: Roger Pau Monné -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich - -diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c -index f93327b..f65fc12 100644 ---- a/xen/arch/x86/domain.c -+++ b/xen/arch/x86/domain.c -@@ -413,7 +413,7 @@ static bool emulation_flags_ok(const struct domain *d, uint32_t emflags) - if ( is_hardware_domain(d) && - emflags != (XEN_X86_EMU_LAPIC|XEN_X86_EMU_IOAPIC) ) - return false; -- if ( !is_hardware_domain(d) && emflags && -+ if ( !is_hardware_domain(d) && - emflags != XEN_X86_EMU_ALL && emflags != XEN_X86_EMU_LAPIC ) - return false; - } diff --git a/xsa258.patch b/xsa258.patch deleted file mode 100644 index 900b78b..0000000 --- a/xsa258.patch +++ /dev/null @@ -1,108 +0,0 @@ -From bf9ab0ec0b632739fe6366391e89a7d4dcf9993b Mon Sep 17 00:00:00 2001 -From: Anthony PERARD -Date: Thu, 8 Mar 2018 18:16:41 +0000 -Subject: [PATCH] libxl: Specify format of inserted cdrom - -Without this extra parameter on the QMP command, QEMU will guess the -format of the new file. - -This is XSA-258. 
- -Reported-by: Anthony PERARD -Signed-off-by: Anthony PERARD -Acked-by: Ian Jackson ---- - tools/libxl/libxl_device.c | 13 +++++++++++++ - tools/libxl/libxl_dm.c | 17 ++--------------- - tools/libxl/libxl_internal.h | 1 + - tools/libxl/libxl_qmp.c | 2 ++ - 4 files changed, 18 insertions(+), 15 deletions(-) - -diff --git a/tools/libxl/libxl_device.c b/tools/libxl/libxl_device.c -index c60cafe774..a4a8e9ac32 100644 ---- a/tools/libxl/libxl_device.c -+++ b/tools/libxl/libxl_device.c -@@ -462,6 +462,19 @@ char *libxl__device_disk_string_of_backend(libxl_disk_backend backend) - } - } - -+const char *libxl__qemu_disk_format_string(libxl_disk_format format) -+{ -+ switch (format) { -+ case LIBXL_DISK_FORMAT_QCOW: return "qcow"; -+ case LIBXL_DISK_FORMAT_QCOW2: return "qcow2"; -+ case LIBXL_DISK_FORMAT_VHD: return "vpc"; -+ case LIBXL_DISK_FORMAT_RAW: return "raw"; -+ case LIBXL_DISK_FORMAT_EMPTY: return NULL; -+ case LIBXL_DISK_FORMAT_QED: return "qed"; -+ default: return NULL; -+ } -+} -+ - int libxl__device_physdisk_major_minor(const char *physpath, int *major, int *minor) - { - struct stat buf; -diff --git a/tools/libxl/libxl_dm.c b/tools/libxl/libxl_dm.c -index a3cddce8b7..b51178b9fd 100644 ---- a/tools/libxl/libxl_dm.c -+++ b/tools/libxl/libxl_dm.c -@@ -677,19 +677,6 @@ static int libxl__build_device_model_args_old(libxl__gc *gc, - return 0; - } - --static const char *qemu_disk_format_string(libxl_disk_format format) --{ -- switch (format) { -- case LIBXL_DISK_FORMAT_QCOW: return "qcow"; -- case LIBXL_DISK_FORMAT_QCOW2: return "qcow2"; -- case LIBXL_DISK_FORMAT_VHD: return "vpc"; -- case LIBXL_DISK_FORMAT_RAW: return "raw"; -- case LIBXL_DISK_FORMAT_EMPTY: return NULL; -- case LIBXL_DISK_FORMAT_QED: return "qed"; -- default: return NULL; -- } --} -- - static char *dm_spice_options(libxl__gc *gc, - const libxl_spice_info *spice) - { -@@ -1516,9 +1503,9 @@ static int libxl__build_device_model_args_new(libxl__gc *gc, - * always raw - */ - if (disks[i].backend == LIBXL_DISK_BACKEND_QDISK) -- format = qemu_disk_format_string(disks[i].format); -+ format = libxl__qemu_disk_format_string(disks[i].format); - else -- format = qemu_disk_format_string(LIBXL_DISK_FORMAT_RAW); -+ format = libxl__qemu_disk_format_string(LIBXL_DISK_FORMAT_RAW); - - if (disks[i].format == LIBXL_DISK_FORMAT_EMPTY) { - if (!disks[i].is_cdrom) { -diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h -index 506687fbe9..0812be5376 100644 ---- a/tools/libxl/libxl_internal.h -+++ b/tools/libxl/libxl_internal.h -@@ -1202,6 +1202,7 @@ _hidden int libxl__domain_pvcontrol_write(libxl__gc *gc, xs_transaction_t t, - /* from xl_device */ - _hidden char *libxl__device_disk_string_of_backend(libxl_disk_backend backend); - _hidden char *libxl__device_disk_string_of_format(libxl_disk_format format); -+_hidden const char *libxl__qemu_disk_format_string(libxl_disk_format format); - _hidden int libxl__device_disk_set_backend(libxl__gc*, libxl_device_disk*); - - _hidden int libxl__device_physdisk_major_minor(const char *physpath, int *major, int *minor); -diff --git a/tools/libxl/libxl_qmp.c b/tools/libxl/libxl_qmp.c -index eab993aca9..567ed1e772 100644 ---- a/tools/libxl/libxl_qmp.c -+++ b/tools/libxl/libxl_qmp.c -@@ -982,6 +982,8 @@ int libxl__qmp_insert_cdrom(libxl__gc *gc, int domid, - return qmp_run_command(gc, domid, "eject", args, NULL, NULL); - } else { - qmp_parameters_add_string(gc, &args, "target", disk->pdev_path); -+ qmp_parameters_add_string(gc, &args, "arg", -+ libxl__qemu_disk_format_string(disk->format)); - 
return qmp_run_command(gc, domid, "change", args, NULL, NULL); - } - } --- -2.16.2 - diff --git a/xsa259.patch b/xsa259.patch deleted file mode 100644 index 3d6c3b6..0000000 --- a/xsa259.patch +++ /dev/null @@ -1,29 +0,0 @@ -From: Jan Beulich -Subject: x86: fix slow int80 path after XPTI additions - -For the int80 slow path to jump to handle_exception_saved, %r14 needs to -be set up suitably for XPTI purposes. This is because of the difference -in nature between the int80 path (which is synchronous WRT guest -actions) and the exception path which is potentially asynchronous. - -This is XSA-259. - -Reported-by: Andrew Cooper -Signed-off-by: Jan Beulich -Reviewed-by: Andrew Cooper - ---- a/xen/arch/x86/x86_64/entry.S -+++ b/xen/arch/x86/x86_64/entry.S -@@ -396,6 +396,12 @@ int80_slow_path: - movl $TRAP_gp_fault,UREGS_entry_vector(%rsp) - /* A GPF wouldn't have incremented the instruction pointer. */ - subq $2,UREGS_rip(%rsp) -+ /* -+ * While we've cleared xen_cr3 above already, normal exception handling -+ * code has logic to restore the original value from %r15. Therefore we -+ * need to set up %r14 here, while %r15 is required to still be zero. -+ */ -+ GET_STACK_END(14) - jmp handle_exception_saved - - /* CREATE A BASIC EXCEPTION FRAME ON GUEST OS STACK: */