From 7c54ce20c6caed4e4a38a26aa38b517156ae3914 Mon Sep 17 00:00:00 2001 From: Michael Young Date: Apr 14 2022 20:06:05 +0000 Subject: update to xen-4.16.1 strip .efi file to help EFI partitions with limited space --- diff --git a/.gitignore b/.gitignore index dcc3690..8f4e85c 100644 --- a/.gitignore +++ b/.gitignore @@ -6,4 +6,4 @@ lwip-1.3.0.tar.gz pciutils-2.2.9.tar.bz2 zlib-1.2.3.tar.gz polarssl-1.1.4-gpl.tgz -/xen-4.16.0.tar.gz +/xen-4.16.1.tar.gz diff --git a/sources b/sources index 10bfdea..6a26e23 100644 --- a/sources +++ b/sources @@ -4,4 +4,4 @@ SHA512 (newlib-1.16.0.tar.gz) = 40eb96bbc6736a16b6399e0cdb73e853d0d90b685c967e77 SHA512 (zlib-1.2.3.tar.gz) = 021b958fcd0d346c4ba761bcf0cc40f3522de6186cf5a0a6ea34a70504ce9622b1c2626fce40675bc8282cf5f5ade18473656abc38050f72f5d6480507a2106e SHA512 (polarssl-1.1.4-gpl.tgz) = 88da614e4d3f4409c4fd3bb3e44c7587ba051e3fed4e33d526069a67e8180212e1ea22da984656f50e290049f60ddca65383e5983c0f8884f648d71f698303ad SHA512 (pciutils-2.2.9.tar.bz2) = 2b3d98d027e46d8c08037366dde6f0781ca03c610ef2b380984639e4ef39899ed8d8b8e4cd9c9dc54df101279b95879bd66bfd4d04ad07fef41e847ea7ae32b5 -SHA512 (xen-4.16.0.tar.gz) = 2869ed90d1779c9754d7f2397f5fc67a655304d9c32953ac20655ef96cb154521d8fce9f23915ac0c91f984dc54f72c67e5e619e2da318b5997748f44cf21b87 +SHA512 (xen-4.16.1.tar.gz) = eeabba9c263cd2425bca083e32b5ebfc6c716c00553759c144fd4b6f64a89836b260787fa25ba22c1f5c4ea65aaad7c95b8c2c1070d3377b1c43c9517aa7032a diff --git a/xen.git-08fc03c855c071e9b1aaaa96403f2a90433336a7.patch b/xen.git-08fc03c855c071e9b1aaaa96403f2a90433336a7.patch deleted file mode 100644 index 84514e0..0000000 --- a/xen.git-08fc03c855c071e9b1aaaa96403f2a90433336a7.patch +++ /dev/null @@ -1,94 +0,0 @@ -From: Andrew Cooper -Date: Tue, 25 Jan 2022 17:14:48 +0000 (+0000) -Subject: x86/spec-ctrl: Introduce new has_spec_ctrl boolean -X-Git-Url: http://xenbits.xenproject.org/gitweb/?p=xen.git;a=commitdiff_plain;h=08fc03c855c071e9b1aaaa96403f2a90433336a7 - -x86/spec-ctrl: Introduce new has_spec_ctrl boolean - -Most MSR_SPEC_CTRL setup will be common between Intel and AMD. Instead of -opencoding an OR of two features everywhere, introduce has_spec_ctrl instead. - -Reword the comment above the Intel specific alternatives block to highlight -that it is Intel specific, and pull the setting of default_xen_spec_ctrl.IBRS -out because it will want to be common. - -No functional change. - -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich -(cherry picked from commit 5d9eff3a312763d889cfbf3c8468b6dfb3ab490c) ---- - -diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c -index 8a550d0a09..2072daf662 100644 ---- a/xen/arch/x86/spec_ctrl.c -+++ b/xen/arch/x86/spec_ctrl.c -@@ -927,7 +927,7 @@ static __init void mds_calculations(uint64_t caps) - void __init init_speculation_mitigations(void) - { - enum ind_thunk thunk = THUNK_DEFAULT; -- bool ibrs = false, hw_smt_enabled; -+ bool has_spec_ctrl, ibrs = false, hw_smt_enabled; - bool cpu_has_bug_taa; - uint64_t caps = 0; - -@@ -936,6 +936,8 @@ void __init init_speculation_mitigations(void) - - hw_smt_enabled = check_smt_enabled(); - -+ has_spec_ctrl = boot_cpu_has(X86_FEATURE_IBRSB); -+ - /* - * First, disable the use of retpolines if Xen is using shadow stacks, as - * they are incompatible. -@@ -973,11 +975,11 @@ void __init init_speculation_mitigations(void) - */ - else if ( retpoline_safe(caps) ) - thunk = THUNK_RETPOLINE; -- else if ( boot_cpu_has(X86_FEATURE_IBRSB) ) -+ else if ( has_spec_ctrl ) - ibrs = true; - } - /* Without compiler thunk support, use IBRS if available. */ -- else if ( boot_cpu_has(X86_FEATURE_IBRSB) ) -+ else if ( has_spec_ctrl ) - ibrs = true; - } - -@@ -1008,10 +1010,7 @@ void __init init_speculation_mitigations(void) - else if ( thunk == THUNK_JMP ) - setup_force_cpu_cap(X86_FEATURE_IND_THUNK_JMP); - -- /* -- * If we are on hardware supporting MSR_SPEC_CTRL, see about setting up -- * the alternatives blocks so we can virtualise support for guests. -- */ -+ /* Intel hardware: MSR_SPEC_CTRL alternatives setup. */ - if ( boot_cpu_has(X86_FEATURE_IBRSB) ) - { - if ( opt_msr_sc_pv ) -@@ -1030,11 +1029,12 @@ void __init init_speculation_mitigations(void) - default_spec_ctrl_flags |= SCF_ist_wrmsr; - setup_force_cpu_cap(X86_FEATURE_SC_MSR_HVM); - } -- -- if ( ibrs ) -- default_xen_spec_ctrl |= SPEC_CTRL_IBRS; - } - -+ /* If we have IBRS available, see whether we should use it. */ -+ if ( has_spec_ctrl && ibrs ) -+ default_xen_spec_ctrl |= SPEC_CTRL_IBRS; -+ - /* If we have SSBD available, see whether we should use it. */ - if ( boot_cpu_has(X86_FEATURE_SSBD) && opt_ssbd ) - default_xen_spec_ctrl |= SPEC_CTRL_SSBD; -@@ -1268,7 +1268,7 @@ void __init init_speculation_mitigations(void) - * boot won't have any other code running in a position to mount an - * attack. - */ -- if ( boot_cpu_has(X86_FEATURE_IBRSB) ) -+ if ( has_spec_ctrl ) - { - bsp_delay_spec_ctrl = !cpu_has_hypervisor && default_xen_spec_ctrl; - diff --git a/xen.git-21d70feed10571543061abeaedd21ce8adc60114.patch b/xen.git-21d70feed10571543061abeaedd21ce8adc60114.patch deleted file mode 100644 index 2164b76..0000000 --- a/xen.git-21d70feed10571543061abeaedd21ce8adc60114.patch +++ /dev/null @@ -1,125 +0,0 @@ -From: Andrew Cooper -Date: Tue, 25 Jan 2022 12:39:31 +0000 (+0100) -Subject: x86/spec-ctrl: Drop SPEC_CTRL_{ENTRY_FROM,EXIT_TO}_HVM -X-Git-Url: http://xenbits.xenproject.org/gitweb/?p=xen.git;a=commitdiff_plain;h=21d70feed10571543061abeaedd21ce8adc60114 - -x86/spec-ctrl: Drop SPEC_CTRL_{ENTRY_FROM,EXIT_TO}_HVM - -These were written before Spectre/Meltdown went public, and there was large -uncertainty in how the protections would evolve. As it turns out, they're -very specific to Intel hardware, and not very suitable for AMD. - -Drop the macros, opencoding the relevant subset of functionality, and leaving -grep-fodder to locate the logic. No change at all for VT-x. - -For AMD, the only relevant piece of functionality is DO_OVERWRITE_RSB, -although we will soon be adding (different) logic to handle MSR_SPEC_CTRL. - -This has a marginal improvement of removing an unconditional pile of long-nops -from the vmentry/exit path. - -Signed-off-by: Andrew Cooper -Reviewed-by: Roger Pau Monné -master commit: 95b13fa43e0753b7514bef13abe28253e8614f62 -master date: 2022-01-20 16:32:11 +0000 ---- - -diff --git a/xen/arch/x86/hvm/svm/entry.S b/xen/arch/x86/hvm/svm/entry.S -index e208a4b32a..276215d36a 100644 ---- a/xen/arch/x86/hvm/svm/entry.S -+++ b/xen/arch/x86/hvm/svm/entry.S -@@ -59,7 +59,7 @@ __UNLIKELY_END(nsvm_hap) - mov VCPUMSR_spec_ctrl_raw(%rax), %eax - - /* WARNING! `ret`, `call *`, `jmp *` not safe beyond this point. */ -- SPEC_CTRL_EXIT_TO_HVM /* Req: a=spec_ctrl %rsp=regs/cpuinfo, Clob: cd */ -+ /* SPEC_CTRL_EXIT_TO_SVM (nothing currently) */ - - pop %r15 - pop %r14 -@@ -86,7 +86,8 @@ __UNLIKELY_END(nsvm_hap) - - GET_CURRENT(bx) - -- SPEC_CTRL_ENTRY_FROM_HVM /* Req: b=curr %rsp=regs/cpuinfo, Clob: acd */ -+ /* SPEC_CTRL_ENTRY_FROM_SVM Req: b=curr %rsp=regs/cpuinfo, Clob: ac */ -+ ALTERNATIVE "", DO_OVERWRITE_RSB, X86_FEATURE_SC_RSB_HVM - /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */ - - stgi -diff --git a/xen/arch/x86/hvm/vmx/entry.S b/xen/arch/x86/hvm/vmx/entry.S -index 27c8c5ca49..30139ae58e 100644 ---- a/xen/arch/x86/hvm/vmx/entry.S -+++ b/xen/arch/x86/hvm/vmx/entry.S -@@ -33,7 +33,9 @@ ENTRY(vmx_asm_vmexit_handler) - movb $1,VCPU_vmx_launched(%rbx) - mov %rax,VCPU_hvm_guest_cr2(%rbx) - -- SPEC_CTRL_ENTRY_FROM_HVM /* Req: b=curr %rsp=regs/cpuinfo, Clob: acd */ -+ /* SPEC_CTRL_ENTRY_FROM_VMX Req: b=curr %rsp=regs/cpuinfo, Clob: acd */ -+ ALTERNATIVE "", DO_OVERWRITE_RSB, X86_FEATURE_SC_RSB_HVM -+ ALTERNATIVE "", DO_SPEC_CTRL_ENTRY_FROM_HVM, X86_FEATURE_SC_MSR_HVM - /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */ - - /* Hardware clears MSR_DEBUGCTL on VMExit. Reinstate it if debugging Xen. */ -@@ -80,7 +82,9 @@ UNLIKELY_END(realmode) - mov VCPUMSR_spec_ctrl_raw(%rax), %eax - - /* WARNING! `ret`, `call *`, `jmp *` not safe beyond this point. */ -- SPEC_CTRL_EXIT_TO_HVM /* Req: a=spec_ctrl %rsp=regs/cpuinfo, Clob: cd */ -+ /* SPEC_CTRL_EXIT_TO_VMX Req: a=spec_ctrl %rsp=regs/cpuinfo, Clob: cd */ -+ ALTERNATIVE "", DO_SPEC_CTRL_EXIT_TO_GUEST, X86_FEATURE_SC_MSR_HVM -+ ALTERNATIVE "", __stringify(verw CPUINFO_verw_sel(%rsp)), X86_FEATURE_SC_VERW_HVM - - mov VCPU_hvm_guest_cr2(%rbx),%rax - -diff --git a/xen/include/asm-x86/spec_ctrl_asm.h b/xen/include/asm-x86/spec_ctrl_asm.h -index cb34299a86..2b3f123cb5 100644 ---- a/xen/include/asm-x86/spec_ctrl_asm.h -+++ b/xen/include/asm-x86/spec_ctrl_asm.h -@@ -68,14 +68,16 @@ - * - * The following ASM fragments implement this algorithm. See their local - * comments for further details. -- * - SPEC_CTRL_ENTRY_FROM_HVM - * - SPEC_CTRL_ENTRY_FROM_PV - * - SPEC_CTRL_ENTRY_FROM_INTR - * - SPEC_CTRL_ENTRY_FROM_INTR_IST - * - SPEC_CTRL_EXIT_TO_XEN_IST - * - SPEC_CTRL_EXIT_TO_XEN - * - SPEC_CTRL_EXIT_TO_PV -- * - SPEC_CTRL_EXIT_TO_HVM -+ * -+ * Additionally, the following grep-fodder exists to find the HVM logic. -+ * - SPEC_CTRL_ENTRY_FROM_{SVM,VMX} -+ * - SPEC_CTRL_EXIT_TO_{SVM,VMX} - */ - - .macro DO_OVERWRITE_RSB tmp=rax -@@ -225,12 +227,6 @@ - wrmsr - .endm - --/* Use after a VMEXIT from an HVM guest. */ --#define SPEC_CTRL_ENTRY_FROM_HVM \ -- ALTERNATIVE "", DO_OVERWRITE_RSB, X86_FEATURE_SC_RSB_HVM; \ -- ALTERNATIVE "", DO_SPEC_CTRL_ENTRY_FROM_HVM, \ -- X86_FEATURE_SC_MSR_HVM -- - /* Use after an entry from PV context (syscall/sysenter/int80/int82/etc). */ - #define SPEC_CTRL_ENTRY_FROM_PV \ - ALTERNATIVE "", DO_OVERWRITE_RSB, X86_FEATURE_SC_RSB_PV; \ -@@ -255,13 +251,6 @@ - ALTERNATIVE "", __stringify(verw CPUINFO_verw_sel(%rsp)), \ - X86_FEATURE_SC_VERW_PV - --/* Use when exiting to HVM guest context. */ --#define SPEC_CTRL_EXIT_TO_HVM \ -- ALTERNATIVE "", \ -- DO_SPEC_CTRL_EXIT_TO_GUEST, X86_FEATURE_SC_MSR_HVM; \ -- ALTERNATIVE "", __stringify(verw CPUINFO_verw_sel(%rsp)), \ -- X86_FEATURE_SC_VERW_HVM -- - /* - * Use in IST interrupt/exception context. May interrupt Xen or PV context. - * Fine grain control of SCF_ist_wrmsr is needed for safety in the S3 resume diff --git a/xen.git-243026a2c5ad64c05281dc8ed2f1f57c0ee5988c.patch b/xen.git-243026a2c5ad64c05281dc8ed2f1f57c0ee5988c.patch deleted file mode 100644 index 5693970..0000000 --- a/xen.git-243026a2c5ad64c05281dc8ed2f1f57c0ee5988c.patch +++ /dev/null @@ -1,32 +0,0 @@ -From: Andrew Cooper -Date: Thu, 6 Jan 2022 13:15:14 +0000 (+0100) -Subject: x86/spec-ctrl: Fix default calculation of opt_srb_lock -X-Git-Url: http://xenbits.xenproject.org/gitweb/?p=xen.git;a=commitdiff_plain;h=243026a2c5ad64c05281dc8ed2f1f57c0ee5988c - -x86/spec-ctrl: Fix default calculation of opt_srb_lock - -Since this logic was introduced, opt_tsx has become more complicated and -shouldn't be compared to 0 directly. While there are no buggy logic paths, -the correct expression is !(opt_tsx & 1) but the rtm_disabled boolean is -easier and clearer to use. - -Fixes: 8fe24090d940 ("x86/cpuid: Rework HLE and RTM handling") -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich -master commit: 31f3bc97f4508687215e459a5e35676eecf1772b -master date: 2022-01-05 09:44:26 +0000 ---- - -diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c -index a5569c7f2b..c18cc8aa49 100644 ---- a/xen/arch/x86/spec_ctrl.c -+++ b/xen/arch/x86/spec_ctrl.c -@@ -1245,7 +1245,7 @@ void __init init_speculation_mitigations(void) - */ - if ( opt_srb_lock == -1 && - (caps & (ARCH_CAPS_MDS_NO|ARCH_CAPS_TAA_NO)) == ARCH_CAPS_MDS_NO && -- (!cpu_has_hle || ((caps & ARCH_CAPS_TSX_CTRL) && opt_tsx == 0)) ) -+ (!cpu_has_hle || ((caps & ARCH_CAPS_TSX_CTRL) && rtm_disabled)) ) - opt_srb_lock = 0; - - val &= ~MCU_OPT_CTRL_RNGDS_MITG_DIS; diff --git a/xen.git-41e477b4f367269dc1b768a335cfa16f48f7f02f.patch b/xen.git-41e477b4f367269dc1b768a335cfa16f48f7f02f.patch deleted file mode 100644 index 4bd651a..0000000 --- a/xen.git-41e477b4f367269dc1b768a335cfa16f48f7f02f.patch +++ /dev/null @@ -1,209 +0,0 @@ -From: Andrew Cooper -Date: Wed, 19 May 2021 18:40:28 +0000 (+0100) -Subject: x86/spec-ctrl: Clean up MSR_MCU_OPT_CTRL handling -X-Git-Url: http://xenbits.xenproject.org/gitweb/?p=xen.git;a=commitdiff_plain;h=41e477b4f367269dc1b768a335cfa16f48f7f02f - -x86/spec-ctrl: Clean up MSR_MCU_OPT_CTRL handling - -Introduce cpu_has_srbds_ctrl as more users are going to appear shortly. - -MSR_MCU_OPT_CTRL is gaining extra functionality, meaning that the current -default_xen_mcu_opt_ctrl is no longer a good fit. - -Introduce two new helpers, update_mcu_opt_ctrl() which does a full RMW cycle -on the MSR, and set_in_mcu_opt_ctrl() which lets callers configure specific -bits at a time without clobbering each others settings. - -No functional change. - -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich -(cherry picked from commit 39a40f3835efcc25c1b05a25c321a01d7e11cbd7) ---- - -diff --git a/xen/arch/x86/acpi/power.c b/xen/arch/x86/acpi/power.c -index d4bdc3e7df..5eaa77f66a 100644 ---- a/xen/arch/x86/acpi/power.c -+++ b/xen/arch/x86/acpi/power.c -@@ -301,8 +301,7 @@ static int enter_state(u32 state) - ci->last_spec_ctrl = default_xen_spec_ctrl; - } - -- if ( boot_cpu_has(X86_FEATURE_SRBDS_CTRL) ) -- wrmsrl(MSR_MCU_OPT_CTRL, default_xen_mcu_opt_ctrl); -+ update_mcu_opt_ctrl(); - - /* (re)initialise SYSCALL/SYSENTER state, amongst other things. */ - percpu_traps_init(); -diff --git a/xen/arch/x86/cpu/intel.c b/xen/arch/x86/cpu/intel.c -index 9b011c3446..e7d4dd652f 100644 ---- a/xen/arch/x86/cpu/intel.c -+++ b/xen/arch/x86/cpu/intel.c -@@ -14,6 +14,38 @@ - - #include "cpu.h" - -+/* -+ * MSR_MCU_OPT_CTRL is a collection of unrelated functionality, with separate -+ * enablement requirements, but which want to be consistent across the system. -+ */ -+static uint32_t __read_mostly mcu_opt_ctrl_mask; -+static uint32_t __read_mostly mcu_opt_ctrl_val; -+ -+void update_mcu_opt_ctrl(void) -+{ -+ uint32_t mask = mcu_opt_ctrl_mask, lo, hi; -+ -+ if ( !mask ) -+ return; -+ -+ rdmsr(MSR_MCU_OPT_CTRL, lo, hi); -+ -+ lo &= ~mask; -+ lo |= mcu_opt_ctrl_val; -+ -+ wrmsr(MSR_MCU_OPT_CTRL, lo, hi); -+} -+ -+void __init set_in_mcu_opt_ctrl(uint32_t mask, uint32_t val) -+{ -+ mcu_opt_ctrl_mask |= mask; -+ -+ mcu_opt_ctrl_val &= ~mask; -+ mcu_opt_ctrl_val |= (val & mask); -+ -+ update_mcu_opt_ctrl(); -+} -+ - /* - * Processors which have self-snooping capability can handle conflicting - * memory type across CPUs by snooping its own cache. However, there exists -diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c -index 54237c6c6d..2596e4374b 100644 ---- a/xen/arch/x86/smpboot.c -+++ b/xen/arch/x86/smpboot.c -@@ -384,8 +384,7 @@ void start_secondary(void *unused) - wrmsrl(MSR_SPEC_CTRL, default_xen_spec_ctrl); - info->last_spec_ctrl = default_xen_spec_ctrl; - } -- if ( boot_cpu_has(X86_FEATURE_SRBDS_CTRL) ) -- wrmsrl(MSR_MCU_OPT_CTRL, default_xen_mcu_opt_ctrl); -+ update_mcu_opt_ctrl(); - - tsx_init(); /* Needs microcode. May change HLE/RTM feature bits. */ - -diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c -index ee862089b7..3628b4b415 100644 ---- a/xen/arch/x86/spec_ctrl.c -+++ b/xen/arch/x86/spec_ctrl.c -@@ -67,7 +67,6 @@ static bool __initdata cpu_has_bug_msbds_only; /* => minimal HT impact. */ - static bool __initdata cpu_has_bug_mds; /* Any other M{LP,SB,FB}DS combination. */ - - static int8_t __initdata opt_srb_lock = -1; --uint64_t __read_mostly default_xen_mcu_opt_ctrl; - - static int __init parse_spec_ctrl(const char *s) - { -@@ -376,7 +375,7 @@ static void __init print_details(enum ind_thunk thunk, uint64_t caps) - (default_xen_spec_ctrl & SPEC_CTRL_SSBD) ? " SSBD+" : " SSBD-", - !(caps & ARCH_CAPS_TSX_CTRL) ? "" : - (opt_tsx & 1) ? " TSX+" : " TSX-", -- !boot_cpu_has(X86_FEATURE_SRBDS_CTRL) ? "" : -+ !cpu_has_srbds_ctrl ? "" : - opt_srb_lock ? " SRB_LOCK+" : " SRB_LOCK-", - opt_ibpb ? " IBPB" : "", - opt_l1d_flush ? " L1D_FLUSH" : "", -@@ -1251,32 +1250,24 @@ void __init init_speculation_mitigations(void) - tsx_init(); - } - -- /* Calculate suitable defaults for MSR_MCU_OPT_CTRL */ -- if ( boot_cpu_has(X86_FEATURE_SRBDS_CTRL) ) -+ /* -+ * On some SRBDS-affected hardware, it may be safe to relax srb-lock by -+ * default. -+ * -+ * On parts which enumerate MDS_NO and not TAA_NO, TSX is the only known -+ * way to access the Fill Buffer. If TSX isn't available (inc. SKU -+ * reasons on some models), or TSX is explicitly disabled, then there is -+ * no need for the extra overhead to protect RDRAND/RDSEED. -+ */ -+ if ( cpu_has_srbds_ctrl ) - { -- uint64_t val; -- -- rdmsrl(MSR_MCU_OPT_CTRL, val); -- -- /* -- * On some SRBDS-affected hardware, it may be safe to relax srb-lock -- * by default. -- * -- * On parts which enumerate MDS_NO and not TAA_NO, TSX is the only way -- * to access the Fill Buffer. If TSX isn't available (inc. SKU -- * reasons on some models), or TSX is explicitly disabled, then there -- * is no need for the extra overhead to protect RDRAND/RDSEED. -- */ - if ( opt_srb_lock == -1 && - (caps & (ARCH_CAPS_MDS_NO|ARCH_CAPS_TAA_NO)) == ARCH_CAPS_MDS_NO && - (!cpu_has_hle || ((caps & ARCH_CAPS_TSX_CTRL) && rtm_disabled)) ) - opt_srb_lock = 0; - -- val &= ~MCU_OPT_CTRL_RNGDS_MITG_DIS; -- if ( !opt_srb_lock ) -- val |= MCU_OPT_CTRL_RNGDS_MITG_DIS; -- -- default_xen_mcu_opt_ctrl = val; -+ set_in_mcu_opt_ctrl(MCU_OPT_CTRL_RNGDS_MITG_DIS, -+ opt_srb_lock ? 0 : MCU_OPT_CTRL_RNGDS_MITG_DIS); - } - - print_details(thunk, caps); -@@ -1314,9 +1305,6 @@ void __init init_speculation_mitigations(void) - wrmsrl(MSR_SPEC_CTRL, val); - info->last_spec_ctrl = val; - } -- -- if ( boot_cpu_has(X86_FEATURE_SRBDS_CTRL) ) -- wrmsrl(MSR_MCU_OPT_CTRL, default_xen_mcu_opt_ctrl); - } - - static void __init __maybe_unused build_assertions(void) -diff --git a/xen/include/asm-x86/cpufeature.h b/xen/include/asm-x86/cpufeature.h -index ba0fe7c0aa..0ff6d899f9 100644 ---- a/xen/include/asm-x86/cpufeature.h -+++ b/xen/include/asm-x86/cpufeature.h -@@ -133,6 +133,7 @@ - #define cpu_has_avx512_4vnniw boot_cpu_has(X86_FEATURE_AVX512_4VNNIW) - #define cpu_has_avx512_4fmaps boot_cpu_has(X86_FEATURE_AVX512_4FMAPS) - #define cpu_has_avx512_vp2intersect boot_cpu_has(X86_FEATURE_AVX512_VP2INTERSECT) -+#define cpu_has_srbds_ctrl boot_cpu_has(X86_FEATURE_SRBDS_CTRL) - #define cpu_has_rtm_always_abort boot_cpu_has(X86_FEATURE_RTM_ALWAYS_ABORT) - #define cpu_has_tsx_force_abort boot_cpu_has(X86_FEATURE_TSX_FORCE_ABORT) - #define cpu_has_serialize boot_cpu_has(X86_FEATURE_SERIALIZE) -diff --git a/xen/include/asm-x86/processor.h b/xen/include/asm-x86/processor.h -index bc4dc69253..3d8aacd3aa 100644 ---- a/xen/include/asm-x86/processor.h -+++ b/xen/include/asm-x86/processor.h -@@ -630,6 +630,9 @@ extern int8_t opt_tsx, cpu_has_tsx_ctrl; - extern bool rtm_disabled; - void tsx_init(void); - -+void update_mcu_opt_ctrl(void); -+void set_in_mcu_opt_ctrl(uint32_t mask, uint32_t val); -+ - enum ap_boot_method { - AP_BOOT_NORMAL, - AP_BOOT_SKINIT, -diff --git a/xen/include/asm-x86/spec_ctrl.h b/xen/include/asm-x86/spec_ctrl.h -index a803d16f90..f760295236 100644 ---- a/xen/include/asm-x86/spec_ctrl.h -+++ b/xen/include/asm-x86/spec_ctrl.h -@@ -54,8 +54,6 @@ extern int8_t opt_pv_l1tf_hwdom, opt_pv_l1tf_domu; - */ - extern paddr_t l1tf_addr_mask, l1tf_safe_maddr; - --extern uint64_t default_xen_mcu_opt_ctrl; -- - static inline void init_shadow_spec_ctrl_state(void) - { - struct cpu_info *info = get_cpu_info(); diff --git a/xen.git-6ef732726add103ee8f63293e326ad43b1643239.patch b/xen.git-6ef732726add103ee8f63293e326ad43b1643239.patch deleted file mode 100644 index c5983a1..0000000 --- a/xen.git-6ef732726add103ee8f63293e326ad43b1643239.patch +++ /dev/null @@ -1,148 +0,0 @@ -From: Andrew Cooper -Date: Fri, 28 Jan 2022 11:57:19 +0000 (+0000) -Subject: x86/spec-ctrl: Record the last write to MSR_SPEC_CTRL -X-Git-Url: http://xenbits.xenproject.org/gitweb/?p=xen.git;a=commitdiff_plain;h=6ef732726add103ee8f63293e326ad43b1643239 - -x86/spec-ctrl: Record the last write to MSR_SPEC_CTRL - -In some cases, writes to MSR_SPEC_CTRL do not have interesting side effects, -and we should implement lazy context switching like we do with other MSRs. - -In the short term, this will be used by the SVM infrastructure, but I expect -to extend it to other contexts in due course. - -Introduce cpu_info.last_spec_ctrl for the purpose, and cache writes made from -the boot/resume paths. The value can't live in regular per-cpu data when it -is eventually used for PV guests when XPTI might be active. - -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich -(cherry picked from commit 00f2992b6c7a9d4090443c1a85bf83224a87eeb9) ---- - -diff --git a/xen/arch/x86/acpi/power.c b/xen/arch/x86/acpi/power.c -index 0837a3ead4..bac9c16389 100644 ---- a/xen/arch/x86/acpi/power.c -+++ b/xen/arch/x86/acpi/power.c -@@ -296,7 +296,10 @@ static int enter_state(u32 state) - ci->spec_ctrl_flags |= (default_spec_ctrl_flags & SCF_ist_wrmsr); - - if ( boot_cpu_has(X86_FEATURE_IBRSB) ) -+ { - wrmsrl(MSR_SPEC_CTRL, default_xen_spec_ctrl); -+ ci->last_spec_ctrl = default_xen_spec_ctrl; -+ } - - if ( boot_cpu_has(X86_FEATURE_SRBDS_CTRL) ) - wrmsrl(MSR_MCU_OPT_CTRL, default_xen_mcu_opt_ctrl); -diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c -index da47cdea14..369691dd13 100644 ---- a/xen/arch/x86/setup.c -+++ b/xen/arch/x86/setup.c -@@ -1944,9 +1944,12 @@ void __init noreturn __start_xen(unsigned long mbi_p) - - if ( bsp_delay_spec_ctrl ) - { -- get_cpu_info()->spec_ctrl_flags &= ~SCF_use_shadow; -+ struct cpu_info *info = get_cpu_info(); -+ -+ info->spec_ctrl_flags &= ~SCF_use_shadow; - barrier(); - wrmsrl(MSR_SPEC_CTRL, default_xen_spec_ctrl); -+ info->last_spec_ctrl = default_xen_spec_ctrl; - } - - /* Jump to the 1:1 virtual mappings of cpu0_stack. */ -diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c -index 329cfdb6c9..ee3e86cc78 100644 ---- a/xen/arch/x86/smpboot.c -+++ b/xen/arch/x86/smpboot.c -@@ -322,6 +322,8 @@ static void set_cpu_sibling_map(unsigned int cpu) - - void start_secondary(void *unused) - { -+ struct cpu_info *info = get_cpu_info(); -+ - /* - * Dont put anything before smp_callin(), SMP booting is so fragile that we - * want to limit the things done here to the most necessary things. -@@ -378,7 +380,10 @@ void start_secondary(void *unused) - * microcode. - */ - if ( boot_cpu_has(X86_FEATURE_IBRSB) ) -+ { - wrmsrl(MSR_SPEC_CTRL, default_xen_spec_ctrl); -+ info->last_spec_ctrl = default_xen_spec_ctrl; -+ } - if ( boot_cpu_has(X86_FEATURE_SRBDS_CTRL) ) - wrmsrl(MSR_MCU_OPT_CTRL, default_xen_mcu_opt_ctrl); - -diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c -index 2072daf662..b2fd86ebe5 100644 ---- a/xen/arch/x86/spec_ctrl.c -+++ b/xen/arch/x86/spec_ctrl.c -@@ -1270,6 +1270,9 @@ void __init init_speculation_mitigations(void) - */ - if ( has_spec_ctrl ) - { -+ struct cpu_info *info = get_cpu_info(); -+ unsigned int val; -+ - bsp_delay_spec_ctrl = !cpu_has_hypervisor && default_xen_spec_ctrl; - - /* -@@ -1278,15 +1281,16 @@ void __init init_speculation_mitigations(void) - */ - if ( bsp_delay_spec_ctrl ) - { -- struct cpu_info *info = get_cpu_info(); -- - info->shadow_spec_ctrl = 0; - barrier(); - info->spec_ctrl_flags |= SCF_use_shadow; - barrier(); - } - -- wrmsrl(MSR_SPEC_CTRL, bsp_delay_spec_ctrl ? 0 : default_xen_spec_ctrl); -+ val = bsp_delay_spec_ctrl ? 0 : default_xen_spec_ctrl; -+ -+ wrmsrl(MSR_SPEC_CTRL, val); -+ info->last_spec_ctrl = val; - } - - if ( boot_cpu_has(X86_FEATURE_SRBDS_CTRL) ) -diff --git a/xen/include/asm-x86/current.h b/xen/include/asm-x86/current.h -index a74ad4bc4c..8ea4aecc5e 100644 ---- a/xen/include/asm-x86/current.h -+++ b/xen/include/asm-x86/current.h -@@ -56,6 +56,7 @@ struct cpu_info { - /* See asm-x86/spec_ctrl_asm.h for usage. */ - unsigned int shadow_spec_ctrl; - uint8_t xen_spec_ctrl; -+ uint8_t last_spec_ctrl; - uint8_t spec_ctrl_flags; - - /* -@@ -73,7 +74,6 @@ struct cpu_info { - */ - bool use_pv_cr3; - -- unsigned long __pad; - /* get_stack_bottom() must be 16-byte aligned */ - }; - -diff --git a/xen/include/asm-x86/spec_ctrl_asm.h b/xen/include/asm-x86/spec_ctrl_asm.h -index bf82528a12..9c0c7622c4 100644 ---- a/xen/include/asm-x86/spec_ctrl_asm.h -+++ b/xen/include/asm-x86/spec_ctrl_asm.h -@@ -67,6 +67,10 @@ - * steps 2 and 6 will restore the shadow value rather than leaving Xen's value - * loaded and corrupting the value used in guest context. - * -+ * Additionally, in some cases it is safe to skip writes to MSR_SPEC_CTRL when -+ * we don't require any of the side effects of an identical write. Maintain a -+ * per-cpu last_spec_ctrl value for this purpose. -+ * - * The following ASM fragments implement this algorithm. See their local - * comments for further details. - * - SPEC_CTRL_ENTRY_FROM_PV diff --git a/xen.git-72ef02da23861f686c349a6808b2f4c9adc15f9f.patch b/xen.git-72ef02da23861f686c349a6808b2f4c9adc15f9f.patch deleted file mode 100644 index 3231b6a..0000000 --- a/xen.git-72ef02da23861f686c349a6808b2f4c9adc15f9f.patch +++ /dev/null @@ -1,45 +0,0 @@ -From: Andrew Cooper -Date: Fri, 28 Jan 2022 12:03:42 +0000 (+0000) -Subject: x86/spec-ctrl: Don't use spec_ctrl_{enter,exit}_idle() for S3 -X-Git-Url: http://xenbits.xenproject.org/gitweb/?p=xen.git;a=commitdiff_plain;h=72ef02da23861f686c349a6808b2f4c9adc15f9f - -x86/spec-ctrl: Don't use spec_ctrl_{enter,exit}_idle() for S3 - -'idle' here refers to hlt/mwait. The S3 path isn't an idle path - it is a -platform reset. - -We need to load default_xen_spec_ctrl unilaterally on the way back up. -Currently it happens as a side effect of X86_FEATURE_SC_MSR_IDLE or the next -return-to-guest, but that's fragile behaviour. - -Conversely, there is no need to clear IBRS and flush the store buffers on the -way down; we're microseconds away from cutting power. - -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich -(cherry picked from commit 71fac402e05ade7b0af2c34f77517449f6f7e2c1) ---- - -diff --git a/xen/arch/x86/acpi/power.c b/xen/arch/x86/acpi/power.c -index 31a56f02d0..0837a3ead4 100644 ---- a/xen/arch/x86/acpi/power.c -+++ b/xen/arch/x86/acpi/power.c -@@ -248,7 +248,6 @@ static int enter_state(u32 state) - error = 0; - - ci = get_cpu_info(); -- spec_ctrl_enter_idle(ci); - /* Avoid NMI/#MC using MSR_SPEC_CTRL until we've reloaded microcode. */ - ci->spec_ctrl_flags &= ~SCF_ist_wrmsr; - -@@ -295,7 +294,9 @@ static int enter_state(u32 state) - - /* Re-enabled default NMI/#MC use of MSR_SPEC_CTRL. */ - ci->spec_ctrl_flags |= (default_spec_ctrl_flags & SCF_ist_wrmsr); -- spec_ctrl_exit_idle(ci); -+ -+ if ( boot_cpu_has(X86_FEATURE_IBRSB) ) -+ wrmsrl(MSR_SPEC_CTRL, default_xen_spec_ctrl); - - if ( boot_cpu_has(X86_FEATURE_SRBDS_CTRL) ) - wrmsrl(MSR_MCU_OPT_CTRL, default_xen_mcu_opt_ctrl); diff --git a/xen.git-7f34b6a895d10744bab32fc843246c45da444d8b.patch b/xen.git-7f34b6a895d10744bab32fc843246c45da444d8b.patch deleted file mode 100644 index 8ce520d..0000000 --- a/xen.git-7f34b6a895d10744bab32fc843246c45da444d8b.patch +++ /dev/null @@ -1,62 +0,0 @@ -From: Andrew Cooper -Date: Tue, 25 Jan 2022 16:09:59 +0000 (+0000) -Subject: x86/spec-ctrl: Drop use_spec_ctrl boolean -X-Git-Url: http://xenbits.xenproject.org/gitweb/?p=xen.git;a=commitdiff_plain;h=7f34b6a895d10744bab32fc843246c45da444d8b - -x86/spec-ctrl: Drop use_spec_ctrl boolean - -Several bugfixes have reduced the utility of this variable from it's original -purpose, and now all it does is aid in the setup of SCF_ist_wrmsr. - -Simplify the logic by drop the variable, and doubling up the setting of -SCF_ist_wrmsr for the PV and HVM blocks, which will make the AMD SPEC_CTRL -support easier to follow. Leave a comment explaining why SCF_ist_wrmsr is -still necessary for the VMExit case. - -No functional change. - -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich -(cherry picked from commit ec083bf552c35e10347449e21809f4780f8155d2) ---- - -diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c -index c18cc8aa49..8a550d0a09 100644 ---- a/xen/arch/x86/spec_ctrl.c -+++ b/xen/arch/x86/spec_ctrl.c -@@ -927,7 +927,7 @@ static __init void mds_calculations(uint64_t caps) - void __init init_speculation_mitigations(void) - { - enum ind_thunk thunk = THUNK_DEFAULT; -- bool use_spec_ctrl = false, ibrs = false, hw_smt_enabled; -+ bool ibrs = false, hw_smt_enabled; - bool cpu_has_bug_taa; - uint64_t caps = 0; - -@@ -1016,19 +1016,21 @@ void __init init_speculation_mitigations(void) - { - if ( opt_msr_sc_pv ) - { -- use_spec_ctrl = true; -+ default_spec_ctrl_flags |= SCF_ist_wrmsr; - setup_force_cpu_cap(X86_FEATURE_SC_MSR_PV); - } - - if ( opt_msr_sc_hvm ) - { -- use_spec_ctrl = true; -+ /* -+ * While the guest MSR_SPEC_CTRL value is loaded/saved atomically, -+ * Xen's value is not restored atomically. An early NMI hitting -+ * the VMExit path needs to restore Xen's value for safety. -+ */ -+ default_spec_ctrl_flags |= SCF_ist_wrmsr; - setup_force_cpu_cap(X86_FEATURE_SC_MSR_HVM); - } - -- if ( use_spec_ctrl ) -- default_spec_ctrl_flags |= SCF_ist_wrmsr; -- - if ( ibrs ) - default_xen_spec_ctrl |= SPEC_CTRL_IBRS; - } diff --git a/xen.git-84977e8b53935de9a1123f677213f1b146843a0e.patch b/xen.git-84977e8b53935de9a1123f677213f1b146843a0e.patch deleted file mode 100644 index 1b73e73..0000000 --- a/xen.git-84977e8b53935de9a1123f677213f1b146843a0e.patch +++ /dev/null @@ -1,41 +0,0 @@ -From 84977e8b53935de9a1123f677213f1b146843a0e Mon Sep 17 00:00:00 2001 -From: Jan Beulich -Date: Thu, 6 Jan 2022 14:11:23 +0100 -Subject: [PATCH] VT-d: don't leak domid mapping on error path - -While domain_context_mapping() invokes domain_context_unmap() in a sub- -case of handling DEV_TYPE_PCI when encountering an error, thus avoiding -a leak, individual calls to domain_context_mapping_one() aren't -similarly covered. Such a leak might persist until domain destruction. -Leverage that these cases can be recognized by pdev being non-NULL. - -Fixes: dec403cc668f ("VT-d: fix iommu_domid for PCI/PCIx devices assignment") -Signed-off-by: Jan Beulich -Reviewed-by: Kevin Tian -master commit: e6252a51faf42c892eb5fc71f8a2617580832196 -master date: 2021-11-24 11:07:11 +0100 ---- - xen/drivers/passthrough/vtd/iommu.c | 7 ++++++- - 1 file changed, 6 insertions(+), 1 deletion(-) - -diff --git a/xen/drivers/passthrough/vtd/iommu.c b/xen/drivers/passthrough/vtd/iommu.c -index de11c258ca..3b37bad25e 100644 ---- a/xen/drivers/passthrough/vtd/iommu.c -+++ b/xen/drivers/passthrough/vtd/iommu.c -@@ -1517,7 +1517,12 @@ int domain_context_mapping_one( - rc = me_wifi_quirk(domain, bus, devfn, MAP_ME_PHANTOM_FUNC); - - if ( rc ) -- domain_context_unmap_one(domain, iommu, bus, devfn); -+ { -+ ret = domain_context_unmap_one(domain, iommu, bus, devfn); -+ -+ if ( !ret && pdev && pdev->devfn == devfn ) -+ check_cleanup_domid_map(domain, pdev, iommu); -+ } - - return rc; - } --- -2.30.2 - diff --git a/xen.git-b2db518e952c3a8fe5b9ec6a2d007cda73fd05a4.patch b/xen.git-b2db518e952c3a8fe5b9ec6a2d007cda73fd05a4.patch deleted file mode 100644 index 0b2d15a..0000000 --- a/xen.git-b2db518e952c3a8fe5b9ec6a2d007cda73fd05a4.patch +++ /dev/null @@ -1,36 +0,0 @@ -From b2db518e952c3a8fe5b9ec6a2d007cda73fd05a4 Mon Sep 17 00:00:00 2001 -From: Jan Beulich -Date: Thu, 10 Mar 2022 09:43:50 +0100 -Subject: [PATCH] VT-d: drop undue address-of from check_cleanup_domid_map() -MIME-Version: 1.0 -Content-Type: text/plain; charset=utf8 -Content-Transfer-Encoding: 8bit - -For an unknown reason I added back the operator while backporting, -despite 4.16 having c06e3d810314 ("VT-d: per-domain IOMMU bitmap needs -to have dynamic size"). I can only assume that I mistakenly took the -4.15 backport as basis and/or reference. - -Fixes: fa45f6b5560e ("VT-d: split domid map cleanup check into a function") -Signed-off-by: Jan Beulich -Reviewed-by: Roger Pau Monné ---- - xen/drivers/passthrough/vtd/iommu.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/xen/drivers/passthrough/vtd/iommu.c b/xen/drivers/passthrough/vtd/iommu.c -index 3b37bad25e..ead12db6a4 100644 ---- a/xen/drivers/passthrough/vtd/iommu.c -+++ b/xen/drivers/passthrough/vtd/iommu.c -@@ -197,7 +197,7 @@ static void check_cleanup_domid_map(struct domain *d, - - if ( !found ) - { -- clear_bit(iommu->index, &dom_iommu(d)->arch.vtd.iommu_bitmap); -+ clear_bit(iommu->index, dom_iommu(d)->arch.vtd.iommu_bitmap); - cleanup_domid_map(d, iommu); - } - } --- -2.30.2 - diff --git a/xen.git-fa45f6b5560e738955993fe061a04d64c6f71c14.patch b/xen.git-fa45f6b5560e738955993fe061a04d64c6f71c14.patch deleted file mode 100644 index a5fad75..0000000 --- a/xen.git-fa45f6b5560e738955993fe061a04d64c6f71c14.patch +++ /dev/null @@ -1,142 +0,0 @@ -From: Jan Beulich -Date: Thu, 6 Jan 2022 13:10:05 +0000 (+0100) -Subject: VT-d: split domid map cleanup check into a function -X-Git-Url: http://xenbits.xenproject.org/gitweb/?p=xen.git;a=commitdiff_plain;h=fa45f6b5560e738955993fe061a04d64c6f71c14 - -VT-d: split domid map cleanup check into a function - -This logic will want invoking from elsewhere. - -No functional change intended. - -Signed-off-by: Jan Beulich -Reviewed-by: Roger Pau Monné -Reviewed-by: Kevin Tian -master commit: 9fdc10abe9457e4c9879a266f82372cb08e88ffb -master date: 2021-11-24 11:06:20 +0100 ---- - -diff --git a/xen/drivers/passthrough/vtd/iommu.c b/xen/drivers/passthrough/vtd/iommu.c -index f9ce402f22..de11c258ca 100644 ---- a/xen/drivers/passthrough/vtd/iommu.c -+++ b/xen/drivers/passthrough/vtd/iommu.c -@@ -157,6 +157,51 @@ static void cleanup_domid_map(struct domain *domain, struct vtd_iommu *iommu) - } - } - -+static bool any_pdev_behind_iommu(const struct domain *d, -+ const struct pci_dev *exclude, -+ const struct vtd_iommu *iommu) -+{ -+ const struct pci_dev *pdev; -+ -+ for_each_pdev ( d, pdev ) -+ { -+ const struct acpi_drhd_unit *drhd; -+ -+ if ( pdev == exclude ) -+ continue; -+ -+ drhd = acpi_find_matched_drhd_unit(pdev); -+ if ( drhd && drhd->iommu == iommu ) -+ return true; -+ } -+ -+ return false; -+} -+ -+/* -+ * If no other devices under the same iommu owned by this domain, -+ * clear iommu in iommu_bitmap and clear domain_id in domid_bitmap. -+ */ -+static void check_cleanup_domid_map(struct domain *d, -+ const struct pci_dev *exclude, -+ struct vtd_iommu *iommu) -+{ -+ bool found = any_pdev_behind_iommu(d, exclude, iommu); -+ -+ /* -+ * Hidden devices are associated with DomXEN but usable by the hardware -+ * domain. Hence they need considering here as well. -+ */ -+ if ( !found && is_hardware_domain(d) ) -+ found = any_pdev_behind_iommu(dom_xen, exclude, iommu); -+ -+ if ( !found ) -+ { -+ clear_bit(iommu->index, &dom_iommu(d)->arch.vtd.iommu_bitmap); -+ cleanup_domid_map(d, iommu); -+ } -+} -+ - static void sync_cache(const void *addr, unsigned int size) - { - static unsigned long clflush_size = 0; -@@ -1674,27 +1719,6 @@ int domain_context_unmap_one( - return rc; - } - --static bool any_pdev_behind_iommu(const struct domain *d, -- const struct pci_dev *exclude, -- const struct vtd_iommu *iommu) --{ -- const struct pci_dev *pdev; -- -- for_each_pdev ( d, pdev ) -- { -- const struct acpi_drhd_unit *drhd; -- -- if ( pdev == exclude ) -- continue; -- -- drhd = acpi_find_matched_drhd_unit(pdev); -- if ( drhd && drhd->iommu == iommu ) -- return true; -- } -- -- return false; --} -- - static int domain_context_unmap(struct domain *domain, u8 devfn, - struct pci_dev *pdev) - { -@@ -1703,7 +1727,6 @@ static int domain_context_unmap(struct domain *domain, u8 devfn, - int ret; - uint16_t seg = pdev->seg; - uint8_t bus = pdev->bus, tmp_bus, tmp_devfn, secbus; -- bool found; - - switch ( pdev->type ) - { -@@ -1779,28 +1802,10 @@ static int domain_context_unmap(struct domain *domain, u8 devfn, - return -EINVAL; - } - -- if ( ret || QUARANTINE_SKIP(domain) || pdev->devfn != devfn ) -- return ret; -+ if ( !ret && !QUARANTINE_SKIP(domain) && pdev->devfn == devfn ) -+ check_cleanup_domid_map(domain, pdev, iommu); - -- /* -- * If no other devices under the same iommu owned by this domain, -- * clear iommu in iommu_bitmap and clear domain_id in domid_bitmap. -- */ -- found = any_pdev_behind_iommu(domain, pdev, iommu); -- /* -- * Hidden devices are associated with DomXEN but usable by the hardware -- * domain. Hence they need considering here as well. -- */ -- if ( !found && is_hardware_domain(domain) ) -- found = any_pdev_behind_iommu(dom_xen, pdev, iommu); -- -- if ( !found ) -- { -- clear_bit(iommu->index, dom_iommu(domain)->arch.vtd.iommu_bitmap); -- cleanup_domid_map(domain, iommu); -- } -- -- return 0; -+ return ret; - } - - static void iommu_clear_root_pgtable(struct domain *d) diff --git a/xen.spec b/xen.spec index d14f502..01ae49b 100644 --- a/xen.spec +++ b/xen.spec @@ -54,8 +54,8 @@ Summary: Xen is a virtual machine monitor Name: xen -Version: 4.16.0 -Release: 6%{?dist} +Version: 4.16.1 +Release: 1%{?dist} License: GPLv2+ and LGPLv2+ and BSD URL: http://xen.org/ Source0: https://downloads.xenproject.org/release/xen/%{version}/xen-%{version}.tar.gz @@ -109,41 +109,8 @@ Patch41: xen.gcc9.fixes.patch Patch42: xen.gcc10.fixes.patch Patch43: xen.gcc11.fixes.patch Patch44: xsa376.patch -Patch45: xsa393.patch -Patch46: xsa394.patch -Patch47: xsa395.patch -Patch48: xen.gcc12.fixes.patch -Patch49: xsa398-4.16-1-xen-arm-Introduce-new-Arm-processors.patch -Patch50: xsa398-4.16-2-xen-arm-move-errata-CSV2-check-earlier.patch -Patch51: xsa398-4.16-3-xen-arm-Add-ECBHB-and-CLEARBHB-ID-fields.patch -Patch52: xsa398-4.16-4-xen-arm-Add-Spectre-BHB-handling.patch -Patch53: xsa398-4.16-5-xen-arm-Allow-to-discover-and-use-SMCCC_ARCH_WORKARO.patch -Patch54: xen.git-7f34b6a895d10744bab32fc843246c45da444d8b.patch -Patch55: xen.git-08fc03c855c071e9b1aaaa96403f2a90433336a7.patch -Patch56: xen.git-21d70feed10571543061abeaedd21ce8adc60114.patch -Patch57: xen.git-72ef02da23861f686c349a6808b2f4c9adc15f9f.patch -Patch58: xen.git-6ef732726add103ee8f63293e326ad43b1643239.patch -Patch59: xen.git-243026a2c5ad64c05281dc8ed2f1f57c0ee5988c.patch -Patch60: xen.git-41e477b4f367269dc1b768a335cfa16f48f7f02f.patch -Patch61: xsa398-4.16-6-x86-spec-ctrl-Cease-using-thunk-lfence-on-AMD.patch -Patch62: xen.efi.build.patch -Patch63: xsa397-4.16.patch -Patch64: xsa399-4.16.patch -Patch65: xen.git-fa45f6b5560e738955993fe061a04d64c6f71c14.patch -Patch66: xen.git-84977e8b53935de9a1123f677213f1b146843a0e.patch -Patch67: xen.git-b2db518e952c3a8fe5b9ec6a2d007cda73fd05a4.patch -Patch68: xsa400-4.16-01.patch -Patch69: xsa400-4.16-02.patch -Patch70: xsa400-4.16-03.patch -Patch71: xsa400-4.16-04.patch -Patch72: xsa400-4.16-05.patch -Patch73: xsa400-4.16-06.patch -Patch74: xsa400-4.16-07.patch -Patch75: xsa400-4.16-08.patch -Patch76: xsa400-4.16-09.patch -Patch77: xsa400-4.16-10.patch -Patch78: xsa400-4.16-11.patch -Patch79: xsa400-4.16-12.patch +Patch45: xen.gcc12.fixes.patch +Patch46: xen.efi.build.patch %if %build_qemutrad @@ -356,39 +323,6 @@ manage Xen virtual machines. %patch44 -p1 %patch45 -p1 %patch46 -p1 -%patch47 -p1 -%patch48 -p1 -%patch49 -p1 -%patch50 -p1 -%patch51 -p1 -%patch52 -p1 -%patch53 -p1 -%patch54 -p1 -%patch55 -p1 -%patch56 -p1 -%patch57 -p1 -%patch58 -p1 -%patch59 -p1 -%patch60 -p1 -%patch61 -p1 -%patch62 -p1 -%patch63 -p1 -%patch64 -p1 -%patch65 -p1 -%patch66 -p1 -%patch67 -p1 -%patch68 -p1 -%patch69 -p1 -%patch70 -p1 -%patch71 -p1 -%patch72 -p1 -%patch73 -p1 -%patch74 -p1 -%patch75 -p1 -%patch76 -p1 -%patch77 -p1 -%patch78 -p1 -%patch79 -p1 # qemu-xen-traditional patches pushd tools/qemu-xen-traditional @@ -559,10 +493,14 @@ rm -rf %{buildroot}/%{_libdir}/*.a rm -f %{buildroot}/usr/lib64/efi/xen-%{hv_abi}.efi rm -f %{buildroot}/usr/lib64/efi/xen-4.efi rm -f %{buildroot}/usr/lib64/efi/xen.efi +cp -p %{buildroot}/usr/lib64/efi/xen-%{version}{,.notstripped}.efi +strip -s %{buildroot}/usr/lib64/efi/xen-%{version}.efi %else rm -f %{buildroot}/%{_libdir}/efi/xen-%{hv_abi}.efi rm -f %{buildroot}/%{_libdir}/efi/xen-4.efi rm -f %{buildroot}/%{_libdir}/efi/xen.efi +cp -p %{buildroot}/%{_libdir}/efi/xen-%{version}{,.notstripped}.efi +strip -s %{buildroot}/%{_libdir}/efi/xen-%{version}.efi %endif %endif @@ -1000,6 +938,12 @@ fi %endif %changelog +* Thu Apr 14 2022 Michael Young - 4.16.1-1 +- update to xen-4.16.1 + remove or adjust patches now included or superceded upstream + renumber patches +- strip .efi file to help EFI partitions with limited space + * Tue Apr 05 2022 Michael Young - 4.16.0-6 - Racy interactions between dirty vram tracking and paging log dirty hypercalls [XSA-397, CVE-2022-26356] diff --git a/xsa393.patch b/xsa393.patch deleted file mode 100644 index 57af36b..0000000 --- a/xsa393.patch +++ /dev/null @@ -1,49 +0,0 @@ -From 7ff58ab770157a03c92604155a0c745bcab834c2 Mon Sep 17 00:00:00 2001 -From: Julien Grall -Date: Tue, 14 Dec 2021 09:53:44 +0000 -Subject: [PATCH] xen/arm: p2m: Always clear the P2M entry when the mapping is - removed - -Commit 2148a125b73b ("xen/arm: Track page accessed between batch of -Set/Way operations") allowed an entry to be invalid from the CPU PoV -(lpae_is_valid()) but valid for Xen (p2m_is_valid()). This is useful -to track which page is accessed and only perform an action on them -(e.g. clean & invalidate the cache after a set/way instruction). - -Unfortunately, __p2m_set_entry() is only zeroing the P2M entry when -lpae_is_valid() returns true. This means the entry will not be zeroed -if the entry was valid from Xen PoV but invalid from the CPU PoV for -tracking purpose. - -As a consequence, this will allow a domain to continue to access the -page after it was removed. - -Resolve the issue by always zeroing the entry if it the LPAE bit is -set or the entry is about to be removed. - -This is CVE-2022-23033 / XSA-393. - -Reported-by: Dmytro Firsov -Fixes: 2148a125b73b ("xen/arm: Track page accessed between batch of Set/Way operations") -Reviewed-by: Stefano Stabellini -Signed-off-by: Julien Grall ---- - xen/arch/arm/p2m.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/xen/arch/arm/p2m.c b/xen/arch/arm/p2m.c -index 8b20b430777e..fb71fa4c1c90 100644 ---- a/xen/arch/arm/p2m.c -+++ b/xen/arch/arm/p2m.c -@@ -1016,7 +1016,7 @@ static int __p2m_set_entry(struct p2m_domain *p2m, - * sequence when updating the translation table (D4.7.1 in ARM DDI - * 0487A.j). - */ -- if ( lpae_is_valid(orig_pte) ) -+ if ( lpae_is_valid(orig_pte) || removing_mapping ) - p2m_remove_pte(entry, p2m->clean_pte); - - if ( removing_mapping ) --- -2.32.0 - diff --git a/xsa394.patch b/xsa394.patch deleted file mode 100644 index 1704c5b..0000000 --- a/xsa394.patch +++ /dev/null @@ -1,63 +0,0 @@ -From a8bdee7a30d0cd13341d2ca1753569b171daf5b8 Mon Sep 17 00:00:00 2001 -From: Julien Grall -Date: Fri, 19 Nov 2021 11:27:47 +0000 -Subject: [PATCH] xen/grant-table: Only decrement the refcounter when grant is - fully unmapped - -The grant unmapping hypercall (GNTTABOP_unmap_grant_ref) is not a -simple revert of the changes done by the grant mapping hypercall -(GNTTABOP_map_grant_ref). - -Instead, it is possible to partially (or even not) clear some flags. -This will leave the grant is mapped until a future call where all -the flags would be cleared. - -XSA-380 introduced a refcounting that is meant to only be dropped -when the grant is fully unmapped. Unfortunately, unmap_common() will -decrement the refcount for every successful call. - -A consequence is a domain would be able to underflow the refcount -and trigger a BUG(). - -Looking at the code, it is not clear to me why a domain would -want to partially clear some flags in the grant-table. But as -this is part of the ABI, it is better to not change the behavior -for now. - -Fix it by checking if the maptrack handle has been released before -decrementing the refcounting. - -This is CVE-2022-23034 / XSA-394. - -Fixes: 9781b51efde2 ("gnttab: replace mapkind()") -Signed-off-by: Julien Grall -Reviewed-by: Jan Beulich ---- - xen/common/grant_table.c | 11 +++++++++-- - 1 file changed, 9 insertions(+), 2 deletions(-) - -diff --git a/xen/common/grant_table.c b/xen/common/grant_table.c -index 0262f2c48af8..ed1e2fabcea6 100644 ---- a/xen/common/grant_table.c -+++ b/xen/common/grant_table.c -@@ -1488,8 +1488,15 @@ unmap_common( - if ( put_handle ) - put_maptrack_handle(lgt, op->handle); - -- /* See the respective comment in map_grant_ref(). */ -- if ( rc == GNTST_okay && ld != rd && gnttab_need_iommu_mapping(ld) ) -+ /* -+ * map_grant_ref() will only increment the refcount (and update the -+ * IOMMU) once per mapping. So we only want to decrement it once the -+ * maptrack handle has been put, alongside the further IOMMU update. -+ * -+ * For the second and third check, see the respective comment in -+ * map_grant_ref(). -+ */ -+ if ( put_handle && ld != rd && gnttab_need_iommu_mapping(ld) ) - { - void **slot; - union maptrack_node node; --- -2.32.0 - diff --git a/xsa395.patch b/xsa395.patch deleted file mode 100644 index 13b7311..0000000 --- a/xsa395.patch +++ /dev/null @@ -1,42 +0,0 @@ -From 4cc924c3e3a0d53306d08b04720c427d1c298ba8 Mon Sep 17 00:00:00 2001 -From: Julien Grall -Date: Wed, 5 Jan 2022 18:09:20 +0000 -Subject: [PATCH] passthrough/x86: stop pirq iteration immediately in case of - error - -pt_pirq_iterate() will iterate in batch over all the PIRQs. The outer -loop will bail out if 'rc' is non-zero but the inner loop will continue. - -This means 'rc' will get clobbered and we may miss any errors (such as --ERESTART in the case of the callback pci_clean_dpci_irq()). - -This is CVE-2022-23035 / XSA-395. - -Fixes: c24536b636f2 ("replace d->nr_pirqs sized arrays with radix tree") -Fixes: f6dd295381f4 ("dpci: replace tasklet with softirq") -Signed-off-by: Julien Grall -Signed-off-by: Jan Beulich -Reviewed-by: Roger Pau Monné ---- - xen/drivers/passthrough/x86/hvm.c | 4 ++++ - 1 file changed, 4 insertions(+) - -diff --git a/xen/drivers/passthrough/x86/hvm.c b/xen/drivers/passthrough/x86/hvm.c -index 351daafdc9bf..0b37cd145b60 100644 ---- a/xen/drivers/passthrough/x86/hvm.c -+++ b/xen/drivers/passthrough/x86/hvm.c -@@ -732,7 +732,11 @@ int pt_pirq_iterate(struct domain *d, - - pirq = pirqs[i]->pirq; - if ( (pirq_dpci->flags & HVM_IRQ_DPCI_MAPPED) ) -+ { - rc = cb(d, pirq_dpci, arg); -+ if ( rc ) -+ break; -+ } - } - } while ( !rc && ++pirq < d->nr_pirqs && n == ARRAY_SIZE(pirqs) ); - --- -2.32.0 - diff --git a/xsa397-4.16.patch b/xsa397-4.16.patch deleted file mode 100644 index c04cd54..0000000 --- a/xsa397-4.16.patch +++ /dev/null @@ -1,98 +0,0 @@ -From: Roger Pau Monne -Subject: x86/hap: do not switch on log dirty for VRAM tracking - -XEN_DMOP_track_dirty_vram possibly calls into paging_log_dirty_enable -when using HAP mode, and it can interact badly with other ongoing -paging domctls, as XEN_DMOP_track_dirty_vram is not holding the domctl -lock. - -This was detected as a result of the following assert triggering when -doing repeated migrations of a HAP HVM domain with a stubdom: - -Assertion 'd->arch.paging.log_dirty.allocs == 0' failed at paging.c:198 -----[ Xen-4.17-unstable x86_64 debug=y Not tainted ]---- -CPU: 34 -RIP: e008:[] arch/x86/mm/paging.c#paging_free_log_dirty_bitmap+0x606/0x6 -RFLAGS: 0000000000010206 CONTEXT: hypervisor (d0v23) -[...] -Xen call trace: - [] R arch/x86/mm/paging.c#paging_free_log_dirty_bitmap+0x606/0x63a - [] S xsm/flask/hooks.c#domain_has_perm+0x5a/0x67 - [] F paging_domctl+0x251/0xd41 - [] F paging_domctl_continuation+0x19d/0x202 - [] F pv_hypercall+0x150/0x2a7 - [] F lstar_enter+0x12d/0x140 - -Such assert triggered because the stubdom used -XEN_DMOP_track_dirty_vram while dom0 was in the middle of executing -XEN_DOMCTL_SHADOW_OP_OFF, and so log dirty become enabled while -retiring the old structures, thus leading to new entries being -populated in already clear slots. - -Fix this by not enabling log dirty for VRAM tracking, similar to what -is done when using shadow instead of HAP. Call -p2m_enable_hardware_log_dirty when enabling VRAM tracking in order to -get some hardware assistance if available. As a side effect the memory -pressure on the p2m pool should go down if only VRAM tracking is -enabled, as the dirty bitmap is no longer allocated. - -Note that paging_log_dirty_range (used to get the dirty bitmap for -VRAM tracking) doesn't use the log dirty bitmap, and instead relies on -checking whether each gfn on the range has been switched from -p2m_ram_logdirty to p2m_ram_rw in order to account for dirty pages. - -This is CVE-2022-26356 / XSA-397. - -Signed-off-by: Roger Pau Monné -Reviewed-by: Jan Beulich - ---- a/xen/include/asm-x86/paging.h -+++ b/xen/include/asm-x86/paging.h -@@ -162,9 +162,6 @@ void paging_log_dirty_range(struct domai - unsigned long nr, - uint8_t *dirty_bitmap); - --/* enable log dirty */ --int paging_log_dirty_enable(struct domain *d, bool log_global); -- - /* log dirty initialization */ - void paging_log_dirty_init(struct domain *d, const struct log_dirty_ops *ops); - ---- a/xen/arch/x86/mm/hap/hap.c -+++ b/xen/arch/x86/mm/hap/hap.c -@@ -69,13 +69,6 @@ int hap_track_dirty_vram(struct domain * - { - unsigned int size = DIV_ROUND_UP(nr_frames, BITS_PER_BYTE); - -- if ( !paging_mode_log_dirty(d) ) -- { -- rc = paging_log_dirty_enable(d, false); -- if ( rc ) -- goto out; -- } -- - rc = -ENOMEM; - dirty_bitmap = vzalloc(size); - if ( !dirty_bitmap ) -@@ -107,6 +100,10 @@ int hap_track_dirty_vram(struct domain * - - paging_unlock(d); - -+ domain_pause(d); -+ p2m_enable_hardware_log_dirty(d); -+ domain_unpause(d); -+ - if ( oend > ostart ) - p2m_change_type_range(d, ostart, oend, - p2m_ram_logdirty, p2m_ram_rw); ---- a/xen/arch/x86/mm/paging.c -+++ b/xen/arch/x86/mm/paging.c -@@ -211,7 +211,7 @@ static int paging_free_log_dirty_bitmap( - return rc; - } - --int paging_log_dirty_enable(struct domain *d, bool log_global) -+static int paging_log_dirty_enable(struct domain *d, bool log_global) - { - int ret; - diff --git a/xsa398-4.16-1-xen-arm-Introduce-new-Arm-processors.patch b/xsa398-4.16-1-xen-arm-Introduce-new-Arm-processors.patch deleted file mode 100644 index 7f61307..0000000 --- a/xsa398-4.16-1-xen-arm-Introduce-new-Arm-processors.patch +++ /dev/null @@ -1,63 +0,0 @@ -From 8d18b03c95850239a1a9ebaeb565936c7c9ae070 Mon Sep 17 00:00:00 2001 -From: Bertrand Marquis -Date: Tue, 15 Feb 2022 10:37:51 +0000 -Subject: xen/arm: Introduce new Arm processors - -Add some new processor identifiers in processor.h and sync Xen -definitions with status of Linux 5.17 (declared in -arch/arm64/include/asm/cputype.h). - -This is part of XSA-398 / CVE-2022-23960. - -Signed-off-by: Bertrand Marquis -Acked-by: Julien Grall -(cherry picked from commit 35d1b85a6b43483f6bd007d48757434e54743e98) - -diff --git a/xen/include/asm-arm/processor.h b/xen/include/asm-arm/processor.h -index 8ab2940f688e..852b5f3c24b8 100644 ---- a/xen/include/asm-arm/processor.h -+++ b/xen/include/asm-arm/processor.h -@@ -65,6 +65,7 @@ - #define ARM_CPU_PART_CORTEX_A17 0xC0E - #define ARM_CPU_PART_CORTEX_A15 0xC0F - #define ARM_CPU_PART_CORTEX_A53 0xD03 -+#define ARM_CPU_PART_CORTEX_A35 0xD04 - #define ARM_CPU_PART_CORTEX_A55 0xD05 - #define ARM_CPU_PART_CORTEX_A57 0xD07 - #define ARM_CPU_PART_CORTEX_A72 0xD08 -@@ -72,11 +73,20 @@ - #define ARM_CPU_PART_CORTEX_A75 0xD0A - #define ARM_CPU_PART_CORTEX_A76 0xD0B - #define ARM_CPU_PART_NEOVERSE_N1 0xD0C -+#define ARM_CPU_PART_CORTEX_A77 0xD0D -+#define ARM_CPU_PART_NEOVERSE_V1 0xD40 -+#define ARM_CPU_PART_CORTEX_A78 0xD41 -+#define ARM_CPU_PART_CORTEX_X1 0xD44 -+#define ARM_CPU_PART_CORTEX_A710 0xD47 -+#define ARM_CPU_PART_CORTEX_X2 0xD48 -+#define ARM_CPU_PART_NEOVERSE_N2 0xD49 -+#define ARM_CPU_PART_CORTEX_A78C 0xD4B - - #define MIDR_CORTEX_A12 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A12) - #define MIDR_CORTEX_A17 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A17) - #define MIDR_CORTEX_A15 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A15) - #define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53) -+#define MIDR_CORTEX_A35 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A35) - #define MIDR_CORTEX_A55 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A55) - #define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57) - #define MIDR_CORTEX_A72 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A72) -@@ -84,6 +94,14 @@ - #define MIDR_CORTEX_A75 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A75) - #define MIDR_CORTEX_A76 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A76) - #define MIDR_NEOVERSE_N1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N1) -+#define MIDR_CORTEX_A77 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A77) -+#define MIDR_NEOVERSE_V1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V1) -+#define MIDR_CORTEX_A78 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78) -+#define MIDR_CORTEX_X1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X1) -+#define MIDR_CORTEX_A710 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A710) -+#define MIDR_CORTEX_X2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X2) -+#define MIDR_NEOVERSE_N2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N2) -+#define MIDR_CORTEX_A78C MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78C) - - /* MPIDR Multiprocessor Affinity Register */ - #define _MPIDR_UP (30) diff --git a/xsa398-4.16-2-xen-arm-move-errata-CSV2-check-earlier.patch b/xsa398-4.16-2-xen-arm-move-errata-CSV2-check-earlier.patch deleted file mode 100644 index 4d84830..0000000 --- a/xsa398-4.16-2-xen-arm-move-errata-CSV2-check-earlier.patch +++ /dev/null @@ -1,53 +0,0 @@ -From 3d963874461b3001e33f3ff90e285670f04d16c4 Mon Sep 17 00:00:00 2001 -From: Bertrand Marquis -Date: Tue, 15 Feb 2022 10:39:47 +0000 -Subject: xen/arm: move errata CSV2 check earlier - -CSV2 availability check is done after printing to the user that -workaround 1 will be used. Move the check before to prevent saying to the -user that workaround 1 is used when it is not because it is not needed. -This will also allow to reuse install_bp_hardening_vec function for -other use cases. - -Code previously returning "true", now returns "0" to conform to -enable_smccc_arch_workaround_1 returning an int and surrounding code -doing a "return 0" if workaround is not needed. - -This is part of XSA-398 / CVE-2022-23960. - -Signed-off-by: Bertrand Marquis -Reviewed-by: Julien Grall -(cherry picked from commit 599616d70eb886b9ad0ef9d6b51693ce790504ba) - -diff --git a/xen/arch/arm/cpuerrata.c b/xen/arch/arm/cpuerrata.c -index b398d480f113..00f9ebe9cee0 100644 ---- a/xen/arch/arm/cpuerrata.c -+++ b/xen/arch/arm/cpuerrata.c -@@ -103,13 +103,6 @@ install_bp_hardening_vec(const struct arm_cpu_capabilities *entry, - printk(XENLOG_INFO "CPU%u will %s on exception entry\n", - smp_processor_id(), desc); - -- /* -- * No need to install hardened vector when the processor has -- * ID_AA64PRF0_EL1.CSV2 set. -- */ -- if ( cpu_data[smp_processor_id()].pfr64.csv2 ) -- return true; -- - spin_lock(&bp_lock); - - /* -@@ -167,6 +160,13 @@ static int enable_smccc_arch_workaround_1(void *data) - if ( !entry->matches(entry) ) - return 0; - -+ /* -+ * No need to install hardened vector when the processor has -+ * ID_AA64PRF0_EL1.CSV2 set. -+ */ -+ if ( cpu_data[smp_processor_id()].pfr64.csv2 ) -+ return 0; -+ - if ( smccc_ver < SMCCC_VERSION(1, 1) ) - goto warn; - diff --git a/xsa398-4.16-3-xen-arm-Add-ECBHB-and-CLEARBHB-ID-fields.patch b/xsa398-4.16-3-xen-arm-Add-ECBHB-and-CLEARBHB-ID-fields.patch deleted file mode 100644 index d5f360c..0000000 --- a/xsa398-4.16-3-xen-arm-Add-ECBHB-and-CLEARBHB-ID-fields.patch +++ /dev/null @@ -1,94 +0,0 @@ -From 8aa3833db97e8fe1143c5ece110b9321ce1494ea Mon Sep 17 00:00:00 2001 -From: Bertrand Marquis -Date: Wed, 23 Feb 2022 09:42:18 +0000 -Subject: xen/arm: Add ECBHB and CLEARBHB ID fields - -Introduce ID coprocessor register ID_AA64ISAR2_EL1. -Add definitions in cpufeature and sysregs of ECBHB field in mmfr1 and -CLEARBHB in isar2 ID coprocessor registers. - -This is part of XSA-398 / CVE-2022-23960. - -Signed-off-by: Bertrand Marquis -Acked-by: Julien Grall -(cherry picked from commit 4b68d12d98b8790d8002fcc2c25a9d713374a4d7) - -diff --git a/xen/arch/arm/cpufeature.c b/xen/arch/arm/cpufeature.c -index 6e51f530a80e..a58965f7b9bf 100644 ---- a/xen/arch/arm/cpufeature.c -+++ b/xen/arch/arm/cpufeature.c -@@ -122,6 +122,7 @@ void identify_cpu(struct cpuinfo_arm *c) - - c->isa64.bits[0] = READ_SYSREG(ID_AA64ISAR0_EL1); - c->isa64.bits[1] = READ_SYSREG(ID_AA64ISAR1_EL1); -+ c->isa64.bits[2] = READ_SYSREG(ID_AA64ISAR2_EL1); - - c->zfr64.bits[0] = READ_SYSREG(ID_AA64ZFR0_EL1); - -diff --git a/xen/include/asm-arm/arm64/sysregs.h b/xen/include/asm-arm/arm64/sysregs.h -index d7e4772f217f..eac08ed33f53 100644 ---- a/xen/include/asm-arm/arm64/sysregs.h -+++ b/xen/include/asm-arm/arm64/sysregs.h -@@ -84,6 +84,9 @@ - #ifndef ID_DFR1_EL1 - #define ID_DFR1_EL1 S3_0_C0_C3_5 - #endif -+#ifndef ID_AA64ISAR2_EL1 -+#define ID_AA64ISAR2_EL1 S3_0_C0_C6_2 -+#endif - - /* ID registers (imported from arm64/include/asm/sysreg.h in Linux) */ - -@@ -139,6 +142,9 @@ - #define ID_AA64ISAR1_GPI_NI 0x0 - #define ID_AA64ISAR1_GPI_IMP_DEF 0x1 - -+/* id_aa64isar2 */ -+#define ID_AA64ISAR2_CLEARBHB_SHIFT 28 -+ - /* id_aa64pfr0 */ - #define ID_AA64PFR0_CSV3_SHIFT 60 - #define ID_AA64PFR0_CSV2_SHIFT 56 -@@ -232,6 +238,7 @@ - #define ID_AA64MMFR0_PARANGE_52 0x6 - - /* id_aa64mmfr1 */ -+#define ID_AA64MMFR1_ECBHB_SHIFT 60 - #define ID_AA64MMFR1_ETS_SHIFT 36 - #define ID_AA64MMFR1_TWED_SHIFT 32 - #define ID_AA64MMFR1_XNX_SHIFT 28 -diff --git a/xen/include/asm-arm/cpufeature.h b/xen/include/asm-arm/cpufeature.h -index 8a5afbaf0baf..db126508f159 100644 ---- a/xen/include/asm-arm/cpufeature.h -+++ b/xen/include/asm-arm/cpufeature.h -@@ -243,14 +243,15 @@ struct cpuinfo_arm { - unsigned long lo:4; - unsigned long pan:4; - unsigned long __res1:8; -- unsigned long __res2:32; -+ unsigned long __res2:28; -+ unsigned long ecbhb:4; - - unsigned long __res3:64; - }; - } mm64; - - union { -- register_t bits[2]; -+ register_t bits[3]; - struct { - /* ISAR0 */ - unsigned long __res0:4; -@@ -286,6 +287,12 @@ struct cpuinfo_arm { - unsigned long dgh:4; - unsigned long i8mm:4; - unsigned long __res2:8; -+ -+ /* ISAR2 */ -+ unsigned long __res3:28; -+ unsigned long clearbhb:4; -+ -+ unsigned long __res4:32; - }; - } isa64; - diff --git a/xsa398-4.16-4-xen-arm-Add-Spectre-BHB-handling.patch b/xsa398-4.16-4-xen-arm-Add-Spectre-BHB-handling.patch deleted file mode 100644 index 6959081..0000000 --- a/xsa398-4.16-4-xen-arm-Add-Spectre-BHB-handling.patch +++ /dev/null @@ -1,351 +0,0 @@ -From 789523a2aac88e3668f9c4ad892fa47b5f6bf1a7 Mon Sep 17 00:00:00 2001 -From: Rahul Singh -Date: Mon, 14 Feb 2022 18:47:32 +0000 -Subject: xen/arm: Add Spectre BHB handling - -This commit is adding Spectre BHB handling to Xen on Arm. -The commit is introducing new alternative code to be executed during -exception entry: -- SMCC workaround 3 call -- loop workaround (with 8, 24 or 32 iterations) -- use of new clearbhb instruction - -Cpuerrata is modified by this patch to apply the required workaround for -CPU affected by Spectre BHB when CONFIG_ARM64_HARDEN_BRANCH_PREDICTOR is -enabled. - -To do this the system previously used to apply smcc workaround 1 is -reused and new alternative code to be copied in the exception handler is -introduced. - -To define the type of workaround required by a processor, 4 new cpu -capabilities are introduced (for each number of loop and for smcc -workaround 3). - -When a processor is affected, enable_spectre_bhb_workaround is called -and if the processor does not have CSV2 set to 3 or ECBHB feature (which -would mean that the processor is doing what is required in hardware), -the proper code is enabled at exception entry. - -In the case where workaround 3 is not supported by the firmware, we -enable workaround 1 when possible as it will also mitigate Spectre BHB -on systems without CSV2. - -This is part of XSA-398 / CVE-2022-23960. - -Signed-off-by: Bertrand Marquis -Signed-off-by: Rahul Singh -Acked-by: Julien Grall -(cherry picked from commit 62c91eb66a2904eefb1d1d9642e3697a1e3c3a3c) - -diff --git a/xen/arch/arm/arm64/bpi.S b/xen/arch/arm/arm64/bpi.S -index d8743d955c4a..4e6382522048 100644 ---- a/xen/arch/arm/arm64/bpi.S -+++ b/xen/arch/arm/arm64/bpi.S -@@ -58,16 +58,42 @@ ENTRY(__bp_harden_hyp_vecs_start) - .endr - ENTRY(__bp_harden_hyp_vecs_end) - --ENTRY(__smccc_workaround_1_smc_start) -+.macro mitigate_spectre_bhb_loop count -+ENTRY(__mitigate_spectre_bhb_loop_start_\count) -+ stp x0, x1, [sp, #-16]! -+ mov x0, \count -+.Lspectre_bhb_loop\@: -+ b . + 4 -+ subs x0, x0, #1 -+ b.ne .Lspectre_bhb_loop\@ -+ sb -+ ldp x0, x1, [sp], #16 -+ENTRY(__mitigate_spectre_bhb_loop_end_\count) -+.endm -+ -+.macro smccc_workaround num smcc_id -+ENTRY(__smccc_workaround_smc_start_\num) - sub sp, sp, #(8 * 4) - stp x0, x1, [sp, #(8 * 2)] - stp x2, x3, [sp, #(8 * 0)] -- mov w0, #ARM_SMCCC_ARCH_WORKAROUND_1_FID -+ mov w0, \smcc_id - smc #0 - ldp x2, x3, [sp, #(8 * 0)] - ldp x0, x1, [sp, #(8 * 2)] - add sp, sp, #(8 * 4) --ENTRY(__smccc_workaround_1_smc_end) -+ENTRY(__smccc_workaround_smc_end_\num) -+.endm -+ -+ENTRY(__mitigate_spectre_bhb_clear_insn_start) -+ clearbhb -+ isb -+ENTRY(__mitigate_spectre_bhb_clear_insn_end) -+ -+mitigate_spectre_bhb_loop 8 -+mitigate_spectre_bhb_loop 24 -+mitigate_spectre_bhb_loop 32 -+smccc_workaround 1, #ARM_SMCCC_ARCH_WORKAROUND_1_FID -+smccc_workaround 3, #ARM_SMCCC_ARCH_WORKAROUND_3_FID - - /* - * Local variables: -diff --git a/xen/arch/arm/cpuerrata.c b/xen/arch/arm/cpuerrata.c -index 00f9ebe9cee0..ae649d16ef02 100644 ---- a/xen/arch/arm/cpuerrata.c -+++ b/xen/arch/arm/cpuerrata.c -@@ -145,7 +145,16 @@ install_bp_hardening_vec(const struct arm_cpu_capabilities *entry, - return ret; - } - --extern char __smccc_workaround_1_smc_start[], __smccc_workaround_1_smc_end[]; -+extern char __smccc_workaround_smc_start_1[], __smccc_workaround_smc_end_1[]; -+extern char __smccc_workaround_smc_start_3[], __smccc_workaround_smc_end_3[]; -+extern char __mitigate_spectre_bhb_clear_insn_start[], -+ __mitigate_spectre_bhb_clear_insn_end[]; -+extern char __mitigate_spectre_bhb_loop_start_8[], -+ __mitigate_spectre_bhb_loop_end_8[]; -+extern char __mitigate_spectre_bhb_loop_start_24[], -+ __mitigate_spectre_bhb_loop_end_24[]; -+extern char __mitigate_spectre_bhb_loop_start_32[], -+ __mitigate_spectre_bhb_loop_end_32[]; - - static int enable_smccc_arch_workaround_1(void *data) - { -@@ -176,8 +185,8 @@ static int enable_smccc_arch_workaround_1(void *data) - if ( (int)res.a0 < 0 ) - goto warn; - -- return !install_bp_hardening_vec(entry,__smccc_workaround_1_smc_start, -- __smccc_workaround_1_smc_end, -+ return !install_bp_hardening_vec(entry,__smccc_workaround_smc_start_1, -+ __smccc_workaround_smc_end_1, - "call ARM_SMCCC_ARCH_WORKAROUND_1"); - - warn: -@@ -187,6 +196,93 @@ static int enable_smccc_arch_workaround_1(void *data) - return 0; - } - -+/* -+ * Spectre BHB Mitigation -+ * -+ * CPU is either: -+ * - Having CVS2.3 so it is not affected. -+ * - Having ECBHB and is clearing the branch history buffer when an exception -+ * to a different exception level is happening so no mitigation is needed. -+ * - Mitigating using a loop on exception entry (number of loop depending on -+ * the CPU). -+ * - Mitigating using the firmware. -+ */ -+static int enable_spectre_bhb_workaround(void *data) -+{ -+ const struct arm_cpu_capabilities *entry = data; -+ -+ /* -+ * Enable callbacks are called on every CPU based on the capabilities, so -+ * double-check whether the CPU matches the entry. -+ */ -+ if ( !entry->matches(entry) ) -+ return 0; -+ -+ if ( cpu_data[smp_processor_id()].pfr64.csv2 == 3 ) -+ return 0; -+ -+ if ( cpu_data[smp_processor_id()].mm64.ecbhb ) -+ return 0; -+ -+ if ( cpu_data[smp_processor_id()].isa64.clearbhb ) -+ return !install_bp_hardening_vec(entry, -+ __mitigate_spectre_bhb_clear_insn_start, -+ __mitigate_spectre_bhb_clear_insn_end, -+ "use clearBHB instruction"); -+ -+ /* Apply solution depending on hwcaps set on arm_errata */ -+ if ( cpus_have_cap(ARM_WORKAROUND_BHB_LOOP_8) ) -+ return !install_bp_hardening_vec(entry, -+ __mitigate_spectre_bhb_loop_start_8, -+ __mitigate_spectre_bhb_loop_end_8, -+ "use 8 loops workaround"); -+ -+ if ( cpus_have_cap(ARM_WORKAROUND_BHB_LOOP_24) ) -+ return !install_bp_hardening_vec(entry, -+ __mitigate_spectre_bhb_loop_start_24, -+ __mitigate_spectre_bhb_loop_end_24, -+ "use 24 loops workaround"); -+ -+ if ( cpus_have_cap(ARM_WORKAROUND_BHB_LOOP_32) ) -+ return !install_bp_hardening_vec(entry, -+ __mitigate_spectre_bhb_loop_start_32, -+ __mitigate_spectre_bhb_loop_end_32, -+ "use 32 loops workaround"); -+ -+ if ( cpus_have_cap(ARM_WORKAROUND_BHB_SMCC_3) ) -+ { -+ struct arm_smccc_res res; -+ -+ if ( smccc_ver < SMCCC_VERSION(1, 1) ) -+ goto warn; -+ -+ arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FID, -+ ARM_SMCCC_ARCH_WORKAROUND_3_FID, &res); -+ /* The return value is in the lower 32-bits. */ -+ if ( (int)res.a0 < 0 ) -+ { -+ /* -+ * On processor affected with CSV2=0, workaround 1 will mitigate -+ * both Spectre v2 and BHB so use it when available -+ */ -+ if ( enable_smccc_arch_workaround_1(data) ) -+ return 1; -+ -+ goto warn; -+ } -+ -+ return !install_bp_hardening_vec(entry,__smccc_workaround_smc_start_3, -+ __smccc_workaround_smc_end_3, -+ "call ARM_SMCCC_ARCH_WORKAROUND_3"); -+ } -+ -+warn: -+ printk_once("**** No support for any spectre BHB workaround. ****\n" -+ "**** Please update your firmware. ****\n"); -+ -+ return 0; -+} -+ - #endif /* CONFIG_ARM64_HARDEN_BRANCH_PREDICTOR */ - - /* Hardening Branch predictor code for Arm32 */ -@@ -446,19 +542,77 @@ static const struct arm_cpu_capabilities arm_errata[] = { - }, - { - .capability = ARM_HARDEN_BRANCH_PREDICTOR, -- MIDR_ALL_VERSIONS(MIDR_CORTEX_A72), -+ MIDR_RANGE(MIDR_CORTEX_A72, 0, 1 << MIDR_VARIANT_SHIFT), - .enable = enable_smccc_arch_workaround_1, - }, - { -- .capability = ARM_HARDEN_BRANCH_PREDICTOR, -+ .capability = ARM_WORKAROUND_BHB_SMCC_3, - MIDR_ALL_VERSIONS(MIDR_CORTEX_A73), -- .enable = enable_smccc_arch_workaround_1, -+ .enable = enable_spectre_bhb_workaround, - }, - { -- .capability = ARM_HARDEN_BRANCH_PREDICTOR, -+ .capability = ARM_WORKAROUND_BHB_SMCC_3, - MIDR_ALL_VERSIONS(MIDR_CORTEX_A75), -- .enable = enable_smccc_arch_workaround_1, -+ .enable = enable_spectre_bhb_workaround, -+ }, -+ /* spectre BHB */ -+ { -+ .capability = ARM_WORKAROUND_BHB_LOOP_8, -+ MIDR_RANGE(MIDR_CORTEX_A72, 1 << MIDR_VARIANT_SHIFT, -+ (MIDR_VARIANT_MASK | MIDR_REVISION_MASK)), -+ .enable = enable_spectre_bhb_workaround, -+ }, -+ { -+ .capability = ARM_WORKAROUND_BHB_LOOP_24, -+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A76), -+ .enable = enable_spectre_bhb_workaround, -+ }, -+ { -+ .capability = ARM_WORKAROUND_BHB_LOOP_24, -+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A77), -+ .enable = enable_spectre_bhb_workaround, -+ }, -+ { -+ .capability = ARM_WORKAROUND_BHB_LOOP_32, -+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A78), -+ .enable = enable_spectre_bhb_workaround, -+ }, -+ { -+ .capability = ARM_WORKAROUND_BHB_LOOP_32, -+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A78C), -+ .enable = enable_spectre_bhb_workaround, -+ }, -+ { -+ .capability = ARM_WORKAROUND_BHB_LOOP_32, -+ MIDR_ALL_VERSIONS(MIDR_CORTEX_X1), -+ .enable = enable_spectre_bhb_workaround, -+ }, -+ { -+ .capability = ARM_WORKAROUND_BHB_LOOP_32, -+ MIDR_ALL_VERSIONS(MIDR_CORTEX_X2), -+ .enable = enable_spectre_bhb_workaround, -+ }, -+ { -+ .capability = ARM_WORKAROUND_BHB_LOOP_32, -+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A710), -+ .enable = enable_spectre_bhb_workaround, - }, -+ { -+ .capability = ARM_WORKAROUND_BHB_LOOP_24, -+ MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1), -+ .enable = enable_spectre_bhb_workaround, -+ }, -+ { -+ .capability = ARM_WORKAROUND_BHB_LOOP_32, -+ MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2), -+ .enable = enable_spectre_bhb_workaround, -+ }, -+ { -+ .capability = ARM_WORKAROUND_BHB_LOOP_32, -+ MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V1), -+ .enable = enable_spectre_bhb_workaround, -+ }, -+ - #endif - #ifdef CONFIG_ARM32_HARDEN_BRANCH_PREDICTOR - { -diff --git a/xen/include/asm-arm/arm64/macros.h b/xen/include/asm-arm/arm64/macros.h -index 5ad66efd6ba4..140e223b4c99 100644 ---- a/xen/include/asm-arm/arm64/macros.h -+++ b/xen/include/asm-arm/arm64/macros.h -@@ -27,6 +27,11 @@ - sb - .endm - -+ /* clearbhb instruction clearing the branch history */ -+ .macro clearbhb -+ hint #22 -+ .endm -+ - /* - * Register aliases. - */ -diff --git a/xen/include/asm-arm/cpufeature.h b/xen/include/asm-arm/cpufeature.h -index db126508f159..f7368766c07c 100644 ---- a/xen/include/asm-arm/cpufeature.h -+++ b/xen/include/asm-arm/cpufeature.h -@@ -63,8 +63,12 @@ - #define ARM64_WORKAROUND_AT_SPECULATE 9 - #define ARM_WORKAROUND_858921 10 - #define ARM64_WORKAROUND_REPEAT_TLBI 11 -+#define ARM_WORKAROUND_BHB_LOOP_8 12 -+#define ARM_WORKAROUND_BHB_LOOP_24 13 -+#define ARM_WORKAROUND_BHB_LOOP_32 14 -+#define ARM_WORKAROUND_BHB_SMCC_3 15 - --#define ARM_NCAPS 12 -+#define ARM_NCAPS 16 - - #ifndef __ASSEMBLY__ - -diff --git a/xen/include/asm-arm/smccc.h b/xen/include/asm-arm/smccc.h -index 9d94beb3df2d..b3dbeecc90ad 100644 ---- a/xen/include/asm-arm/smccc.h -+++ b/xen/include/asm-arm/smccc.h -@@ -334,6 +334,12 @@ void __arm_smccc_1_0_smc(register_t a0, register_t a1, register_t a2, - ARM_SMCCC_OWNER_ARCH, \ - 0x7FFF) - -+#define ARM_SMCCC_ARCH_WORKAROUND_3_FID \ -+ ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ -+ ARM_SMCCC_CONV_32, \ -+ ARM_SMCCC_OWNER_ARCH, \ -+ 0x3FFF) -+ - /* SMCCC error codes */ - #define ARM_SMCCC_NOT_REQUIRED (-2) - #define ARM_SMCCC_ERR_UNKNOWN_FUNCTION (-1) diff --git a/xsa398-4.16-5-xen-arm-Allow-to-discover-and-use-SMCCC_ARCH_WORKARO.patch b/xsa398-4.16-5-xen-arm-Allow-to-discover-and-use-SMCCC_ARCH_WORKARO.patch deleted file mode 100644 index efe99ac..0000000 --- a/xsa398-4.16-5-xen-arm-Allow-to-discover-and-use-SMCCC_ARCH_WORKARO.patch +++ /dev/null @@ -1,91 +0,0 @@ -From dab616cd3d4856a7a4d4f3a429a82dbdbf1aeeb9 Mon Sep 17 00:00:00 2001 -From: Bertrand Marquis -Date: Thu, 17 Feb 2022 14:52:54 +0000 -Subject: xen/arm: Allow to discover and use SMCCC_ARCH_WORKAROUND_3 - -Allow guest to discover whether or not SMCCC_ARCH_WORKAROUND_3 is -supported and create a fastpath in the code to handle guests request to -do the workaround. - -The function SMCCC_ARCH_WORKAROUND_3 will be called by the guest for -flushing the branch history. So we want the handling to be as fast as -possible. - -As the mitigation is applied on every guest exit, we can check for the -call before saving all context and return very early. - -This is part of XSA-398 / CVE-2022-23960. - -Signed-off-by: Bertrand Marquis -Reviewed-by: Julien Grall -(cherry picked from commit c0a56ea0fd92ecb471936b7355ddbecbaea3707c) - -diff --git a/xen/arch/arm/arm64/entry.S b/xen/arch/arm/arm64/entry.S -index fc3811ad0ad5..cf7b9d826f54 100644 ---- a/xen/arch/arm/arm64/entry.S -+++ b/xen/arch/arm/arm64/entry.S -@@ -336,16 +336,26 @@ guest_sync: - cbnz x1, guest_sync_slowpath /* should be 0 for HVC #0 */ - - /* -- * Fastest path possible for ARM_SMCCC_ARCH_WORKAROUND_1. -- * The workaround has already been applied on the exception -+ * Fastest path possible for ARM_SMCCC_ARCH_WORKAROUND_1 and -+ * ARM_SMCCC_ARCH_WORKAROUND_3. -+ * The workaround needed has already been applied on the exception - * entry from the guest, so let's quickly get back to the guest. - * - * Note that eor is used because the function identifier cannot - * be encoded as an immediate for cmp. - */ - eor w0, w0, #ARM_SMCCC_ARCH_WORKAROUND_1_FID -- cbnz w0, check_wa2 -+ cbz w0, fastpath_out_workaround - -+ /* ARM_SMCCC_ARCH_WORKAROUND_2 handling */ -+ eor w0, w0, #(ARM_SMCCC_ARCH_WORKAROUND_1_FID ^ ARM_SMCCC_ARCH_WORKAROUND_2_FID) -+ cbz w0, wa2_ssbd -+ -+ /* Fastpath out for ARM_SMCCC_ARCH_WORKAROUND_3 */ -+ eor w0, w0, #(ARM_SMCCC_ARCH_WORKAROUND_2_FID ^ ARM_SMCCC_ARCH_WORKAROUND_3_FID) -+ cbnz w0, guest_sync_slowpath -+ -+fastpath_out_workaround: - /* - * Clobber both x0 and x1 to prevent leakage. Note that thanks - * the eor, x0 = 0. -@@ -354,10 +364,7 @@ guest_sync: - eret - sb - --check_wa2: -- /* ARM_SMCCC_ARCH_WORKAROUND_2 handling */ -- eor w0, w0, #(ARM_SMCCC_ARCH_WORKAROUND_1_FID ^ ARM_SMCCC_ARCH_WORKAROUND_2_FID) -- cbnz w0, guest_sync_slowpath -+wa2_ssbd: - #ifdef CONFIG_ARM_SSBD - alternative_cb arm_enable_wa2_handling - b wa2_end -diff --git a/xen/arch/arm/vsmc.c b/xen/arch/arm/vsmc.c -index a36db15fffc0..b633ff2fe897 100644 ---- a/xen/arch/arm/vsmc.c -+++ b/xen/arch/arm/vsmc.c -@@ -124,6 +124,10 @@ static bool handle_arch(struct cpu_user_regs *regs) - break; - } - break; -+ case ARM_SMCCC_ARCH_WORKAROUND_3_FID: -+ if ( cpus_have_cap(ARM_WORKAROUND_BHB_SMCC_3) ) -+ ret = 0; -+ break; - } - - set_user_reg(regs, 0, ret); -@@ -132,6 +136,7 @@ static bool handle_arch(struct cpu_user_regs *regs) - } - - case ARM_SMCCC_ARCH_WORKAROUND_1_FID: -+ case ARM_SMCCC_ARCH_WORKAROUND_3_FID: - /* No return value */ - return true; - diff --git a/xsa398-4.16-6-x86-spec-ctrl-Cease-using-thunk-lfence-on-AMD.patch b/xsa398-4.16-6-x86-spec-ctrl-Cease-using-thunk-lfence-on-AMD.patch deleted file mode 100644 index 7c28ac0..0000000 --- a/xsa398-4.16-6-x86-spec-ctrl-Cease-using-thunk-lfence-on-AMD.patch +++ /dev/null @@ -1,118 +0,0 @@ -From c374a8c5cc74535e16410b7a0d9e92bf5de54f79 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Mon, 7 Mar 2022 16:35:52 +0000 -Subject: x86/spec-ctrl: Cease using thunk=lfence on AMD - -AMD have updated their Spectre v2 guidance, and lfence/jmp is no longer -considered safe. AMD are recommending using retpoline everywhere. - -Retpoline is incompatible with CET. All CET-capable hardware has efficient -IBRS (specifically, not something retrofitted in microcode), so use IBRS (and -STIBP for consistency sake). - -This is a logical change on AMD, but not on Intel as the default calculations -would end up with these settings anyway. Leave behind a message if IBRS is -found to be missing. - -Also update the default heuristics to never select THUNK_LFENCE. This causes -AMD CPUs to change their default to retpoline. - -Also update the printed message to include the AMD MSR_SPEC_CTRL settings, and -STIBP now that we set it for consistency sake. - -This is part of XSA-398 / CVE-2021-26401. - -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich -(cherry picked from commit 8d03080d2a339840d3a59e0932a94f804e45110d) - -diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc -index 995197f4b23e..f606dc0e14c1 100644 ---- a/docs/misc/xen-command-line.pandoc -+++ b/docs/misc/xen-command-line.pandoc -@@ -2269,9 +2269,9 @@ to use. - - If Xen was compiled with INDIRECT_THUNK support, `bti-thunk=` can be used to - select which of the thunks gets patched into the `__x86_indirect_thunk_%reg` --locations. The default thunk is `retpoline` (generally preferred for Intel --hardware), with the alternatives being `jmp` (a `jmp *%reg` gadget, minimal --overhead), and `lfence` (an `lfence; jmp *%reg` gadget, preferred for AMD). -+locations. The default thunk is `retpoline` (generally preferred), with the -+alternatives being `jmp` (a `jmp *%reg` gadget, minimal overhead), and -+`lfence` (an `lfence; jmp *%reg` gadget). - - On hardware supporting IBRS (Indirect Branch Restricted Speculation), the - `ibrs=` option can be used to force or prevent Xen using the feature itself. -diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c -index cbeeb199037e..ae076bec3ab0 100644 ---- a/xen/arch/x86/spec_ctrl.c -+++ b/xen/arch/x86/spec_ctrl.c -@@ -367,14 +367,19 @@ static void __init print_details(enum ind_thunk thunk, uint64_t caps) - "\n"); - - /* Settings for Xen's protection, irrespective of guests. */ -- printk(" Xen settings: BTI-Thunk %s, SPEC_CTRL: %s%s%s, Other:%s%s%s%s%s\n", -+ printk(" Xen settings: BTI-Thunk %s, SPEC_CTRL: %s%s%s%s, Other:%s%s%s%s%s\n", - thunk == THUNK_NONE ? "N/A" : - thunk == THUNK_RETPOLINE ? "RETPOLINE" : - thunk == THUNK_LFENCE ? "LFENCE" : - thunk == THUNK_JMP ? "JMP" : "?", -- !boot_cpu_has(X86_FEATURE_IBRSB) ? "No" : -+ (!boot_cpu_has(X86_FEATURE_IBRSB) && -+ !boot_cpu_has(X86_FEATURE_IBRS)) ? "No" : - (default_xen_spec_ctrl & SPEC_CTRL_IBRS) ? "IBRS+" : "IBRS-", -- !boot_cpu_has(X86_FEATURE_SSBD) ? "" : -+ (!boot_cpu_has(X86_FEATURE_STIBP) && -+ !boot_cpu_has(X86_FEATURE_AMD_STIBP)) ? "" : -+ (default_xen_spec_ctrl & SPEC_CTRL_STIBP) ? " STIBP+" : " STIBP-", -+ (!boot_cpu_has(X86_FEATURE_SSBD) && -+ !boot_cpu_has(X86_FEATURE_AMD_SSBD)) ? "" : - (default_xen_spec_ctrl & SPEC_CTRL_SSBD) ? " SSBD+" : " SSBD-", - !(caps & ARCH_CAPS_TSX_CTRL) ? "" : - (opt_tsx & 1) ? " TSX+" : " TSX-", -@@ -945,10 +950,23 @@ void __init init_speculation_mitigations(void) - /* - * First, disable the use of retpolines if Xen is using shadow stacks, as - * they are incompatible. -+ * -+ * In the absence of retpolines, IBRS needs to be used for speculative -+ * safety. All CET-capable hardware has efficient IBRS. - */ -- if ( cpu_has_xen_shstk && -- (opt_thunk == THUNK_DEFAULT || opt_thunk == THUNK_RETPOLINE) ) -- thunk = THUNK_JMP; -+ if ( cpu_has_xen_shstk ) -+ { -+ if ( !has_spec_ctrl ) -+ printk(XENLOG_WARNING "?!? CET active, but no MSR_SPEC_CTRL?\n"); -+ else if ( opt_ibrs == -1 ) -+ { -+ opt_ibrs = ibrs = true; -+ default_xen_spec_ctrl |= SPEC_CTRL_IBRS | SPEC_CTRL_STIBP; -+ } -+ -+ if ( opt_thunk == THUNK_DEFAULT || opt_thunk == THUNK_RETPOLINE ) -+ thunk = THUNK_JMP; -+ } - - /* - * Has the user specified any custom BTI mitigations? If so, follow their -@@ -968,16 +986,10 @@ void __init init_speculation_mitigations(void) - if ( IS_ENABLED(CONFIG_INDIRECT_THUNK) ) - { - /* -- * AMD's recommended mitigation is to set lfence as being dispatch -- * serialising, and to use IND_THUNK_LFENCE. -- */ -- if ( cpu_has_lfence_dispatch ) -- thunk = THUNK_LFENCE; -- /* -- * On Intel hardware, we'd like to use retpoline in preference to -+ * On all hardware, we'd like to use retpoline in preference to - * IBRS, but only if it is safe on this hardware. - */ -- else if ( retpoline_safe(caps) ) -+ if ( retpoline_safe(caps) ) - thunk = THUNK_RETPOLINE; - else if ( has_spec_ctrl ) - ibrs = true; diff --git a/xsa399-4.16.patch b/xsa399-4.16.patch deleted file mode 100644 index 5f3850e..0000000 --- a/xsa399-4.16.patch +++ /dev/null @@ -1,45 +0,0 @@ -From: Jan Beulich -Subject: VT-d: correct ordering of operations in cleanup_domid_map() - -The function may be called without any locks held (leaving aside the -domctl one, which we surely don't want to depend on here), so needs to -play safe wrt other accesses to domid_map[] and domid_bitmap[]. This is -to avoid context_set_domain_id()'s writing of domid_map[] to be reset to -zero right away in the case of it racing the freeing of a DID. - -For the interaction with context_set_domain_id() and ->domid_map[] reads -see the code comment. - -{check_,}cleanup_domid_map() are called with pcidevs_lock held or during -domain cleanup only (and pcidevs_lock is also held around -context_set_domain_id()), i.e. racing calls with the same (dom, iommu) -tuple cannot occur. - -domain_iommu_domid(), besides its use by cleanup_domid_map(), has its -result used only to control flushing, and hence a stale result would -only lead to a stray extra flush. - -This is CVE-2022-26357 / XSA-399. - -Fixes: b9c20c78789f ("VT-d: per-iommu domain-id") -Signed-off-by: Jan Beulich -Reviewed-by: Roger Pau Monné - ---- a/xen/drivers/passthrough/vtd/iommu.c -+++ b/xen/drivers/passthrough/vtd/iommu.c -@@ -152,8 +152,14 @@ static void cleanup_domid_map(struct dom - - if ( iommu_domid >= 0 ) - { -+ /* -+ * Update domid_map[] /before/ domid_bitmap[] to avoid a race with -+ * context_set_domain_id(), setting the slot to DOMID_INVALID for -+ * ->domid_map[] reads to produce a suitable value while the bit is -+ * still set. -+ */ -+ iommu->domid_map[iommu_domid] = DOMID_INVALID; - clear_bit(iommu_domid, iommu->domid_bitmap); -- iommu->domid_map[iommu_domid] = 0; - } - } - diff --git a/xsa400-4.16-01.patch b/xsa400-4.16-01.patch deleted file mode 100644 index f44f08e..0000000 --- a/xsa400-4.16-01.patch +++ /dev/null @@ -1,105 +0,0 @@ -From: Jan Beulich -Subject: VT-d: fix (de)assign ordering when RMRRs are in use - -In the event that the RMRR mappings are essential for device operation, -they should be established before updating the device's context entry, -while they should be torn down only after the device's context entry was -successfully updated. - -Also adjust a related log message. - -This is CVE-2022-26358 / part of XSA-400. - -Fixes: 8b99f4400b69 ("VT-d: fix RMRR related error handling") -Signed-off-by: Jan Beulich -Reviewed-by: Roger Pau Monné -Reviewed-by: Paul Durrant -Reviewed-by: Kevin Tian - ---- a/xen/drivers/passthrough/vtd/iommu.c -+++ b/xen/drivers/passthrough/vtd/iommu.c -@@ -2419,6 +2419,10 @@ static int reassign_device_ownership( - { - int ret; - -+ ret = domain_context_unmap(source, devfn, pdev); -+ if ( ret ) -+ return ret; -+ - /* - * Devices assigned to untrusted domains (here assumed to be any domU) - * can attempt to send arbitrary LAPIC/MSI messages. We are unprotected -@@ -2455,10 +2459,6 @@ static int reassign_device_ownership( - } - } - -- ret = domain_context_unmap(source, devfn, pdev); -- if ( ret ) -- return ret; -- - if ( devfn == pdev->devfn && pdev->domain != dom_io ) - { - list_move(&pdev->domain_list, &dom_io->pdev_list); -@@ -2534,9 +2534,8 @@ static int intel_iommu_assign_device( - } - } - -- ret = reassign_device_ownership(s, d, devfn, pdev); -- if ( ret || d == dom_io ) -- return ret; -+ if ( d == dom_io ) -+ return reassign_device_ownership(s, d, devfn, pdev); - - /* Setup rmrr identity mapping */ - for_each_rmrr_device( rmrr, bdf, i ) -@@ -2549,20 +2548,37 @@ static int intel_iommu_assign_device( - rmrr->end_address, flag); - if ( ret ) - { -- int rc; -- -- rc = reassign_device_ownership(d, s, devfn, pdev); - printk(XENLOG_G_ERR VTDPREFIX -- " cannot map reserved region (%"PRIx64",%"PRIx64"] for Dom%d (%d)\n", -- rmrr->base_address, rmrr->end_address, -- d->domain_id, ret); -- if ( rc ) -- { -- printk(XENLOG_ERR VTDPREFIX -- " failed to reclaim %pp from %pd (%d)\n", -- &PCI_SBDF3(seg, bus, devfn), d, rc); -- domain_crash(d); -- } -+ "%pd: cannot map reserved region [%"PRIx64",%"PRIx64"]: %d\n", -+ d, rmrr->base_address, rmrr->end_address, ret); -+ break; -+ } -+ } -+ } -+ -+ if ( !ret ) -+ ret = reassign_device_ownership(s, d, devfn, pdev); -+ -+ /* See reassign_device_ownership() for the hwdom aspect. */ -+ if ( !ret || is_hardware_domain(d) ) -+ return ret; -+ -+ for_each_rmrr_device( rmrr, bdf, i ) -+ { -+ if ( rmrr->segment == seg && -+ PCI_BUS(bdf) == bus && -+ PCI_DEVFN2(bdf) == devfn ) -+ { -+ int rc = iommu_identity_mapping(d, p2m_access_x, -+ rmrr->base_address, -+ rmrr->end_address, 0); -+ -+ if ( rc && rc != -ENOENT ) -+ { -+ printk(XENLOG_ERR VTDPREFIX -+ "%pd: cannot unmap reserved region [%"PRIx64",%"PRIx64"]: %d\n", -+ d, rmrr->base_address, rmrr->end_address, rc); -+ domain_crash(d); - break; - } - } diff --git a/xsa400-4.16-02.patch b/xsa400-4.16-02.patch deleted file mode 100644 index e8de874..0000000 --- a/xsa400-4.16-02.patch +++ /dev/null @@ -1,80 +0,0 @@ -From: Jan Beulich -Subject: VT-d: fix add/remove ordering when RMRRs are in use - -In the event that the RMRR mappings are essential for device operation, -they should be established before updating the device's context entry, -while they should be torn down only after the device's context entry was -successfully cleared. - -Also switch to %pd in related log messages. - -Fixes: fa88cfadf918 ("vt-d: Map RMRR in intel_iommu_add_device() if the device has RMRR") -Fixes: 8b99f4400b69 ("VT-d: fix RMRR related error handling") -Signed-off-by: Jan Beulich -Reviewed-by: Roger Pau Monné -Reviewed-by: Kevin Tian - ---- a/xen/drivers/passthrough/vtd/iommu.c -+++ b/xen/drivers/passthrough/vtd/iommu.c -@@ -1997,14 +1997,6 @@ static int intel_iommu_add_device(u8 dev - if ( !pdev->domain ) - return -EINVAL; - -- ret = domain_context_mapping(pdev->domain, devfn, pdev); -- if ( ret ) -- { -- dprintk(XENLOG_ERR VTDPREFIX, "d%d: context mapping failed\n", -- pdev->domain->domain_id); -- return ret; -- } -- - for_each_rmrr_device ( rmrr, bdf, i ) - { - if ( rmrr->segment == pdev->seg && -@@ -2021,12 +2013,17 @@ static int intel_iommu_add_device(u8 dev - rmrr->base_address, rmrr->end_address, - 0); - if ( ret ) -- dprintk(XENLOG_ERR VTDPREFIX, "d%d: RMRR mapping failed\n", -- pdev->domain->domain_id); -+ dprintk(XENLOG_ERR VTDPREFIX, "%pd: RMRR mapping failed\n", -+ pdev->domain); - } - } - -- return 0; -+ ret = domain_context_mapping(pdev->domain, devfn, pdev); -+ if ( ret ) -+ dprintk(XENLOG_ERR VTDPREFIX, "%pd: context mapping failed\n", -+ pdev->domain); -+ -+ return ret; - } - - static int intel_iommu_enable_device(struct pci_dev *pdev) -@@ -2048,11 +2045,15 @@ static int intel_iommu_remove_device(u8 - { - struct acpi_rmrr_unit *rmrr; - u16 bdf; -- int i; -+ int ret, i; - - if ( !pdev->domain ) - return -EINVAL; - -+ ret = domain_context_unmap(pdev->domain, devfn, pdev); -+ if ( ret ) -+ return ret; -+ - for_each_rmrr_device ( rmrr, bdf, i ) - { - if ( rmrr->segment != pdev->seg || -@@ -2068,7 +2069,7 @@ static int intel_iommu_remove_device(u8 - rmrr->end_address, 0); - } - -- return domain_context_unmap(pdev->domain, devfn, pdev); -+ return 0; - } - - static int __hwdom_init setup_hwdom_device(u8 devfn, struct pci_dev *pdev) diff --git a/xsa400-4.16-03.patch b/xsa400-4.16-03.patch deleted file mode 100644 index f422ad2..0000000 --- a/xsa400-4.16-03.patch +++ /dev/null @@ -1,134 +0,0 @@ -From: Jan Beulich -Subject: IOMMU/x86: tighten iommu_alloc_pgtable()'s parameter - -This is to make more obvious that nothing outside of domain_iommu(d) -actually changes or is otherwise needed by the function. - -No functional change intended. - -Signed-off-by: Jan Beulich -Reviewed-by: Roger Pau Monné -Reviewed-by: Paul Durrant -Reviewed-by: Kevin Tian - ---- a/xen/include/asm-x86/iommu.h -+++ b/xen/include/asm-x86/iommu.h -@@ -142,7 +142,8 @@ int pi_update_irte(const struct pi_desc - }) - - int __must_check iommu_free_pgtables(struct domain *d); --struct page_info *__must_check iommu_alloc_pgtable(struct domain *d); -+struct domain_iommu; -+struct page_info *__must_check iommu_alloc_pgtable(struct domain_iommu *hd); - - #endif /* !__ARCH_X86_IOMMU_H__ */ - /* ---- a/xen/drivers/passthrough/amd/iommu_map.c -+++ b/xen/drivers/passthrough/amd/iommu_map.c -@@ -184,7 +184,7 @@ static int iommu_pde_from_dfn(struct dom - unsigned long next_table_mfn; - unsigned int level; - struct page_info *table; -- const struct domain_iommu *hd = dom_iommu(d); -+ struct domain_iommu *hd = dom_iommu(d); - - table = hd->arch.amd.root_table; - level = hd->arch.amd.paging_mode; -@@ -219,7 +219,7 @@ static int iommu_pde_from_dfn(struct dom - mfn = next_table_mfn; - - /* allocate lower level page table */ -- table = iommu_alloc_pgtable(d); -+ table = iommu_alloc_pgtable(hd); - if ( table == NULL ) - { - AMD_IOMMU_ERROR("cannot allocate I/O page table\n"); -@@ -249,7 +249,7 @@ static int iommu_pde_from_dfn(struct dom - - if ( next_table_mfn == 0 ) - { -- table = iommu_alloc_pgtable(d); -+ table = iommu_alloc_pgtable(hd); - if ( table == NULL ) - { - AMD_IOMMU_ERROR("cannot allocate I/O page table\n"); -@@ -553,7 +553,7 @@ int __init amd_iommu_quarantine_init(str - - spin_lock(&hd->arch.mapping_lock); - -- hd->arch.amd.root_table = iommu_alloc_pgtable(d); -+ hd->arch.amd.root_table = iommu_alloc_pgtable(hd); - if ( !hd->arch.amd.root_table ) - goto out; - -@@ -568,7 +568,7 @@ int __init amd_iommu_quarantine_init(str - * page table pages, and the resulting allocations are always - * zeroed. - */ -- pg = iommu_alloc_pgtable(d); -+ pg = iommu_alloc_pgtable(hd); - if ( !pg ) - break; - ---- a/xen/drivers/passthrough/amd/pci_amd_iommu.c -+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c -@@ -242,7 +242,7 @@ int amd_iommu_alloc_root(struct domain * - - if ( unlikely(!hd->arch.amd.root_table) ) - { -- hd->arch.amd.root_table = iommu_alloc_pgtable(d); -+ hd->arch.amd.root_table = iommu_alloc_pgtable(hd); - if ( !hd->arch.amd.root_table ) - return -ENOMEM; - } ---- a/xen/drivers/passthrough/vtd/iommu.c -+++ b/xen/drivers/passthrough/vtd/iommu.c -@@ -330,7 +330,7 @@ static u64 addr_to_dma_page_maddr(struct - { - struct page_info *pg; - -- if ( !alloc || !(pg = iommu_alloc_pgtable(domain)) ) -+ if ( !alloc || !(pg = iommu_alloc_pgtable(hd)) ) - goto out; - - hd->arch.vtd.pgd_maddr = page_to_maddr(pg); -@@ -350,7 +350,7 @@ static u64 addr_to_dma_page_maddr(struct - if ( !alloc ) - break; - -- pg = iommu_alloc_pgtable(domain); -+ pg = iommu_alloc_pgtable(hd); - if ( !pg ) - break; - -@@ -2766,7 +2766,7 @@ static int __init intel_iommu_quarantine - goto out; - } - -- pg = iommu_alloc_pgtable(d); -+ pg = iommu_alloc_pgtable(hd); - - rc = -ENOMEM; - if ( !pg ) -@@ -2785,7 +2785,7 @@ static int __init intel_iommu_quarantine - * page table pages, and the resulting allocations are always - * zeroed. - */ -- pg = iommu_alloc_pgtable(d); -+ pg = iommu_alloc_pgtable(hd); - - if ( !pg ) - goto out; ---- a/xen/drivers/passthrough/x86/iommu.c -+++ b/xen/drivers/passthrough/x86/iommu.c -@@ -416,9 +416,8 @@ int iommu_free_pgtables(struct domain *d - return 0; - } - --struct page_info *iommu_alloc_pgtable(struct domain *d) -+struct page_info *iommu_alloc_pgtable(struct domain_iommu *hd) - { -- struct domain_iommu *hd = dom_iommu(d); - unsigned int memflags = 0; - struct page_info *pg; - void *p; diff --git a/xsa400-4.16-04.patch b/xsa400-4.16-04.patch deleted file mode 100644 index 41b4f34..0000000 --- a/xsa400-4.16-04.patch +++ /dev/null @@ -1,94 +0,0 @@ -From: Jan Beulich -Subject: VT-d: drop ownership checking from domain_context_mapping_one() - -Despite putting in quite a bit of effort it was not possible to -establish why exactly this code exists (beyond possibly sanity -checking). Instead of a subsequent change further complicating this -logic, simply get rid of it. - -Take the opportunity and move the respective unmap_vtd_domain_page() out -of the locked region. - -Signed-off-by: Jan Beulich -Reviewed-by: Roger Pau Monné -Reviewed-by: Paul Durrant -Reviewed-by: Kevin Tian - ---- a/xen/drivers/passthrough/vtd/iommu.c -+++ b/xen/drivers/passthrough/vtd/iommu.c -@@ -124,28 +124,6 @@ static int context_set_domain_id(struct - return 0; - } - --static int context_get_domain_id(struct context_entry *context, -- struct vtd_iommu *iommu) --{ -- unsigned long dom_index, nr_dom; -- int domid = -1; -- -- if (iommu && context) -- { -- nr_dom = cap_ndoms(iommu->cap); -- -- dom_index = context_domain_id(*context); -- -- if ( dom_index < nr_dom && iommu->domid_map ) -- domid = iommu->domid_map[dom_index]; -- else -- dprintk(XENLOG_DEBUG VTDPREFIX, -- "dom_index %lu exceeds nr_dom %lu or iommu has no domid_map\n", -- dom_index, nr_dom); -- } -- return domid; --} -- - static void cleanup_domid_map(struct domain *domain, struct vtd_iommu *iommu) - { - int iommu_domid = domain_iommu_domid(domain, iommu); -@@ -1416,44 +1394,9 @@ int domain_context_mapping_one( - - if ( context_present(*context) ) - { -- int res = 0; -- -- /* Try to get domain ownership from device structure. If that's -- * not available, try to read it from the context itself. */ -- if ( pdev ) -- { -- if ( pdev->domain != domain ) -- { -- printk(XENLOG_G_INFO VTDPREFIX "%pd: %pp owned by %pd", -- domain, &PCI_SBDF3(seg, bus, devfn), -- pdev->domain); -- res = -EINVAL; -- } -- } -- else -- { -- int cdomain; -- cdomain = context_get_domain_id(context, iommu); -- -- if ( cdomain < 0 ) -- { -- printk(XENLOG_G_WARNING VTDPREFIX -- "%pd: %pp mapped, but can't find owner\n", -- domain, &PCI_SBDF3(seg, bus, devfn)); -- res = -EINVAL; -- } -- else if ( cdomain != domain->domain_id ) -- { -- printk(XENLOG_G_INFO VTDPREFIX -- "%pd: %pp already mapped to d%d", -- domain, &PCI_SBDF3(seg, bus, devfn), cdomain); -- res = -EINVAL; -- } -- } -- -- unmap_vtd_domain_page(context_entries); - spin_unlock(&iommu->lock); -- return res; -+ unmap_vtd_domain_page(context_entries); -+ return 0; - } - - if ( iommu_hwdom_passthrough && is_hardware_domain(domain) ) diff --git a/xsa400-4.16-05.patch b/xsa400-4.16-05.patch deleted file mode 100644 index 0eb6619..0000000 --- a/xsa400-4.16-05.patch +++ /dev/null @@ -1,570 +0,0 @@ -From: Jan Beulich -Subject: VT-d: re-assign devices directly - -Devices with RMRRs, due to it being unspecified how/when the specified -memory regions may get accessed, may not be left disconnected from their -respective mappings (as long as it's not certain that the device has -been fully quiesced). Hence rather than unmapping the old context and -then mapping the new one, re-assignment needs to be done in a single -step. - -This is CVE-2022-26359 / part of XSA-400. - -Reported-by: Roger Pau Monné - -Similarly quarantining scratch-page mode relies on page tables to be -continuously wired up. - -To avoid complicating things more than necessary, treat all devices -mostly equally, i.e. regardless of their association with any RMRRs. The -main difference is when it comes to updating context entries, which need -to be atomic when there are RMRRs. Yet atomicity can only be achieved -with CMPXCHG16B, availability of which we can't take for given. - -The seemingly complicated choice of non-negative return values for -domain_context_mapping_one() is to limit code churn: This way callers -passing NULL for pdev don't need fiddling with. - -Signed-off-by: Jan Beulich -Reviewed-by: Kevin Tian -Reviewed-by: Roger Pau Monné - ---- a/xen/drivers/passthrough/vtd/extern.h -+++ b/xen/drivers/passthrough/vtd/extern.h -@@ -84,7 +84,8 @@ void free_pgtable_maddr(u64 maddr); - void *map_vtd_domain_page(u64 maddr); - void unmap_vtd_domain_page(const void *va); - int domain_context_mapping_one(struct domain *domain, struct vtd_iommu *iommu, -- u8 bus, u8 devfn, const struct pci_dev *); -+ uint8_t bus, uint8_t devfn, -+ const struct pci_dev *pdev, unsigned int mode); - int domain_context_unmap_one(struct domain *domain, struct vtd_iommu *iommu, - u8 bus, u8 devfn); - int intel_iommu_get_reserved_device_memory(iommu_grdm_t *func, void *ctxt); -@@ -104,8 +105,8 @@ bool is_azalia_tlb_enabled(const struct - void platform_quirks_init(void); - void vtd_ops_preamble_quirk(struct vtd_iommu *iommu); - void vtd_ops_postamble_quirk(struct vtd_iommu *iommu); --int __must_check me_wifi_quirk(struct domain *domain, -- u8 bus, u8 devfn, int map); -+int __must_check me_wifi_quirk(struct domain *domain, uint8_t bus, -+ uint8_t devfn, unsigned int mode); - void pci_vtd_quirk(const struct pci_dev *); - void quirk_iommu_caps(struct vtd_iommu *iommu); - ---- a/xen/drivers/passthrough/vtd/iommu.c -+++ b/xen/drivers/passthrough/vtd/iommu.c -@@ -120,6 +120,7 @@ static int context_set_domain_id(struct - } - - set_bit(i, iommu->domid_bitmap); -+ context->hi &= ~(((1 << DID_FIELD_WIDTH) - 1) << DID_HIGH_OFFSET); - context->hi |= (i & ((1 << DID_FIELD_WIDTH) - 1)) << DID_HIGH_OFFSET; - return 0; - } -@@ -1371,15 +1372,27 @@ static void __hwdom_init intel_iommu_hwd - } - } - -+/* -+ * This function returns -+ * - a negative errno value upon error, -+ * - zero upon success when previously the entry was non-present, or this isn't -+ * the "main" request for a device (pdev == NULL), or for no-op quarantining -+ * assignments, -+ * - positive (one) upon success when previously the entry was present and this -+ * is the "main" request for a device (pdev != NULL). -+ */ - int domain_context_mapping_one( - struct domain *domain, - struct vtd_iommu *iommu, -- u8 bus, u8 devfn, const struct pci_dev *pdev) -+ uint8_t bus, uint8_t devfn, const struct pci_dev *pdev, -+ unsigned int mode) - { - struct domain_iommu *hd = dom_iommu(domain); -- struct context_entry *context, *context_entries; -+ struct context_entry *context, *context_entries, lctxt; -+ __uint128_t old; - u64 maddr, pgd_maddr; -- u16 seg = iommu->drhd->segment; -+ uint16_t seg = iommu->drhd->segment, prev_did = 0; -+ struct domain *prev_dom = NULL; - int rc, ret; - bool_t flush_dev_iotlb; - -@@ -1391,17 +1404,32 @@ int domain_context_mapping_one( - maddr = bus_to_context_maddr(iommu, bus); - context_entries = (struct context_entry *)map_vtd_domain_page(maddr); - context = &context_entries[devfn]; -+ old = (lctxt = *context).full; - -- if ( context_present(*context) ) -+ if ( context_present(lctxt) ) - { -- spin_unlock(&iommu->lock); -- unmap_vtd_domain_page(context_entries); -- return 0; -+ domid_t domid; -+ -+ prev_did = context_domain_id(lctxt); -+ domid = iommu->domid_map[prev_did]; -+ if ( domid < DOMID_FIRST_RESERVED ) -+ prev_dom = rcu_lock_domain_by_id(domid); -+ else if ( domid == DOMID_IO ) -+ prev_dom = rcu_lock_domain(dom_io); -+ if ( !prev_dom ) -+ { -+ spin_unlock(&iommu->lock); -+ unmap_vtd_domain_page(context_entries); -+ dprintk(XENLOG_DEBUG VTDPREFIX, -+ "no domain for did %u (nr_dom %u)\n", -+ prev_did, cap_ndoms(iommu->cap)); -+ return -ESRCH; -+ } - } - - if ( iommu_hwdom_passthrough && is_hardware_domain(domain) ) - { -- context_set_translation_type(*context, CONTEXT_TT_PASS_THRU); -+ context_set_translation_type(lctxt, CONTEXT_TT_PASS_THRU); - } - else - { -@@ -1413,36 +1441,107 @@ int domain_context_mapping_one( - spin_unlock(&hd->arch.mapping_lock); - spin_unlock(&iommu->lock); - unmap_vtd_domain_page(context_entries); -+ if ( prev_dom ) -+ rcu_unlock_domain(prev_dom); - return -ENOMEM; - } - -- context_set_address_root(*context, pgd_maddr); -+ context_set_address_root(lctxt, pgd_maddr); - if ( ats_enabled && ecap_dev_iotlb(iommu->ecap) ) -- context_set_translation_type(*context, CONTEXT_TT_DEV_IOTLB); -+ context_set_translation_type(lctxt, CONTEXT_TT_DEV_IOTLB); - else -- context_set_translation_type(*context, CONTEXT_TT_MULTI_LEVEL); -+ context_set_translation_type(lctxt, CONTEXT_TT_MULTI_LEVEL); - - spin_unlock(&hd->arch.mapping_lock); - } - -- if ( context_set_domain_id(context, domain, iommu) ) -+ if ( context_set_domain_id(&lctxt, domain, iommu) ) - { -+ unlock: - spin_unlock(&iommu->lock); - unmap_vtd_domain_page(context_entries); -+ if ( prev_dom ) -+ rcu_unlock_domain(prev_dom); - return -EFAULT; - } - -- context_set_address_width(*context, level_to_agaw(iommu->nr_pt_levels)); -- context_set_fault_enable(*context); -- context_set_present(*context); -+ if ( !prev_dom ) -+ { -+ context_set_address_width(lctxt, level_to_agaw(iommu->nr_pt_levels)); -+ context_set_fault_enable(lctxt); -+ context_set_present(lctxt); -+ } -+ else if ( prev_dom == domain ) -+ { -+ ASSERT(lctxt.full == context->full); -+ rc = !!pdev; -+ goto unlock; -+ } -+ else -+ { -+ ASSERT(context_address_width(lctxt) == -+ level_to_agaw(iommu->nr_pt_levels)); -+ ASSERT(!context_fault_disable(lctxt)); -+ } -+ -+ if ( cpu_has_cx16 ) -+ { -+ __uint128_t res = cmpxchg16b(context, &old, &lctxt.full); -+ -+ /* -+ * Hardware does not update the context entry behind our backs, -+ * so the return value should match "old". -+ */ -+ if ( res != old ) -+ { -+ if ( pdev ) -+ check_cleanup_domid_map(domain, pdev, iommu); -+ printk(XENLOG_ERR -+ "%pp: unexpected context entry %016lx_%016lx (expected %016lx_%016lx)\n", -+ &PCI_SBDF3(pdev->seg, pdev->bus, devfn), -+ (uint64_t)(res >> 64), (uint64_t)res, -+ (uint64_t)(old >> 64), (uint64_t)old); -+ rc = -EILSEQ; -+ goto unlock; -+ } -+ } -+ else if ( !prev_dom || !(mode & MAP_WITH_RMRR) ) -+ { -+ context_clear_present(*context); -+ iommu_sync_cache(context, sizeof(*context)); -+ -+ write_atomic(&context->hi, lctxt.hi); -+ /* No barrier should be needed between these two. */ -+ write_atomic(&context->lo, lctxt.lo); -+ } -+ else /* Best effort, updating DID last. */ -+ { -+ /* -+ * By non-atomically updating the context entry's DID field last, -+ * during a short window in time TLB entries with the old domain ID -+ * but the new page tables may be inserted. This could affect I/O -+ * of other devices using this same (old) domain ID. Such updating -+ * therefore is not a problem if this was the only device associated -+ * with the old domain ID. Diverting I/O of any of a dying domain's -+ * devices to the quarantine page tables is intended anyway. -+ */ -+ if ( !(mode & (MAP_OWNER_DYING | MAP_SINGLE_DEVICE)) ) -+ printk(XENLOG_WARNING VTDPREFIX -+ " %pp: reassignment may cause %pd data corruption\n", -+ &PCI_SBDF3(seg, bus, devfn), prev_dom); -+ -+ write_atomic(&context->lo, lctxt.lo); -+ /* No barrier should be needed between these two. */ -+ write_atomic(&context->hi, lctxt.hi); -+ } -+ - iommu_sync_cache(context, sizeof(struct context_entry)); - spin_unlock(&iommu->lock); - -- /* Context entry was previously non-present (with domid 0). */ -- rc = iommu_flush_context_device(iommu, 0, PCI_BDF2(bus, devfn), -- DMA_CCMD_MASK_NOBIT, 1); -+ rc = iommu_flush_context_device(iommu, prev_did, PCI_BDF2(bus, devfn), -+ DMA_CCMD_MASK_NOBIT, !prev_dom); - flush_dev_iotlb = !!find_ats_dev_drhd(iommu); -- ret = iommu_flush_iotlb_dsi(iommu, 0, 1, flush_dev_iotlb); -+ ret = iommu_flush_iotlb_dsi(iommu, prev_did, !prev_dom, flush_dev_iotlb); - - /* - * The current logic for returns: -@@ -1463,17 +1562,26 @@ int domain_context_mapping_one( - unmap_vtd_domain_page(context_entries); - - if ( !seg && !rc ) -- rc = me_wifi_quirk(domain, bus, devfn, MAP_ME_PHANTOM_FUNC); -+ rc = me_wifi_quirk(domain, bus, devfn, mode); - - if ( rc ) - { -- ret = domain_context_unmap_one(domain, iommu, bus, devfn); -+ if ( !prev_dom ) -+ ret = domain_context_unmap_one(domain, iommu, bus, devfn); -+ else if ( prev_dom != domain ) /* Avoid infinite recursion. */ -+ ret = domain_context_mapping_one(prev_dom, iommu, bus, devfn, pdev, -+ mode & MAP_WITH_RMRR) < 0; -+ else -+ ret = 1; - - if ( !ret && pdev && pdev->devfn == devfn ) - check_cleanup_domid_map(domain, pdev, iommu); - } - -- return rc; -+ if ( prev_dom ) -+ rcu_unlock_domain(prev_dom); -+ -+ return rc ?: pdev && prev_dom; - } - - static int domain_context_unmap(struct domain *d, uint8_t devfn, -@@ -1483,8 +1591,10 @@ static int domain_context_mapping(struct - struct pci_dev *pdev) - { - const struct acpi_drhd_unit *drhd = acpi_find_matched_drhd_unit(pdev); -+ const struct acpi_rmrr_unit *rmrr; - int ret = 0; -- uint16_t seg = pdev->seg; -+ unsigned int i, mode = 0; -+ uint16_t seg = pdev->seg, bdf; - uint8_t bus = pdev->bus, secbus; - - /* -@@ -1500,8 +1610,29 @@ static int domain_context_mapping(struct - - ASSERT(pcidevs_locked()); - -+ for_each_rmrr_device( rmrr, bdf, i ) -+ { -+ if ( rmrr->segment != pdev->seg || bdf != pdev->sbdf.bdf ) -+ continue; -+ -+ mode |= MAP_WITH_RMRR; -+ break; -+ } -+ -+ if ( domain != pdev->domain ) -+ { -+ if ( pdev->domain->is_dying ) -+ mode |= MAP_OWNER_DYING; -+ else if ( drhd && -+ !any_pdev_behind_iommu(pdev->domain, pdev, drhd->iommu) && -+ !pdev->phantom_stride ) -+ mode |= MAP_SINGLE_DEVICE; -+ } -+ - switch ( pdev->type ) - { -+ bool prev_present; -+ - case DEV_TYPE_PCI_HOST_BRIDGE: - if ( iommu_debug ) - printk(VTDPREFIX "%pd:Hostbridge: skip %pp map\n", -@@ -1523,7 +1654,9 @@ static int domain_context_mapping(struct - printk(VTDPREFIX "%pd:PCIe: map %pp\n", - domain, &PCI_SBDF3(seg, bus, devfn)); - ret = domain_context_mapping_one(domain, drhd->iommu, bus, devfn, -- pdev); -+ pdev, mode); -+ if ( ret > 0 ) -+ ret = 0; - if ( !ret && devfn == pdev->devfn && ats_device(pdev, drhd) > 0 ) - enable_ats_device(pdev, &drhd->iommu->ats_devices); - -@@ -1538,9 +1671,10 @@ static int domain_context_mapping(struct - domain, &PCI_SBDF3(seg, bus, devfn)); - - ret = domain_context_mapping_one(domain, drhd->iommu, bus, devfn, -- pdev); -- if ( ret ) -+ pdev, mode); -+ if ( ret < 0 ) - break; -+ prev_present = ret; - - if ( (ret = find_upstream_bridge(seg, &bus, &devfn, &secbus)) < 1 ) - { -@@ -1548,6 +1682,15 @@ static int domain_context_mapping(struct - break; - ret = -ENXIO; - } -+ /* -+ * Strictly speaking if the device is the only one behind this bridge -+ * and the only one with this (secbus,0,0) tuple, it could be allowed -+ * to be re-assigned regardless of RMRR presence. But let's deal with -+ * that case only if it is actually found in the wild. -+ */ -+ else if ( prev_present && (mode & MAP_WITH_RMRR) && -+ domain != pdev->domain ) -+ ret = -EOPNOTSUPP; - - /* - * Mapping a bridge should, if anything, pass the struct pci_dev of -@@ -1556,7 +1699,7 @@ static int domain_context_mapping(struct - */ - if ( ret >= 0 ) - ret = domain_context_mapping_one(domain, drhd->iommu, bus, devfn, -- NULL); -+ NULL, mode); - - /* - * Devices behind PCIe-to-PCI/PCIx bridge may generate different -@@ -1571,10 +1714,15 @@ static int domain_context_mapping(struct - if ( !ret && pdev_type(seg, bus, devfn) == DEV_TYPE_PCIe2PCI_BRIDGE && - (secbus != pdev->bus || pdev->devfn != 0) ) - ret = domain_context_mapping_one(domain, drhd->iommu, secbus, 0, -- NULL); -+ NULL, mode); - - if ( ret ) -- domain_context_unmap(domain, devfn, pdev); -+ { -+ if ( !prev_present ) -+ domain_context_unmap(domain, devfn, pdev); -+ else if ( pdev->domain != domain ) /* Avoid infinite recursion. */ -+ domain_context_mapping(pdev->domain, devfn, pdev); -+ } - - break; - -@@ -2363,17 +2511,46 @@ static int reassign_device_ownership( - { - int ret; - -- ret = domain_context_unmap(source, devfn, pdev); -+ if ( !QUARANTINE_SKIP(target) ) -+ { -+ if ( !has_arch_pdevs(target) ) -+ vmx_pi_hooks_assign(target); -+ -+ /* -+ * Devices assigned to untrusted domains (here assumed to be any domU) -+ * can attempt to send arbitrary LAPIC/MSI messages. We are unprotected -+ * by the root complex unless interrupt remapping is enabled. -+ */ -+ if ( (target != hardware_domain) && !iommu_intremap ) -+ untrusted_msi = true; -+ -+ ret = domain_context_mapping(target, devfn, pdev); -+ -+ if ( !ret && !QUARANTINE_SKIP(source) && pdev->devfn == devfn ) -+ { -+ const struct acpi_drhd_unit *drhd = acpi_find_matched_drhd_unit(pdev); -+ -+ if ( drhd ) -+ check_cleanup_domid_map(source, pdev, drhd->iommu); -+ } -+ } -+ else -+ ret = domain_context_unmap(source, devfn, pdev); - if ( ret ) -+ { -+ if ( !has_arch_pdevs(target) ) -+ vmx_pi_hooks_deassign(target); - return ret; -+ } - -- /* -- * Devices assigned to untrusted domains (here assumed to be any domU) -- * can attempt to send arbitrary LAPIC/MSI messages. We are unprotected -- * by the root complex unless interrupt remapping is enabled. -- */ -- if ( (target != hardware_domain) && !iommu_intremap ) -- untrusted_msi = true; -+ if ( devfn == pdev->devfn && pdev->domain != target ) -+ { -+ list_move(&pdev->domain_list, &target->pdev_list); -+ pdev->domain = target; -+ } -+ -+ if ( !has_arch_pdevs(source) ) -+ vmx_pi_hooks_deassign(source); - - /* - * If the device belongs to the hardware domain, and it has RMRR, don't -@@ -2403,34 +2580,7 @@ static int reassign_device_ownership( - } - } - -- if ( devfn == pdev->devfn && pdev->domain != dom_io ) -- { -- list_move(&pdev->domain_list, &dom_io->pdev_list); -- pdev->domain = dom_io; -- } -- -- if ( !has_arch_pdevs(source) ) -- vmx_pi_hooks_deassign(source); -- -- if ( !has_arch_pdevs(target) ) -- vmx_pi_hooks_assign(target); -- -- ret = domain_context_mapping(target, devfn, pdev); -- if ( ret ) -- { -- if ( !has_arch_pdevs(target) ) -- vmx_pi_hooks_deassign(target); -- -- return ret; -- } -- -- if ( devfn == pdev->devfn && pdev->domain != target ) -- { -- list_move(&pdev->domain_list, &target->pdev_list); -- pdev->domain = target; -- } -- -- return ret; -+ return 0; - } - - static int intel_iommu_assign_device( ---- a/xen/drivers/passthrough/vtd/iommu.h -+++ b/xen/drivers/passthrough/vtd/iommu.h -@@ -202,8 +202,12 @@ struct root_entry { - do {(root).val |= ((value) & PAGE_MASK_4K);} while(0) - - struct context_entry { -- u64 lo; -- u64 hi; -+ union { -+ struct { -+ uint64_t lo, hi; -+ }; -+ __uint128_t full; -+ }; - }; - #define ROOT_ENTRY_NR (PAGE_SIZE_4K/sizeof(struct root_entry)) - #define context_present(c) ((c).lo & 1) ---- a/xen/drivers/passthrough/vtd/quirks.c -+++ b/xen/drivers/passthrough/vtd/quirks.c -@@ -407,7 +407,8 @@ void __init platform_quirks_init(void) - */ - - static int __must_check map_me_phantom_function(struct domain *domain, -- u32 dev, int map) -+ unsigned int dev, -+ unsigned int mode) - { - struct acpi_drhd_unit *drhd; - struct pci_dev *pdev; -@@ -418,9 +419,9 @@ static int __must_check map_me_phantom_f - drhd = acpi_find_matched_drhd_unit(pdev); - - /* map or unmap ME phantom function */ -- if ( map ) -+ if ( !(mode & UNMAP_ME_PHANTOM_FUNC) ) - rc = domain_context_mapping_one(domain, drhd->iommu, 0, -- PCI_DEVFN(dev, 7), NULL); -+ PCI_DEVFN(dev, 7), NULL, mode); - else - rc = domain_context_unmap_one(domain, drhd->iommu, 0, - PCI_DEVFN(dev, 7)); -@@ -428,7 +429,8 @@ static int __must_check map_me_phantom_f - return rc; - } - --int me_wifi_quirk(struct domain *domain, u8 bus, u8 devfn, int map) -+int me_wifi_quirk(struct domain *domain, uint8_t bus, uint8_t devfn, -+ unsigned int mode) - { - u32 id; - int rc = 0; -@@ -452,7 +454,7 @@ int me_wifi_quirk(struct domain *domain, - case 0x423b8086: - case 0x423c8086: - case 0x423d8086: -- rc = map_me_phantom_function(domain, 3, map); -+ rc = map_me_phantom_function(domain, 3, mode); - break; - default: - break; -@@ -478,7 +480,7 @@ int me_wifi_quirk(struct domain *domain, - case 0x42388086: /* Puma Peak */ - case 0x422b8086: - case 0x422c8086: -- rc = map_me_phantom_function(domain, 22, map); -+ rc = map_me_phantom_function(domain, 22, mode); - break; - default: - break; ---- a/xen/drivers/passthrough/vtd/vtd.h -+++ b/xen/drivers/passthrough/vtd/vtd.h -@@ -22,8 +22,14 @@ - - #include - --#define MAP_ME_PHANTOM_FUNC 1 --#define UNMAP_ME_PHANTOM_FUNC 0 -+/* -+ * Values for domain_context_mapping_one()'s and me_wifi_quirk()'s "mode" -+ * parameters. -+ */ -+#define MAP_WITH_RMRR (1u << 0) -+#define MAP_OWNER_DYING (1u << 1) -+#define MAP_SINGLE_DEVICE (1u << 2) -+#define UNMAP_ME_PHANTOM_FUNC (1u << 3) - - /* Allow for both IOAPIC and IOSAPIC. */ - #define IO_xAPIC_route_entry IO_APIC_route_entry diff --git a/xsa400-4.16-06.patch b/xsa400-4.16-06.patch deleted file mode 100644 index dfde0ee..0000000 --- a/xsa400-4.16-06.patch +++ /dev/null @@ -1,330 +0,0 @@ -From: Jan Beulich -Subject: AMD/IOMMU: re-assign devices directly - -Devices with unity map ranges, due to it being unspecified how/when -these memory ranges may get accessed, may not be left disconnected from -their unity mappings (as long as it's not certain that the device has -been fully quiesced). Hence rather than tearing down the old root page -table pointer and then establishing the new one, re-assignment needs to -be done in a single step. - -This is CVE-2022-26360 / part of XSA-400. - -Reported-by: Roger Pau Monné - -Similarly quarantining scratch-page mode relies on page tables to be -continuously wired up. - -To avoid complicating things more than necessary, treat all devices -mostly equally, i.e. regardless of their association with any unity map -ranges. The main difference is when it comes to updating DTEs, which need -to be atomic when there are unity mappings. Yet atomicity can only be -achieved with CMPXCHG16B, availability of which we can't take for given. - -Signed-off-by: Jan Beulich -Reviewed-by: Paul Durrant -Reviewed-by: Roger Pau Monné - ---- a/xen/drivers/passthrough/amd/iommu.h -+++ b/xen/drivers/passthrough/amd/iommu.h -@@ -262,9 +262,13 @@ void amd_iommu_set_intremap_table(struct - const void *ptr, - const struct amd_iommu *iommu, - bool valid); --void amd_iommu_set_root_page_table(struct amd_iommu_dte *dte, -- uint64_t root_ptr, uint16_t domain_id, -- uint8_t paging_mode, bool valid); -+#define SET_ROOT_VALID (1u << 0) -+#define SET_ROOT_WITH_UNITY_MAP (1u << 1) -+int __must_check amd_iommu_set_root_page_table(struct amd_iommu_dte *dte, -+ uint64_t root_ptr, -+ uint16_t domain_id, -+ uint8_t paging_mode, -+ unsigned int flags); - void iommu_dte_add_device_entry(struct amd_iommu_dte *dte, - const struct ivrs_mappings *ivrs_dev); - ---- a/xen/drivers/passthrough/amd/iommu_map.c -+++ b/xen/drivers/passthrough/amd/iommu_map.c -@@ -114,10 +114,69 @@ static unsigned int set_iommu_ptes_prese - return flush_flags; - } - --void amd_iommu_set_root_page_table(struct amd_iommu_dte *dte, -- uint64_t root_ptr, uint16_t domain_id, -- uint8_t paging_mode, bool valid) -+/* -+ * This function returns -+ * - -errno for errors, -+ * - 0 for a successful update, atomic when necessary -+ * - 1 for a successful but non-atomic update, which may need to be warned -+ * about by the caller. -+ */ -+int amd_iommu_set_root_page_table(struct amd_iommu_dte *dte, -+ uint64_t root_ptr, uint16_t domain_id, -+ uint8_t paging_mode, unsigned int flags) - { -+ bool valid = flags & SET_ROOT_VALID; -+ -+ if ( dte->v && dte->tv && -+ (cpu_has_cx16 || (flags & SET_ROOT_WITH_UNITY_MAP)) ) -+ { -+ union { -+ struct amd_iommu_dte dte; -+ uint64_t raw64[4]; -+ __uint128_t raw128[2]; -+ } ldte = { .dte = *dte }; -+ __uint128_t old = ldte.raw128[0]; -+ int ret = 0; -+ -+ ldte.dte.domain_id = domain_id; -+ ldte.dte.pt_root = paddr_to_pfn(root_ptr); -+ ldte.dte.iw = true; -+ ldte.dte.ir = true; -+ ldte.dte.paging_mode = paging_mode; -+ ldte.dte.v = valid; -+ -+ if ( cpu_has_cx16 ) -+ { -+ __uint128_t res = cmpxchg16b(dte, &old, &ldte.raw128[0]); -+ -+ /* -+ * Hardware does not update the DTE behind our backs, so the -+ * return value should match "old". -+ */ -+ if ( res != old ) -+ { -+ printk(XENLOG_ERR -+ "Dom%d: unexpected DTE %016lx_%016lx (expected %016lx_%016lx)\n", -+ domain_id, -+ (uint64_t)(res >> 64), (uint64_t)res, -+ (uint64_t)(old >> 64), (uint64_t)old); -+ ret = -EILSEQ; -+ } -+ } -+ else /* Best effort, updating domain_id last. */ -+ { -+ uint64_t *ptr = (void *)dte; -+ -+ write_atomic(ptr + 0, ldte.raw64[0]); -+ /* No barrier should be needed between these two. */ -+ write_atomic(ptr + 1, ldte.raw64[1]); -+ -+ ret = 1; -+ } -+ -+ return ret; -+ } -+ - if ( valid || dte->v ) - { - dte->tv = false; -@@ -132,6 +191,8 @@ void amd_iommu_set_root_page_table(struc - smp_wmb(); - dte->tv = true; - dte->v = valid; -+ -+ return 0; - } - - void amd_iommu_set_intremap_table( ---- a/xen/drivers/passthrough/amd/pci_amd_iommu.c -+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c -@@ -96,13 +96,32 @@ static int __must_check allocate_domain_ - return rc; - } - -+static bool any_pdev_behind_iommu(const struct domain *d, -+ const struct pci_dev *exclude, -+ const struct amd_iommu *iommu) -+{ -+ const struct pci_dev *pdev; -+ -+ for_each_pdev ( d, pdev ) -+ { -+ if ( pdev == exclude ) -+ continue; -+ -+ if ( find_iommu_for_device(pdev->seg, pdev->sbdf.bdf) == iommu ) -+ return true; -+ } -+ -+ return false; -+} -+ - static int __must_check amd_iommu_setup_domain_device( - struct domain *domain, struct amd_iommu *iommu, - uint8_t devfn, struct pci_dev *pdev) - { - struct amd_iommu_dte *table, *dte; - unsigned long flags; -- int req_id, valid = 1, rc; -+ unsigned int req_id, sr_flags; -+ int rc; - u8 bus = pdev->bus; - struct domain_iommu *hd = dom_iommu(domain); - const struct ivrs_mappings *ivrs_dev; -@@ -116,8 +135,11 @@ static int __must_check amd_iommu_setup_ - if ( rc ) - return rc; - -- if ( iommu_hwdom_passthrough && is_hardware_domain(domain) ) -- valid = 0; -+ req_id = get_dma_requestor_id(iommu->seg, pdev->sbdf.bdf); -+ ivrs_dev = &get_ivrs_mappings(iommu->seg)[req_id]; -+ sr_flags = (iommu_hwdom_passthrough && is_hardware_domain(domain) -+ ? 0 : SET_ROOT_VALID) -+ | (ivrs_dev->unity_map ? SET_ROOT_WITH_UNITY_MAP : 0); - - /* get device-table entry */ - req_id = get_dma_requestor_id(iommu->seg, PCI_BDF2(bus, devfn)); -@@ -130,9 +152,15 @@ static int __must_check amd_iommu_setup_ - if ( !dte->v || !dte->tv ) - { - /* bind DTE to domain page-tables */ -- amd_iommu_set_root_page_table( -- dte, page_to_maddr(hd->arch.amd.root_table), -- domain->domain_id, hd->arch.amd.paging_mode, valid); -+ rc = amd_iommu_set_root_page_table( -+ dte, page_to_maddr(hd->arch.amd.root_table), -+ domain->domain_id, hd->arch.amd.paging_mode, sr_flags); -+ if ( rc ) -+ { -+ ASSERT(rc < 0); -+ spin_unlock_irqrestore(&iommu->lock, flags); -+ return rc; -+ } - - /* Undo what amd_iommu_disable_domain_device() may have done. */ - if ( dte->it_root ) -@@ -152,17 +180,76 @@ static int __must_check amd_iommu_setup_ - spin_unlock_irqrestore(&iommu->lock, flags); - - amd_iommu_flush_device(iommu, req_id); -+ } -+ else if ( dte->pt_root != mfn_x(page_to_mfn(hd->arch.amd.root_table)) ) -+ { -+ /* -+ * Strictly speaking if the device is the only one with this requestor -+ * ID, it could be allowed to be re-assigned regardless of unity map -+ * presence. But let's deal with that case only if it is actually -+ * found in the wild. -+ */ -+ if ( req_id != PCI_BDF2(bus, devfn) && -+ (sr_flags & SET_ROOT_WITH_UNITY_MAP) ) -+ rc = -EOPNOTSUPP; -+ else -+ rc = amd_iommu_set_root_page_table( -+ dte, page_to_maddr(hd->arch.amd.root_table), -+ domain->domain_id, hd->arch.amd.paging_mode, sr_flags); -+ if ( rc < 0 ) -+ { -+ spin_unlock_irqrestore(&iommu->lock, flags); -+ return rc; -+ } -+ if ( rc && -+ domain != pdev->domain && -+ /* -+ * By non-atomically updating the DTE's domain ID field last, -+ * during a short window in time TLB entries with the old domain -+ * ID but the new page tables may have been inserted. This could -+ * affect I/O of other devices using this same (old) domain ID. -+ * Such updating therefore is not a problem if this was the only -+ * device associated with the old domain ID. Diverting I/O of any -+ * of a dying domain's devices to the quarantine page tables is -+ * intended anyway. -+ */ -+ !pdev->domain->is_dying && -+ (any_pdev_behind_iommu(pdev->domain, pdev, iommu) || -+ pdev->phantom_stride) ) -+ AMD_IOMMU_WARN(" %pp: reassignment may cause %pd data corruption\n", -+ &PCI_SBDF3(pdev->seg, bus, devfn), pdev->domain); -+ -+ /* -+ * Check remaining settings are still in place from an earlier call -+ * here. They're all independent of the domain, so should not have -+ * changed. -+ */ -+ if ( dte->it_root ) -+ ASSERT(dte->int_ctl == IOMMU_DEV_TABLE_INT_CONTROL_TRANSLATED); -+ ASSERT(dte->iv == iommu_intremap); -+ ASSERT(dte->ex == ivrs_dev->dte_allow_exclusion); -+ ASSERT(dte->sys_mgt == MASK_EXTR(ivrs_dev->device_flags, -+ ACPI_IVHD_SYSTEM_MGMT)); - -- AMD_IOMMU_DEBUG("Setup I/O page table: device id = %#x, type = %#x, " -- "root table = %#"PRIx64", " -- "domain = %d, paging mode = %d\n", -- req_id, pdev->type, -- page_to_maddr(hd->arch.amd.root_table), -- domain->domain_id, hd->arch.amd.paging_mode); -+ if ( pci_ats_device(iommu->seg, bus, pdev->devfn) && -+ !ivrs_dev->block_ats && -+ iommu_has_cap(iommu, PCI_CAP_IOTLB_SHIFT) ) -+ ASSERT(dte->i == ats_enabled); -+ -+ spin_unlock_irqrestore(&iommu->lock, flags); -+ -+ amd_iommu_flush_device(iommu, req_id); - } - else - spin_unlock_irqrestore(&iommu->lock, flags); - -+ AMD_IOMMU_DEBUG("Setup I/O page table: device id = %#x, type = %#x, " -+ "root table = %#"PRIx64", " -+ "domain = %d, paging mode = %d\n", -+ req_id, pdev->type, -+ page_to_maddr(hd->arch.amd.root_table), -+ domain->domain_id, hd->arch.amd.paging_mode); -+ - ASSERT(pcidevs_locked()); - - if ( pci_ats_device(iommu->seg, bus, pdev->devfn) && -@@ -366,7 +453,20 @@ static int reassign_device(struct domain - return -ENODEV; - } - -- amd_iommu_disable_domain_device(source, iommu, devfn, pdev); -+ if ( !QUARANTINE_SKIP(target) ) -+ { -+ rc = amd_iommu_setup_domain_device(target, iommu, devfn, pdev); -+ if ( rc ) -+ return rc; -+ } -+ else -+ amd_iommu_disable_domain_device(source, iommu, devfn, pdev); -+ -+ if ( devfn == pdev->devfn && pdev->domain != target ) -+ { -+ list_move(&pdev->domain_list, &target->pdev_list); -+ pdev->domain = target; -+ } - - /* - * If the device belongs to the hardware domain, and it has a unity mapping, -@@ -382,25 +482,9 @@ static int reassign_device(struct domain - return rc; - } - -- if ( devfn == pdev->devfn && pdev->domain != dom_io ) -- { -- list_move(&pdev->domain_list, &dom_io->pdev_list); -- pdev->domain = dom_io; -- } -- -- rc = amd_iommu_setup_domain_device(target, iommu, devfn, pdev); -- if ( rc ) -- return rc; -- - AMD_IOMMU_DEBUG("Re-assign %pp from dom%d to dom%d\n", - &pdev->sbdf, source->domain_id, target->domain_id); - -- if ( devfn == pdev->devfn && pdev->domain != target ) -- { -- list_move(&pdev->domain_list, &target->pdev_list); -- pdev->domain = target; -- } -- - return 0; - } - diff --git a/xsa400-4.16-07.patch b/xsa400-4.16-07.patch deleted file mode 100644 index 8af34d5..0000000 --- a/xsa400-4.16-07.patch +++ /dev/null @@ -1,333 +0,0 @@ -From: Jan Beulich -Subject: VT-d: prepare for per-device quarantine page tables (part I) - -Arrange for domain ID and page table root to be passed around, the latter in -particular to domain_pgd_maddr() such that taking it from the per-domain -fields can be overridden. - -No functional change intended. - -Signed-off-by: Jan Beulich -Reviewed-by: Paul Durrant -Reviewed-by: Roger Pau Monné -Reviewed-by: Kevin Tian - ---- a/xen/drivers/passthrough/vtd/extern.h -+++ b/xen/drivers/passthrough/vtd/extern.h -@@ -85,9 +85,10 @@ void *map_vtd_domain_page(u64 maddr); - void unmap_vtd_domain_page(const void *va); - int domain_context_mapping_one(struct domain *domain, struct vtd_iommu *iommu, - uint8_t bus, uint8_t devfn, -- const struct pci_dev *pdev, unsigned int mode); -+ const struct pci_dev *pdev, domid_t domid, -+ paddr_t pgd_maddr, unsigned int mode); - int domain_context_unmap_one(struct domain *domain, struct vtd_iommu *iommu, -- u8 bus, u8 devfn); -+ uint8_t bus, uint8_t devfn, domid_t domid); - int intel_iommu_get_reserved_device_memory(iommu_grdm_t *func, void *ctxt); - - unsigned int io_apic_read_remap_rte(unsigned int apic, unsigned int reg); -@@ -106,7 +107,8 @@ void platform_quirks_init(void); - void vtd_ops_preamble_quirk(struct vtd_iommu *iommu); - void vtd_ops_postamble_quirk(struct vtd_iommu *iommu); - int __must_check me_wifi_quirk(struct domain *domain, uint8_t bus, -- uint8_t devfn, unsigned int mode); -+ uint8_t devfn, domid_t domid, paddr_t pgd_maddr, -+ unsigned int mode); - void pci_vtd_quirk(const struct pci_dev *); - void quirk_iommu_caps(struct vtd_iommu *iommu); - ---- a/xen/drivers/passthrough/vtd/iommu.c -+++ b/xen/drivers/passthrough/vtd/iommu.c -@@ -43,7 +43,7 @@ - #include "../ats.h" - - /* dom_io is used as a sentinel for quarantined devices */ --#define QUARANTINE_SKIP(d) ((d) == dom_io && !dom_iommu(d)->arch.vtd.pgd_maddr) -+#define QUARANTINE_SKIP(d, pgd_maddr) ((d) == dom_io && !(pgd_maddr)) - - /* Possible unfiltered LAPIC/MSI messages from untrusted sources? */ - bool __read_mostly untrusted_msi; -@@ -358,15 +358,17 @@ static u64 addr_to_dma_page_maddr(struct - return pte_maddr; - } - --static uint64_t domain_pgd_maddr(struct domain *d, unsigned int nr_pt_levels) -+static paddr_t domain_pgd_maddr(struct domain *d, paddr_t pgd_maddr, -+ unsigned int nr_pt_levels) - { - struct domain_iommu *hd = dom_iommu(d); -- uint64_t pgd_maddr; - unsigned int agaw; - - ASSERT(spin_is_locked(&hd->arch.mapping_lock)); - -- if ( iommu_use_hap_pt(d) ) -+ if ( pgd_maddr ) -+ /* nothing */; -+ else if ( iommu_use_hap_pt(d) ) - { - pagetable_t pgt = p2m_get_pagetable(p2m_get_hostp2m(d)); - -@@ -1385,18 +1387,18 @@ int domain_context_mapping_one( - struct domain *domain, - struct vtd_iommu *iommu, - uint8_t bus, uint8_t devfn, const struct pci_dev *pdev, -- unsigned int mode) -+ domid_t domid, paddr_t pgd_maddr, unsigned int mode) - { - struct domain_iommu *hd = dom_iommu(domain); - struct context_entry *context, *context_entries, lctxt; - __uint128_t old; -- u64 maddr, pgd_maddr; -+ uint64_t maddr; - uint16_t seg = iommu->drhd->segment, prev_did = 0; - struct domain *prev_dom = NULL; - int rc, ret; - bool_t flush_dev_iotlb; - -- if ( QUARANTINE_SKIP(domain) ) -+ if ( QUARANTINE_SKIP(domain, pgd_maddr) ) - return 0; - - ASSERT(pcidevs_locked()); -@@ -1433,10 +1435,12 @@ int domain_context_mapping_one( - } - else - { -+ paddr_t root; -+ - spin_lock(&hd->arch.mapping_lock); - -- pgd_maddr = domain_pgd_maddr(domain, iommu->nr_pt_levels); -- if ( !pgd_maddr ) -+ root = domain_pgd_maddr(domain, pgd_maddr, iommu->nr_pt_levels); -+ if ( !root ) - { - spin_unlock(&hd->arch.mapping_lock); - spin_unlock(&iommu->lock); -@@ -1446,7 +1450,7 @@ int domain_context_mapping_one( - return -ENOMEM; - } - -- context_set_address_root(lctxt, pgd_maddr); -+ context_set_address_root(lctxt, root); - if ( ats_enabled && ecap_dev_iotlb(iommu->ecap) ) - context_set_translation_type(lctxt, CONTEXT_TT_DEV_IOTLB); - else -@@ -1562,15 +1566,21 @@ int domain_context_mapping_one( - unmap_vtd_domain_page(context_entries); - - if ( !seg && !rc ) -- rc = me_wifi_quirk(domain, bus, devfn, mode); -+ rc = me_wifi_quirk(domain, bus, devfn, domid, pgd_maddr, mode); - - if ( rc ) - { - if ( !prev_dom ) -- ret = domain_context_unmap_one(domain, iommu, bus, devfn); -+ ret = domain_context_unmap_one(domain, iommu, bus, devfn, -+ domain->domain_id); - else if ( prev_dom != domain ) /* Avoid infinite recursion. */ -+ { -+ hd = dom_iommu(prev_dom); - ret = domain_context_mapping_one(prev_dom, iommu, bus, devfn, pdev, -+ domain->domain_id, -+ hd->arch.vtd.pgd_maddr, - mode & MAP_WITH_RMRR) < 0; -+ } - else - ret = 1; - -@@ -1592,6 +1602,7 @@ static int domain_context_mapping(struct - { - const struct acpi_drhd_unit *drhd = acpi_find_matched_drhd_unit(pdev); - const struct acpi_rmrr_unit *rmrr; -+ paddr_t pgd_maddr = dom_iommu(domain)->arch.vtd.pgd_maddr; - int ret = 0; - unsigned int i, mode = 0; - uint16_t seg = pdev->seg, bdf; -@@ -1654,7 +1665,8 @@ static int domain_context_mapping(struct - printk(VTDPREFIX "%pd:PCIe: map %pp\n", - domain, &PCI_SBDF3(seg, bus, devfn)); - ret = domain_context_mapping_one(domain, drhd->iommu, bus, devfn, -- pdev, mode); -+ pdev, domain->domain_id, pgd_maddr, -+ mode); - if ( ret > 0 ) - ret = 0; - if ( !ret && devfn == pdev->devfn && ats_device(pdev, drhd) > 0 ) -@@ -1671,7 +1683,8 @@ static int domain_context_mapping(struct - domain, &PCI_SBDF3(seg, bus, devfn)); - - ret = domain_context_mapping_one(domain, drhd->iommu, bus, devfn, -- pdev, mode); -+ pdev, domain->domain_id, pgd_maddr, -+ mode); - if ( ret < 0 ) - break; - prev_present = ret; -@@ -1699,7 +1712,8 @@ static int domain_context_mapping(struct - */ - if ( ret >= 0 ) - ret = domain_context_mapping_one(domain, drhd->iommu, bus, devfn, -- NULL, mode); -+ NULL, domain->domain_id, pgd_maddr, -+ mode); - - /* - * Devices behind PCIe-to-PCI/PCIx bridge may generate different -@@ -1714,7 +1728,8 @@ static int domain_context_mapping(struct - if ( !ret && pdev_type(seg, bus, devfn) == DEV_TYPE_PCIe2PCI_BRIDGE && - (secbus != pdev->bus || pdev->devfn != 0) ) - ret = domain_context_mapping_one(domain, drhd->iommu, secbus, 0, -- NULL, mode); -+ NULL, domain->domain_id, pgd_maddr, -+ mode); - - if ( ret ) - { -@@ -1742,14 +1757,14 @@ static int domain_context_mapping(struct - int domain_context_unmap_one( - struct domain *domain, - struct vtd_iommu *iommu, -- u8 bus, u8 devfn) -+ uint8_t bus, uint8_t devfn, domid_t domid) - { - struct context_entry *context, *context_entries; - u64 maddr; - int iommu_domid, rc, ret; - bool_t flush_dev_iotlb; - -- if ( QUARANTINE_SKIP(domain) ) -+ if ( QUARANTINE_SKIP(domain, dom_iommu(domain)->arch.vtd.pgd_maddr) ) - return 0; - - ASSERT(pcidevs_locked()); -@@ -1803,7 +1818,7 @@ int domain_context_unmap_one( - unmap_vtd_domain_page(context_entries); - - if ( !iommu->drhd->segment && !rc ) -- rc = me_wifi_quirk(domain, bus, devfn, UNMAP_ME_PHANTOM_FUNC); -+ rc = me_wifi_quirk(domain, bus, devfn, domid, 0, UNMAP_ME_PHANTOM_FUNC); - - if ( rc && !is_hardware_domain(domain) && domain != dom_io ) - { -@@ -1850,7 +1865,8 @@ static int domain_context_unmap(struct d - if ( iommu_debug ) - printk(VTDPREFIX "%pd:PCIe: unmap %pp\n", - domain, &PCI_SBDF3(seg, bus, devfn)); -- ret = domain_context_unmap_one(domain, iommu, bus, devfn); -+ ret = domain_context_unmap_one(domain, iommu, bus, devfn, -+ domain->domain_id); - if ( !ret && devfn == pdev->devfn && ats_device(pdev, drhd) > 0 ) - disable_ats_device(pdev); - -@@ -1863,7 +1879,8 @@ static int domain_context_unmap(struct d - if ( iommu_debug ) - printk(VTDPREFIX "%pd:PCI: unmap %pp\n", - domain, &PCI_SBDF3(seg, bus, devfn)); -- ret = domain_context_unmap_one(domain, iommu, bus, devfn); -+ ret = domain_context_unmap_one(domain, iommu, bus, devfn, -+ domain->domain_id); - if ( ret ) - break; - -@@ -1889,12 +1906,15 @@ static int domain_context_unmap(struct d - /* PCIe to PCI/PCIx bridge */ - if ( pdev_type(seg, tmp_bus, tmp_devfn) == DEV_TYPE_PCIe2PCI_BRIDGE ) - { -- ret = domain_context_unmap_one(domain, iommu, tmp_bus, tmp_devfn); -+ ret = domain_context_unmap_one(domain, iommu, tmp_bus, tmp_devfn, -+ domain->domain_id); - if ( !ret ) -- ret = domain_context_unmap_one(domain, iommu, secbus, 0); -+ ret = domain_context_unmap_one(domain, iommu, secbus, 0, -+ domain->domain_id); - } - else /* Legacy PCI bridge */ -- ret = domain_context_unmap_one(domain, iommu, tmp_bus, tmp_devfn); -+ ret = domain_context_unmap_one(domain, iommu, tmp_bus, tmp_devfn, -+ domain->domain_id); - - break; - -@@ -1904,7 +1924,8 @@ static int domain_context_unmap(struct d - return -EINVAL; - } - -- if ( !ret && !QUARANTINE_SKIP(domain) && pdev->devfn == devfn ) -+ if ( !ret && pdev->devfn == devfn && -+ !QUARANTINE_SKIP(domain, dom_iommu(domain)->arch.vtd.pgd_maddr) ) - check_cleanup_domid_map(domain, pdev, iommu); - - return ret; -@@ -2511,7 +2532,7 @@ static int reassign_device_ownership( - { - int ret; - -- if ( !QUARANTINE_SKIP(target) ) -+ if ( !QUARANTINE_SKIP(target, dom_iommu(target)->arch.vtd.pgd_maddr) ) - { - if ( !has_arch_pdevs(target) ) - vmx_pi_hooks_assign(target); -@@ -2526,7 +2547,8 @@ static int reassign_device_ownership( - - ret = domain_context_mapping(target, devfn, pdev); - -- if ( !ret && !QUARANTINE_SKIP(source) && pdev->devfn == devfn ) -+ if ( !ret && pdev->devfn == devfn && -+ !QUARANTINE_SKIP(source, dom_iommu(source)->arch.vtd.pgd_maddr) ) - { - const struct acpi_drhd_unit *drhd = acpi_find_matched_drhd_unit(pdev); - ---- a/xen/drivers/passthrough/vtd/quirks.c -+++ b/xen/drivers/passthrough/vtd/quirks.c -@@ -408,6 +408,8 @@ void __init platform_quirks_init(void) - - static int __must_check map_me_phantom_function(struct domain *domain, - unsigned int dev, -+ domid_t domid, -+ paddr_t pgd_maddr, - unsigned int mode) - { - struct acpi_drhd_unit *drhd; -@@ -421,16 +423,17 @@ static int __must_check map_me_phantom_f - /* map or unmap ME phantom function */ - if ( !(mode & UNMAP_ME_PHANTOM_FUNC) ) - rc = domain_context_mapping_one(domain, drhd->iommu, 0, -- PCI_DEVFN(dev, 7), NULL, mode); -+ PCI_DEVFN(dev, 7), NULL, -+ domid, pgd_maddr, mode); - else - rc = domain_context_unmap_one(domain, drhd->iommu, 0, -- PCI_DEVFN(dev, 7)); -+ PCI_DEVFN(dev, 7), domid); - - return rc; - } - - int me_wifi_quirk(struct domain *domain, uint8_t bus, uint8_t devfn, -- unsigned int mode) -+ domid_t domid, paddr_t pgd_maddr, unsigned int mode) - { - u32 id; - int rc = 0; -@@ -454,7 +457,7 @@ int me_wifi_quirk(struct domain *domain, - case 0x423b8086: - case 0x423c8086: - case 0x423d8086: -- rc = map_me_phantom_function(domain, 3, mode); -+ rc = map_me_phantom_function(domain, 3, domid, pgd_maddr, mode); - break; - default: - break; -@@ -480,7 +483,7 @@ int me_wifi_quirk(struct domain *domain, - case 0x42388086: /* Puma Peak */ - case 0x422b8086: - case 0x422c8086: -- rc = map_me_phantom_function(domain, 22, mode); -+ rc = map_me_phantom_function(domain, 22, domid, pgd_maddr, mode); - break; - default: - break; diff --git a/xsa400-4.16-08.patch b/xsa400-4.16-08.patch deleted file mode 100644 index 56a2b32..0000000 --- a/xsa400-4.16-08.patch +++ /dev/null @@ -1,144 +0,0 @@ -From: Jan Beulich -Subject: VT-d: prepare for per-device quarantine page tables (part II) - -Replace the passing of struct domain * by domid_t in preparation of -per-device quarantine page tables also requiring per-device pseudo -domain IDs, which aren't going to be associated with any struct domain -instances. - -No functional change intended (except for slightly adjusted log message -text). - -Signed-off-by: Jan Beulich -Reviewed-by: Paul Durrant -Reviewed-by: Kevin Tian -Reviewed-by: Roger Pau Monné - ---- a/xen/drivers/passthrough/vtd/iommu.c -+++ b/xen/drivers/passthrough/vtd/iommu.c -@@ -62,8 +62,8 @@ static struct tasklet vtd_fault_tasklet; - static int setup_hwdom_device(u8 devfn, struct pci_dev *); - static void setup_hwdom_rmrr(struct domain *d); - --static int domain_iommu_domid(struct domain *d, -- struct vtd_iommu *iommu) -+static int get_iommu_did(domid_t domid, const struct vtd_iommu *iommu, -+ bool warn) - { - unsigned long nr_dom, i; - -@@ -71,16 +71,16 @@ static int domain_iommu_domid(struct dom - i = find_first_bit(iommu->domid_bitmap, nr_dom); - while ( i < nr_dom ) - { -- if ( iommu->domid_map[i] == d->domain_id ) -+ if ( iommu->domid_map[i] == domid ) - return i; - - i = find_next_bit(iommu->domid_bitmap, nr_dom, i+1); - } - -- if ( !d->is_dying ) -+ if ( warn ) - dprintk(XENLOG_ERR VTDPREFIX, -- "Cannot get valid iommu %u domid: %pd\n", -- iommu->index, d); -+ "No valid iommu %u domid for Dom%d\n", -+ iommu->index, domid); - - return -1; - } -@@ -88,8 +88,7 @@ static int domain_iommu_domid(struct dom - #define DID_FIELD_WIDTH 16 - #define DID_HIGH_OFFSET 8 - static int context_set_domain_id(struct context_entry *context, -- struct domain *d, -- struct vtd_iommu *iommu) -+ domid_t domid, struct vtd_iommu *iommu) - { - unsigned long nr_dom, i; - int found = 0; -@@ -100,7 +99,7 @@ static int context_set_domain_id(struct - i = find_first_bit(iommu->domid_bitmap, nr_dom); - while ( i < nr_dom ) - { -- if ( iommu->domid_map[i] == d->domain_id ) -+ if ( iommu->domid_map[i] == domid ) - { - found = 1; - break; -@@ -116,7 +115,7 @@ static int context_set_domain_id(struct - dprintk(XENLOG_ERR VTDPREFIX, "IOMMU: no free domain ids\n"); - return -EFAULT; - } -- iommu->domid_map[i] = d->domain_id; -+ iommu->domid_map[i] = domid; - } - - set_bit(i, iommu->domid_bitmap); -@@ -125,9 +124,9 @@ static int context_set_domain_id(struct - return 0; - } - --static void cleanup_domid_map(struct domain *domain, struct vtd_iommu *iommu) -+static void cleanup_domid_map(domid_t domid, struct vtd_iommu *iommu) - { -- int iommu_domid = domain_iommu_domid(domain, iommu); -+ int iommu_domid = get_iommu_did(domid, iommu, false); - - if ( iommu_domid >= 0 ) - { -@@ -167,7 +166,7 @@ static bool any_pdev_behind_iommu(const - * If no other devices under the same iommu owned by this domain, - * clear iommu in iommu_bitmap and clear domain_id in domid_bitmap. - */ --static void check_cleanup_domid_map(struct domain *d, -+static void check_cleanup_domid_map(const struct domain *d, - const struct pci_dev *exclude, - struct vtd_iommu *iommu) - { -@@ -183,7 +182,7 @@ static void check_cleanup_domid_map(stru - if ( !found ) - { - clear_bit(iommu->index, dom_iommu(d)->arch.vtd.iommu_bitmap); -- cleanup_domid_map(d, iommu); -+ cleanup_domid_map(d->domain_id, iommu); - } - } - -@@ -683,7 +682,7 @@ static int __must_check iommu_flush_iotl - continue; - - flush_dev_iotlb = !!find_ats_dev_drhd(iommu); -- iommu_domid= domain_iommu_domid(d, iommu); -+ iommu_domid = get_iommu_did(d->domain_id, iommu, !d->is_dying); - if ( iommu_domid == -1 ) - continue; - -@@ -1459,7 +1458,7 @@ int domain_context_mapping_one( - spin_unlock(&hd->arch.mapping_lock); - } - -- if ( context_set_domain_id(&lctxt, domain, iommu) ) -+ if ( context_set_domain_id(&lctxt, domid, iommu) ) - { - unlock: - spin_unlock(&iommu->lock); -@@ -1785,7 +1784,7 @@ int domain_context_unmap_one( - context_clear_entry(*context); - iommu_sync_cache(context, sizeof(struct context_entry)); - -- iommu_domid= domain_iommu_domid(domain, iommu); -+ iommu_domid = get_iommu_did(domid, iommu, !domain->is_dying); - if ( iommu_domid == -1 ) - { - spin_unlock(&iommu->lock); -@@ -1953,7 +1952,7 @@ static void iommu_domain_teardown(struct - ASSERT(!hd->arch.vtd.pgd_maddr); - - for_each_drhd_unit ( drhd ) -- cleanup_domid_map(d, drhd->iommu); -+ cleanup_domid_map(d->domain_id, drhd->iommu); - - XFREE(hd->arch.vtd.iommu_bitmap); - } diff --git a/xsa400-4.16-09.patch b/xsa400-4.16-09.patch deleted file mode 100644 index 5cd5732..0000000 --- a/xsa400-4.16-09.patch +++ /dev/null @@ -1,447 +0,0 @@ -From: Jan Beulich -Subject: IOMMU/x86: maintain a per-device pseudo domain ID - -In order to subsequently enable per-device quarantine page tables, we'll -need domain-ID-like identifiers to be inserted in the respective device -(AMD) or context (Intel) table entries alongside the per-device page -table root addresses. - -Make use of "real" domain IDs occupying only half of the value range -coverable by domid_t. - -Note that in VT-d's iommu_alloc() I didn't want to introduce new memory -leaks in case of error, but existing ones don't get plugged - that'll be -the subject of a later change. - -The VT-d changes are slightly asymmetric, but this way we can avoid -assigning pseudo domain IDs to devices which would never be mapped while -still avoiding to add a new parameter to domain_context_unmap(). - -Signed-off-by: Jan Beulich -Reviewed-by: Paul Durrant -Reviewed-by: Kevin Tian -Reviewed-by: Roger Pau Monné - ---- a/xen/include/asm-x86/iommu.h -+++ b/xen/include/asm-x86/iommu.h -@@ -141,6 +141,10 @@ int pi_update_irte(const struct pi_desc - iommu_vcall(ops, sync_cache, addr, size); \ - }) - -+unsigned long *iommu_init_domid(void); -+domid_t iommu_alloc_domid(unsigned long *map); -+void iommu_free_domid(domid_t domid, unsigned long *map); -+ - int __must_check iommu_free_pgtables(struct domain *d); - struct domain_iommu; - struct page_info *__must_check iommu_alloc_pgtable(struct domain_iommu *hd); ---- a/xen/include/asm-x86/pci.h -+++ b/xen/include/asm-x86/pci.h -@@ -13,6 +13,12 @@ - - struct arch_pci_dev { - vmask_t used_vectors; -+ /* -+ * These fields are (de)initialized under pcidevs-lock. Other uses of -+ * them don't race (de)initialization and hence don't strictly need any -+ * locking. -+ */ -+ domid_t pseudo_domid; - }; - - int pci_conf_write_intercept(unsigned int seg, unsigned int bdf, -@@ -36,6 +42,6 @@ static always_inline bool is_pci_passthr - return true; - } - --static inline void arch_pci_init_pdev(struct pci_dev *pdev) {} -+void arch_pci_init_pdev(struct pci_dev *pdev); - - #endif /* __X86_PCI_H__ */ ---- a/xen/drivers/passthrough/amd/iommu.h -+++ b/xen/drivers/passthrough/amd/iommu.h -@@ -96,6 +96,7 @@ struct amd_iommu { - struct ring_buffer cmd_buffer; - struct ring_buffer event_log; - struct ring_buffer ppr_log; -+ unsigned long *domid_map; - - int exclusion_enable; - int exclusion_allow_all; ---- a/xen/drivers/passthrough/amd/iommu_detect.c -+++ b/xen/drivers/passthrough/amd/iommu_detect.c -@@ -223,6 +223,11 @@ int __init amd_iommu_detect_one_acpi( - if ( rt ) - goto out; - -+ iommu->domid_map = iommu_init_domid(); -+ rt = -ENOMEM; -+ if ( !iommu->domid_map ) -+ goto out; -+ - rt = pci_ro_device(iommu->seg, bus, PCI_DEVFN(dev, func)); - if ( rt ) - printk(XENLOG_ERR "Could not mark config space of %pp read-only (%d)\n", -@@ -233,7 +238,10 @@ int __init amd_iommu_detect_one_acpi( - - out: - if ( rt ) -+ { -+ xfree(iommu->domid_map); - xfree(iommu); -+ } - - return rt; - } ---- a/xen/drivers/passthrough/amd/pci_amd_iommu.c -+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c -@@ -539,6 +539,8 @@ static int amd_iommu_add_device(u8 devfn - struct amd_iommu *iommu; - u16 bdf; - struct ivrs_mappings *ivrs_mappings; -+ bool fresh_domid = false; -+ int ret; - - if ( !pdev->domain ) - return -EINVAL; -@@ -606,7 +608,22 @@ static int amd_iommu_add_device(u8 devfn - AMD_IOMMU_WARN("%pd: unity mapping failed for %pp\n", - pdev->domain, &pdev->sbdf); - -- return amd_iommu_setup_domain_device(pdev->domain, iommu, devfn, pdev); -+ if ( iommu_quarantine && pdev->arch.pseudo_domid == DOMID_INVALID ) -+ { -+ pdev->arch.pseudo_domid = iommu_alloc_domid(iommu->domid_map); -+ if ( pdev->arch.pseudo_domid == DOMID_INVALID ) -+ return -ENOSPC; -+ fresh_domid = true; -+ } -+ -+ ret = amd_iommu_setup_domain_device(pdev->domain, iommu, devfn, pdev); -+ if ( ret && fresh_domid ) -+ { -+ iommu_free_domid(pdev->arch.pseudo_domid, iommu->domid_map); -+ pdev->arch.pseudo_domid = DOMID_INVALID; -+ } -+ -+ return ret; - } - - static int amd_iommu_remove_device(u8 devfn, struct pci_dev *pdev) -@@ -638,6 +655,9 @@ static int amd_iommu_remove_device(u8 de - AMD_IOMMU_WARN("%pd: unity unmapping failed for %pp\n", - pdev->domain, &pdev->sbdf); - -+ iommu_free_domid(pdev->arch.pseudo_domid, iommu->domid_map); -+ pdev->arch.pseudo_domid = DOMID_INVALID; -+ - if ( amd_iommu_perdev_intremap && - ivrs_mappings[bdf].dte_requestor_id == bdf && - ivrs_mappings[bdf].intremap_table ) ---- a/xen/drivers/passthrough/pci.c -+++ b/xen/drivers/passthrough/pci.c -@@ -1271,9 +1271,14 @@ static int _dump_pci_devices(struct pci_ - - list_for_each_entry ( pdev, &pseg->alldevs_list, alldevs_list ) - { -- printk("%pp - %pd - node %-3d", -- &pdev->sbdf, pdev->domain, -- (pdev->node != NUMA_NO_NODE) ? pdev->node : -1); -+ printk("%pp - ", &pdev->sbdf); -+#ifdef CONFIG_X86 -+ if ( pdev->domain == dom_io ) -+ printk("DomIO:%x", pdev->arch.pseudo_domid); -+ else -+#endif -+ printk("%pd", pdev->domain); -+ printk(" - node %-3d", (pdev->node != NUMA_NO_NODE) ? pdev->node : -1); - pdev_dump_msi(pdev); - printk("\n"); - } ---- a/xen/drivers/passthrough/vtd/iommu.c -+++ b/xen/drivers/passthrough/vtd/iommu.c -@@ -22,6 +22,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -1215,7 +1216,7 @@ int __init iommu_alloc(struct acpi_drhd_ - { - struct vtd_iommu *iommu; - unsigned long sagaw, nr_dom; -- int agaw; -+ int agaw, rc; - - iommu = xzalloc(struct vtd_iommu); - if ( iommu == NULL ) -@@ -1301,7 +1302,16 @@ int __init iommu_alloc(struct acpi_drhd_ - if ( !iommu->domid_map ) - return -ENOMEM; - -+ iommu->pseudo_domid_map = iommu_init_domid(); -+ rc = -ENOMEM; -+ if ( !iommu->pseudo_domid_map ) -+ goto free; -+ - return 0; -+ -+ free: -+ iommu_free(drhd); -+ return rc; - } - - void __init iommu_free(struct acpi_drhd_unit *drhd) -@@ -1324,6 +1334,7 @@ void __init iommu_free(struct acpi_drhd_ - - xfree(iommu->domid_bitmap); - xfree(iommu->domid_map); -+ xfree(iommu->pseudo_domid_map); - - if ( iommu->msi.irq >= 0 ) - destroy_irq(iommu->msi.irq); -@@ -1593,8 +1604,8 @@ int domain_context_mapping_one( - return rc ?: pdev && prev_dom; - } - --static int domain_context_unmap(struct domain *d, uint8_t devfn, -- struct pci_dev *pdev); -+static const struct acpi_drhd_unit *domain_context_unmap( -+ struct domain *d, uint8_t devfn, struct pci_dev *pdev); - - static int domain_context_mapping(struct domain *domain, u8 devfn, - struct pci_dev *pdev) -@@ -1602,6 +1613,7 @@ static int domain_context_mapping(struct - const struct acpi_drhd_unit *drhd = acpi_find_matched_drhd_unit(pdev); - const struct acpi_rmrr_unit *rmrr; - paddr_t pgd_maddr = dom_iommu(domain)->arch.vtd.pgd_maddr; -+ domid_t orig_domid = pdev->arch.pseudo_domid; - int ret = 0; - unsigned int i, mode = 0; - uint16_t seg = pdev->seg, bdf; -@@ -1660,6 +1672,14 @@ static int domain_context_mapping(struct - if ( !drhd ) - return -ENODEV; - -+ if ( iommu_quarantine && orig_domid == DOMID_INVALID ) -+ { -+ pdev->arch.pseudo_domid = -+ iommu_alloc_domid(drhd->iommu->pseudo_domid_map); -+ if ( pdev->arch.pseudo_domid == DOMID_INVALID ) -+ return -ENOSPC; -+ } -+ - if ( iommu_debug ) - printk(VTDPREFIX "%pd:PCIe: map %pp\n", - domain, &PCI_SBDF3(seg, bus, devfn)); -@@ -1677,6 +1697,14 @@ static int domain_context_mapping(struct - if ( !drhd ) - return -ENODEV; - -+ if ( iommu_quarantine && orig_domid == DOMID_INVALID ) -+ { -+ pdev->arch.pseudo_domid = -+ iommu_alloc_domid(drhd->iommu->pseudo_domid_map); -+ if ( pdev->arch.pseudo_domid == DOMID_INVALID ) -+ return -ENOSPC; -+ } -+ - if ( iommu_debug ) - printk(VTDPREFIX "%pd:PCI: map %pp\n", - domain, &PCI_SBDF3(seg, bus, devfn)); -@@ -1750,6 +1778,13 @@ static int domain_context_mapping(struct - if ( !ret && devfn == pdev->devfn ) - pci_vtd_quirk(pdev); - -+ if ( ret && drhd && orig_domid == DOMID_INVALID ) -+ { -+ iommu_free_domid(pdev->arch.pseudo_domid, -+ drhd->iommu->pseudo_domid_map); -+ pdev->arch.pseudo_domid = DOMID_INVALID; -+ } -+ - return ret; - } - -@@ -1835,8 +1870,10 @@ int domain_context_unmap_one( - return rc; - } - --static int domain_context_unmap(struct domain *domain, u8 devfn, -- struct pci_dev *pdev) -+static const struct acpi_drhd_unit *domain_context_unmap( -+ struct domain *domain, -+ uint8_t devfn, -+ struct pci_dev *pdev) - { - const struct acpi_drhd_unit *drhd = acpi_find_matched_drhd_unit(pdev); - struct vtd_iommu *iommu = drhd ? drhd->iommu : NULL; -@@ -1850,16 +1887,16 @@ static int domain_context_unmap(struct d - if ( iommu_debug ) - printk(VTDPREFIX "%pd:Hostbridge: skip %pp unmap\n", - domain, &PCI_SBDF3(seg, bus, devfn)); -- return is_hardware_domain(domain) ? 0 : -EPERM; -+ return ERR_PTR(is_hardware_domain(domain) ? 0 : -EPERM); - - case DEV_TYPE_PCIe_BRIDGE: - case DEV_TYPE_PCIe2PCI_BRIDGE: - case DEV_TYPE_LEGACY_PCI_BRIDGE: -- return 0; -+ return ERR_PTR(0); - - case DEV_TYPE_PCIe_ENDPOINT: - if ( !iommu ) -- return -ENODEV; -+ return ERR_PTR(-ENODEV); - - if ( iommu_debug ) - printk(VTDPREFIX "%pd:PCIe: unmap %pp\n", -@@ -1873,7 +1910,7 @@ static int domain_context_unmap(struct d - - case DEV_TYPE_PCI: - if ( !iommu ) -- return -ENODEV; -+ return ERR_PTR(-ENODEV); - - if ( iommu_debug ) - printk(VTDPREFIX "%pd:PCI: unmap %pp\n", -@@ -1920,14 +1957,14 @@ static int domain_context_unmap(struct d - default: - dprintk(XENLOG_ERR VTDPREFIX, "%pd:unknown(%u): %pp\n", - domain, pdev->type, &PCI_SBDF3(seg, bus, devfn)); -- return -EINVAL; -+ return ERR_PTR(-EINVAL); - } - - if ( !ret && pdev->devfn == devfn && - !QUARANTINE_SKIP(domain, dom_iommu(domain)->arch.vtd.pgd_maddr) ) - check_cleanup_domid_map(domain, pdev, iommu); - -- return ret; -+ return drhd; - } - - static void iommu_clear_root_pgtable(struct domain *d) -@@ -2154,16 +2191,17 @@ static int intel_iommu_enable_device(str - - static int intel_iommu_remove_device(u8 devfn, struct pci_dev *pdev) - { -+ const struct acpi_drhd_unit *drhd; - struct acpi_rmrr_unit *rmrr; - u16 bdf; -- int ret, i; -+ unsigned int i; - - if ( !pdev->domain ) - return -EINVAL; - -- ret = domain_context_unmap(pdev->domain, devfn, pdev); -- if ( ret ) -- return ret; -+ drhd = domain_context_unmap(pdev->domain, devfn, pdev); -+ if ( IS_ERR(drhd) ) -+ return PTR_ERR(drhd); - - for_each_rmrr_device ( rmrr, bdf, i ) - { -@@ -2180,6 +2218,13 @@ static int intel_iommu_remove_device(u8 - rmrr->end_address, 0); - } - -+ if ( drhd ) -+ { -+ iommu_free_domid(pdev->arch.pseudo_domid, -+ drhd->iommu->pseudo_domid_map); -+ pdev->arch.pseudo_domid = DOMID_INVALID; -+ } -+ - return 0; - } - -@@ -2556,7 +2601,12 @@ static int reassign_device_ownership( - } - } - else -- ret = domain_context_unmap(source, devfn, pdev); -+ { -+ const struct acpi_drhd_unit *drhd; -+ -+ drhd = domain_context_unmap(source, devfn, pdev); -+ ret = IS_ERR(drhd) ? PTR_ERR(drhd) : 0; -+ } - if ( ret ) - { - if ( !has_arch_pdevs(target) ) ---- a/xen/drivers/passthrough/vtd/iommu.h -+++ b/xen/drivers/passthrough/vtd/iommu.h -@@ -508,6 +508,7 @@ struct vtd_iommu { - } flush; - - struct list_head ats_devices; -+ unsigned long *pseudo_domid_map; /* "pseudo" domain id bitmap */ - unsigned long *domid_bitmap; /* domain id bitmap */ - u16 *domid_map; /* domain id mapping array */ - uint32_t version; ---- a/xen/drivers/passthrough/x86/iommu.c -+++ b/xen/drivers/passthrough/x86/iommu.c -@@ -387,6 +387,58 @@ void __hwdom_init arch_iommu_hwdom_init( - return; - } - -+void arch_pci_init_pdev(struct pci_dev *pdev) -+{ -+ pdev->arch.pseudo_domid = DOMID_INVALID; -+} -+ -+unsigned long *__init iommu_init_domid(void) -+{ -+ if ( !iommu_quarantine ) -+ return ZERO_BLOCK_PTR; -+ -+ BUILD_BUG_ON(DOMID_MASK * 2U >= UINT16_MAX); -+ -+ return xzalloc_array(unsigned long, -+ BITS_TO_LONGS(UINT16_MAX - DOMID_MASK)); -+} -+ -+domid_t iommu_alloc_domid(unsigned long *map) -+{ -+ /* -+ * This is used uniformly across all IOMMUs, such that on typical -+ * systems we wouldn't re-use the same ID very quickly (perhaps never). -+ */ -+ static unsigned int start; -+ unsigned int idx = find_next_zero_bit(map, UINT16_MAX - DOMID_MASK, start); -+ -+ ASSERT(pcidevs_locked()); -+ -+ if ( idx >= UINT16_MAX - DOMID_MASK ) -+ idx = find_first_zero_bit(map, UINT16_MAX - DOMID_MASK); -+ if ( idx >= UINT16_MAX - DOMID_MASK ) -+ return DOMID_INVALID; -+ -+ __set_bit(idx, map); -+ -+ start = idx + 1; -+ -+ return idx | (DOMID_MASK + 1); -+} -+ -+void iommu_free_domid(domid_t domid, unsigned long *map) -+{ -+ ASSERT(pcidevs_locked()); -+ -+ if ( domid == DOMID_INVALID ) -+ return; -+ -+ ASSERT(domid > DOMID_MASK); -+ -+ if ( !__test_and_clear_bit(domid & DOMID_MASK, map) ) -+ BUG(); -+} -+ - int iommu_free_pgtables(struct domain *d) - { - struct domain_iommu *hd = dom_iommu(d); diff --git a/xsa400-4.16-10.patch b/xsa400-4.16-10.patch deleted file mode 100644 index 4f5886e..0000000 --- a/xsa400-4.16-10.patch +++ /dev/null @@ -1,38 +0,0 @@ -From: Jan Beulich -Subject: IOMMU/x86: drop TLB flushes from quarantine_init() hooks - -The page tables just created aren't hooked up yet anywhere, so there's -nothing that could be present in any TLB, and hence nothing to flush. -Dropping this flush is, at least on the VT-d side, a prereq to per- -device domain ID use when quarantining devices, as dom_io isn't going -to be assigned a DID anymore: The warning in get_iommu_did() would -trigger. - -Signed-off-by: Jan Beulich -Reviewed-by: Paul Durrant -Reviewed-by: Roger Pau Monné -Reviewed-by: Kevin Tian - ---- a/xen/drivers/passthrough/amd/iommu_map.c -+++ b/xen/drivers/passthrough/amd/iommu_map.c -@@ -654,8 +654,6 @@ int __init amd_iommu_quarantine_init(str - out: - spin_unlock(&hd->arch.mapping_lock); - -- amd_iommu_flush_all_pages(d); -- - /* Pages leaked in failure case */ - return level ? -ENOMEM : 0; - } ---- a/xen/drivers/passthrough/vtd/iommu.c -+++ b/xen/drivers/passthrough/vtd/iommu.c -@@ -2975,9 +2975,6 @@ static int __init intel_iommu_quarantine - out: - spin_unlock(&hd->arch.mapping_lock); - -- if ( !rc ) -- rc = iommu_flush_iotlb_all(d); -- - /* Pages may be leaked in failure case */ - return rc; - } diff --git a/xsa400-4.16-11.patch b/xsa400-4.16-11.patch deleted file mode 100644 index 831983c..0000000 --- a/xsa400-4.16-11.patch +++ /dev/null @@ -1,29 +0,0 @@ -From: Jan Beulich -Subject: AMD/IOMMU: abstract maximum number of page table levels - -We will want to use the constant elsewhere. - -Signed-off-by: Jan Beulich -Reviewed-by: Paul Durrant - ---- a/xen/drivers/passthrough/amd/iommu-defs.h -+++ b/xen/drivers/passthrough/amd/iommu-defs.h -@@ -106,6 +106,7 @@ struct amd_iommu_dte { - bool tv:1; - unsigned int :5; - unsigned int had:2; -+#define IOMMU_MAX_PT_LEVELS 6 - unsigned int paging_mode:3; - uint64_t pt_root:40; - bool ppr:1; ---- a/xen/drivers/passthrough/amd/pci_amd_iommu.c -+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c -@@ -337,7 +337,7 @@ int amd_iommu_alloc_root(struct domain * - return 0; - } - --unsigned int __read_mostly amd_iommu_max_paging_mode = 6; -+unsigned int __read_mostly amd_iommu_max_paging_mode = IOMMU_MAX_PT_LEVELS; - int __read_mostly amd_iommu_min_paging_mode = 1; - - static int amd_iommu_domain_init(struct domain *d) diff --git a/xsa400-4.16-12.patch b/xsa400-4.16-12.patch deleted file mode 100644 index b4836b1..0000000 --- a/xsa400-4.16-12.patch +++ /dev/null @@ -1,945 +0,0 @@ -From: Jan Beulich -Subject: IOMMU/x86: use per-device page tables for quarantining - -Devices with RMRRs / unity mapped regions, due to it being unspecified -how/when these memory regions may be accessed, may not be left -disconnected from the mappings of these regions (as long as it's not -certain that the device has been fully quiesced). Hence even the page -tables used when quarantining such devices need to have mappings of -those regions. This implies installing page tables in the first place -even when not in scratch-page quarantining mode. - -This is CVE-2022-26361 / part of XSA-400. - -While for the purpose here it would be sufficient to have devices with -RMRRs / unity mapped regions use per-device page tables, extend this to -all devices (in scratch-page quarantining mode). This allows the leaf -pages to be mapped r/w, thus covering also memory writes (rather than -just reads) issued by non-quiescent devices. - -Set up quarantine page tables as late as possible, yet early enough to -not encounter failure during de-assign. This means setup generally -happens in assign_device(), while (for now) the one in deassign_device() -is there mainly to be on the safe side. - -As to the removal of QUARANTINE_SKIP() from domain_context_unmap_one(): -I think this was never really needed there, as the function explicitly -deals with finding a non-present context entry. Leaving it there would -require propagating pgd_maddr into the function (like was done by "VT-d: -prepare for per-device quarantine page tables" for -domain_context_mapping_one()). - -In VT-d's DID allocation function don't require the IOMMU lock to be -held anymore: All involved code paths hold pcidevs_lock, so this way we -avoid the need to acquire the IOMMU lock around the new call to -context_set_domain_id(). - -Signed-off-by: Jan Beulich -Reviewed-by: Paul Durrant -Reviewed-by: Kevin Tian -Reviewed-by: Roger Pau Monné - ---- a/xen/include/asm-x86/pci.h -+++ b/xen/include/asm-x86/pci.h -@@ -1,6 +1,8 @@ - #ifndef __X86_PCI_H__ - #define __X86_PCI_H__ - -+#include -+ - #define CF8_BDF(cf8) ( ((cf8) & 0x00ffff00) >> 8) - #define CF8_ADDR_LO(cf8) ( (cf8) & 0x000000fc) - #define CF8_ADDR_HI(cf8) ( ((cf8) & 0x0f000000) >> 16) -@@ -18,7 +20,18 @@ struct arch_pci_dev { - * them don't race (de)initialization and hence don't strictly need any - * locking. - */ -+ union { -+ /* Subset of struct arch_iommu's fields, to be used in dom_io. */ -+ struct { -+ uint64_t pgd_maddr; -+ } vtd; -+ struct { -+ struct page_info *root_table; -+ } amd; -+ }; - domid_t pseudo_domid; -+ mfn_t leaf_mfn; -+ struct page_list_head pgtables_list; - }; - - int pci_conf_write_intercept(unsigned int seg, unsigned int bdf, ---- a/xen/drivers/passthrough/amd/iommu.h -+++ b/xen/drivers/passthrough/amd/iommu.h -@@ -237,7 +237,8 @@ int amd_iommu_init_late(void); - int amd_iommu_update_ivrs_mapping_acpi(void); - int iov_adjust_irq_affinities(void); - --int amd_iommu_quarantine_init(struct domain *d); -+int amd_iommu_quarantine_init(struct pci_dev *pdev, bool scratch_page); -+void amd_iommu_quarantine_teardown(struct pci_dev *pdev); - - /* mapping functions */ - int __must_check amd_iommu_map_page(struct domain *d, dfn_t dfn, ---- a/xen/drivers/passthrough/amd/iommu_map.c -+++ b/xen/drivers/passthrough/amd/iommu_map.c -@@ -598,64 +598,138 @@ int amd_iommu_get_reserved_device_memory - return 0; - } - --int __init amd_iommu_quarantine_init(struct domain *d) -+static int fill_qpt(union amd_iommu_pte *this, unsigned int level, -+ struct page_info *pgs[IOMMU_MAX_PT_LEVELS]) - { -- struct domain_iommu *hd = dom_iommu(d); -+ struct domain_iommu *hd = dom_iommu(dom_io); -+ unsigned int i; -+ int rc = 0; -+ -+ for ( i = 0; !rc && i < PTE_PER_TABLE_SIZE; ++i ) -+ { -+ union amd_iommu_pte *pte = &this[i], *next; -+ -+ if ( !pte->pr ) -+ { -+ if ( !pgs[level] ) -+ { -+ /* -+ * The pgtable allocator is fine for the leaf page, as well as -+ * page table pages, and the resulting allocations are always -+ * zeroed. -+ */ -+ pgs[level] = iommu_alloc_pgtable(hd); -+ if ( !pgs[level] ) -+ { -+ rc = -ENOMEM; -+ break; -+ } -+ -+ if ( level ) -+ { -+ next = __map_domain_page(pgs[level]); -+ rc = fill_qpt(next, level - 1, pgs); -+ unmap_domain_page(next); -+ } -+ } -+ -+ /* -+ * PDEs are essentially a subset of PTEs, so this function -+ * is fine to use even at the leaf. -+ */ -+ set_iommu_pde_present(pte, mfn_x(page_to_mfn(pgs[level])), level, -+ true, true); -+ } -+ else if ( level && pte->next_level ) -+ { -+ next = map_domain_page(_mfn(pte->mfn)); -+ rc = fill_qpt(next, level - 1, pgs); -+ unmap_domain_page(next); -+ } -+ } -+ -+ return rc; -+} -+ -+int amd_iommu_quarantine_init(struct pci_dev *pdev, bool scratch_page) -+{ -+ struct domain_iommu *hd = dom_iommu(dom_io); - unsigned long end_gfn = - 1ul << (DEFAULT_DOMAIN_ADDRESS_WIDTH - PAGE_SHIFT); - unsigned int level = amd_iommu_get_paging_mode(end_gfn); -- union amd_iommu_pte *table; -+ unsigned int req_id = get_dma_requestor_id(pdev->seg, pdev->sbdf.bdf); -+ const struct ivrs_mappings *ivrs_mappings = get_ivrs_mappings(pdev->seg); -+ int rc; -+ -+ ASSERT(pcidevs_locked()); -+ ASSERT(!hd->arch.amd.root_table); -+ ASSERT(page_list_empty(&hd->arch.pgtables.list)); - -- if ( hd->arch.amd.root_table ) -- { -- ASSERT_UNREACHABLE(); -+ if ( !scratch_page && !ivrs_mappings[req_id].unity_map ) - return 0; -- } - -- spin_lock(&hd->arch.mapping_lock); -+ ASSERT(pdev->arch.pseudo_domid != DOMID_INVALID); - -- hd->arch.amd.root_table = iommu_alloc_pgtable(hd); -- if ( !hd->arch.amd.root_table ) -- goto out; -+ if ( pdev->arch.amd.root_table ) -+ { -+ clear_domain_page(pdev->arch.leaf_mfn); -+ return 0; -+ } - -- table = __map_domain_page(hd->arch.amd.root_table); -- while ( level ) -+ pdev->arch.amd.root_table = iommu_alloc_pgtable(hd); -+ if ( !pdev->arch.amd.root_table ) -+ return -ENOMEM; -+ -+ /* Transiently install the root into DomIO, for iommu_identity_mapping(). */ -+ hd->arch.amd.root_table = pdev->arch.amd.root_table; -+ -+ rc = amd_iommu_reserve_domain_unity_map(dom_io, -+ ivrs_mappings[req_id].unity_map, -+ 0); -+ -+ iommu_identity_map_teardown(dom_io); -+ hd->arch.amd.root_table = NULL; -+ -+ if ( rc ) -+ AMD_IOMMU_WARN("%pp: quarantine unity mapping failed\n", &pdev->sbdf); -+ else if ( scratch_page ) - { -- struct page_info *pg; -- unsigned int i; -+ union amd_iommu_pte *root; -+ struct page_info *pgs[IOMMU_MAX_PT_LEVELS] = {}; - -- /* -- * The pgtable allocator is fine for the leaf page, as well as -- * page table pages, and the resulting allocations are always -- * zeroed. -- */ -- pg = iommu_alloc_pgtable(hd); -- if ( !pg ) -- break; -+ spin_lock(&hd->arch.mapping_lock); - -- for ( i = 0; i < PTE_PER_TABLE_SIZE; i++ ) -- { -- union amd_iommu_pte *pde = &table[i]; -+ root = __map_domain_page(pdev->arch.amd.root_table); -+ rc = fill_qpt(root, level - 1, pgs); -+ unmap_domain_page(root); - -- /* -- * PDEs are essentially a subset of PTEs, so this function -- * is fine to use even at the leaf. -- */ -- set_iommu_pde_present(pde, mfn_x(page_to_mfn(pg)), level - 1, -- false, true); -- } -+ pdev->arch.leaf_mfn = page_to_mfn(pgs[0]); - -- unmap_domain_page(table); -- table = __map_domain_page(pg); -- level--; -+ spin_unlock(&hd->arch.mapping_lock); - } -- unmap_domain_page(table); - -- out: -- spin_unlock(&hd->arch.mapping_lock); -+ page_list_move(&pdev->arch.pgtables_list, &hd->arch.pgtables.list); -+ -+ if ( rc ) -+ amd_iommu_quarantine_teardown(pdev); -+ -+ return rc; -+} -+ -+void amd_iommu_quarantine_teardown(struct pci_dev *pdev) -+{ -+ struct domain_iommu *hd = dom_iommu(dom_io); -+ -+ ASSERT(pcidevs_locked()); -+ -+ if ( !pdev->arch.amd.root_table ) -+ return; - -- /* Pages leaked in failure case */ -- return level ? -ENOMEM : 0; -+ ASSERT(page_list_empty(&hd->arch.pgtables.list)); -+ page_list_move(&hd->arch.pgtables.list, &pdev->arch.pgtables_list); -+ while ( iommu_free_pgtables(dom_io) == -ERESTART ) -+ /* nothing */; -+ pdev->arch.amd.root_table = NULL; - } - - /* ---- a/xen/drivers/passthrough/amd/pci_amd_iommu.c -+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c -@@ -26,7 +26,7 @@ - #include "../ats.h" - - /* dom_io is used as a sentinel for quarantined devices */ --#define QUARANTINE_SKIP(d) ((d) == dom_io && !dom_iommu(d)->arch.amd.root_table) -+#define QUARANTINE_SKIP(d, p) ((d) == dom_io && !(p)->arch.amd.root_table) - - static bool_t __read_mostly init_done; - -@@ -125,8 +125,10 @@ static int __must_check amd_iommu_setup_ - u8 bus = pdev->bus; - struct domain_iommu *hd = dom_iommu(domain); - const struct ivrs_mappings *ivrs_dev; -+ const struct page_info *root_pg; -+ domid_t domid; - -- if ( QUARANTINE_SKIP(domain) ) -+ if ( QUARANTINE_SKIP(domain, pdev) ) - return 0; - - BUG_ON(!hd->arch.amd.paging_mode || !iommu->dev_table.buffer); -@@ -147,14 +149,25 @@ static int __must_check amd_iommu_setup_ - dte = &table[req_id]; - ivrs_dev = &get_ivrs_mappings(iommu->seg)[req_id]; - -+ if ( domain != dom_io ) -+ { -+ root_pg = hd->arch.amd.root_table; -+ domid = domain->domain_id; -+ } -+ else -+ { -+ root_pg = pdev->arch.amd.root_table; -+ domid = pdev->arch.pseudo_domid; -+ } -+ - spin_lock_irqsave(&iommu->lock, flags); - - if ( !dte->v || !dte->tv ) - { - /* bind DTE to domain page-tables */ - rc = amd_iommu_set_root_page_table( -- dte, page_to_maddr(hd->arch.amd.root_table), -- domain->domain_id, hd->arch.amd.paging_mode, sr_flags); -+ dte, page_to_maddr(root_pg), domid, -+ hd->arch.amd.paging_mode, sr_flags); - if ( rc ) - { - ASSERT(rc < 0); -@@ -181,7 +194,7 @@ static int __must_check amd_iommu_setup_ - - amd_iommu_flush_device(iommu, req_id); - } -- else if ( dte->pt_root != mfn_x(page_to_mfn(hd->arch.amd.root_table)) ) -+ else if ( dte->pt_root != mfn_x(page_to_mfn(root_pg)) ) - { - /* - * Strictly speaking if the device is the only one with this requestor -@@ -194,8 +207,8 @@ static int __must_check amd_iommu_setup_ - rc = -EOPNOTSUPP; - else - rc = amd_iommu_set_root_page_table( -- dte, page_to_maddr(hd->arch.amd.root_table), -- domain->domain_id, hd->arch.amd.paging_mode, sr_flags); -+ dte, page_to_maddr(root_pg), domid, -+ hd->arch.amd.paging_mode, sr_flags); - if ( rc < 0 ) - { - spin_unlock_irqrestore(&iommu->lock, flags); -@@ -214,6 +227,7 @@ static int __must_check amd_iommu_setup_ - * intended anyway. - */ - !pdev->domain->is_dying && -+ pdev->domain != dom_io && - (any_pdev_behind_iommu(pdev->domain, pdev, iommu) || - pdev->phantom_stride) ) - AMD_IOMMU_WARN(" %pp: reassignment may cause %pd data corruption\n", -@@ -246,9 +260,8 @@ static int __must_check amd_iommu_setup_ - AMD_IOMMU_DEBUG("Setup I/O page table: device id = %#x, type = %#x, " - "root table = %#"PRIx64", " - "domain = %d, paging mode = %d\n", -- req_id, pdev->type, -- page_to_maddr(hd->arch.amd.root_table), -- domain->domain_id, hd->arch.amd.paging_mode); -+ req_id, pdev->type, page_to_maddr(root_pg), -+ domid, hd->arch.amd.paging_mode); - - ASSERT(pcidevs_locked()); - -@@ -327,7 +340,7 @@ int amd_iommu_alloc_root(struct domain * - { - struct domain_iommu *hd = dom_iommu(d); - -- if ( unlikely(!hd->arch.amd.root_table) ) -+ if ( unlikely(!hd->arch.amd.root_table) && d != dom_io ) - { - hd->arch.amd.root_table = iommu_alloc_pgtable(hd); - if ( !hd->arch.amd.root_table ) -@@ -391,7 +404,7 @@ static void amd_iommu_disable_domain_dev - int req_id; - u8 bus = pdev->bus; - -- if ( QUARANTINE_SKIP(domain) ) -+ if ( QUARANTINE_SKIP(domain, pdev) ) - return; - - ASSERT(pcidevs_locked()); -@@ -430,7 +443,7 @@ static void amd_iommu_disable_domain_dev - - AMD_IOMMU_DEBUG("Disable: device id = %#x, " - "domain = %d, paging mode = %d\n", -- req_id, domain->domain_id, -+ req_id, dte->domain_id, - dom_iommu(domain)->arch.amd.paging_mode); - } - else -@@ -453,7 +466,7 @@ static int reassign_device(struct domain - return -ENODEV; - } - -- if ( !QUARANTINE_SKIP(target) ) -+ if ( !QUARANTINE_SKIP(target, pdev) ) - { - rc = amd_iommu_setup_domain_device(target, iommu, devfn, pdev); - if ( rc ) -@@ -655,6 +668,8 @@ static int amd_iommu_remove_device(u8 de - AMD_IOMMU_WARN("%pd: unity unmapping failed for %pp\n", - pdev->domain, &pdev->sbdf); - -+ amd_iommu_quarantine_teardown(pdev); -+ - iommu_free_domid(pdev->arch.pseudo_domid, iommu->domid_map); - pdev->arch.pseudo_domid = DOMID_INVALID; - ---- a/xen/drivers/passthrough/iommu.c -+++ b/xen/drivers/passthrough/iommu.c -@@ -443,21 +443,22 @@ int iommu_iotlb_flush_all(struct domain - return rc; - } - --static int __init iommu_quarantine_init(void) -+int iommu_quarantine_dev_init(device_t *dev) - { - const struct domain_iommu *hd = dom_iommu(dom_io); -- int rc; - -- dom_io->options |= XEN_DOMCTL_CDF_iommu; -+ if ( !iommu_quarantine || !hd->platform_ops->quarantine_init ) -+ return 0; - -- rc = iommu_domain_init(dom_io, 0); -- if ( rc || iommu_quarantine < IOMMU_quarantine_scratch_page ) -- return rc; -+ return iommu_call(hd->platform_ops, quarantine_init, -+ dev, iommu_quarantine == IOMMU_quarantine_scratch_page); -+} - -- if ( !hd->platform_ops->quarantine_init ) -- return 0; -+static int __init iommu_quarantine_init(void) -+{ -+ dom_io->options |= XEN_DOMCTL_CDF_iommu; - -- return hd->platform_ops->quarantine_init(dom_io); -+ return iommu_domain_init(dom_io, 0); - } - - int __init iommu_setup(void) ---- a/xen/drivers/passthrough/pci.c -+++ b/xen/drivers/passthrough/pci.c -@@ -852,9 +852,16 @@ static int deassign_device(struct domain - return -ENODEV; - - /* De-assignment from dom_io should de-quarantine the device */ -- target = ((pdev->quarantine || iommu_quarantine) && -- pdev->domain != dom_io) ? -- dom_io : hardware_domain; -+ if ( (pdev->quarantine || iommu_quarantine) && pdev->domain != dom_io ) -+ { -+ ret = iommu_quarantine_dev_init(pci_to_dev(pdev)); -+ if ( ret ) -+ return ret; -+ -+ target = dom_io; -+ } -+ else -+ target = hardware_domain; - - while ( pdev->phantom_stride ) - { -@@ -1424,6 +1431,13 @@ static int assign_device(struct domain * - if ( rc ) - goto done; - -+ if ( pdev->domain != dom_io ) -+ { -+ rc = iommu_quarantine_dev_init(pci_to_dev(pdev)); -+ if ( rc ) -+ goto done; -+ } -+ - pdev->fault.count = 0; - - if ( (rc = hd->platform_ops->assign_device(d, devfn, pci_to_dev(pdev), flag)) ) ---- a/xen/drivers/passthrough/vtd/iommu.c -+++ b/xen/drivers/passthrough/vtd/iommu.c -@@ -45,6 +45,11 @@ - - /* dom_io is used as a sentinel for quarantined devices */ - #define QUARANTINE_SKIP(d, pgd_maddr) ((d) == dom_io && !(pgd_maddr)) -+#define DEVICE_DOMID(d, pdev) ((d) != dom_io ? (d)->domain_id \ -+ : (pdev)->arch.pseudo_domid) -+#define DEVICE_PGTABLE(d, pdev) ((d) != dom_io \ -+ ? dom_iommu(d)->arch.vtd.pgd_maddr \ -+ : (pdev)->arch.vtd.pgd_maddr) - - /* Possible unfiltered LAPIC/MSI messages from untrusted sources? */ - bool __read_mostly untrusted_msi; -@@ -88,13 +93,18 @@ static int get_iommu_did(domid_t domid, - - #define DID_FIELD_WIDTH 16 - #define DID_HIGH_OFFSET 8 -+ -+/* -+ * This function may have "context" passed as NULL, to merely obtain a DID -+ * for "domid". -+ */ - static int context_set_domain_id(struct context_entry *context, - domid_t domid, struct vtd_iommu *iommu) - { - unsigned long nr_dom, i; - int found = 0; - -- ASSERT(spin_is_locked(&iommu->lock)); -+ ASSERT(pcidevs_locked()); - - nr_dom = cap_ndoms(iommu->cap); - i = find_first_bit(iommu->domid_bitmap, nr_dom); -@@ -120,8 +130,13 @@ static int context_set_domain_id(struct - } - - set_bit(i, iommu->domid_bitmap); -- context->hi &= ~(((1 << DID_FIELD_WIDTH) - 1) << DID_HIGH_OFFSET); -- context->hi |= (i & ((1 << DID_FIELD_WIDTH) - 1)) << DID_HIGH_OFFSET; -+ -+ if ( context ) -+ { -+ context->hi &= ~(((1 << DID_FIELD_WIDTH) - 1) << DID_HIGH_OFFSET); -+ context->hi |= (i & ((1 << DID_FIELD_WIDTH) - 1)) << DID_HIGH_OFFSET; -+ } -+ - return 0; - } - -@@ -171,8 +186,12 @@ static void check_cleanup_domid_map(cons - const struct pci_dev *exclude, - struct vtd_iommu *iommu) - { -- bool found = any_pdev_behind_iommu(d, exclude, iommu); -+ bool found; -+ -+ if ( d == dom_io ) -+ return; - -+ found = any_pdev_behind_iommu(d, exclude, iommu); - /* - * Hidden devices are associated with DomXEN but usable by the hardware - * domain. Hence they need considering here as well. -@@ -1426,7 +1445,7 @@ int domain_context_mapping_one( - domid = iommu->domid_map[prev_did]; - if ( domid < DOMID_FIRST_RESERVED ) - prev_dom = rcu_lock_domain_by_id(domid); -- else if ( domid == DOMID_IO ) -+ else if ( pdev ? domid == pdev->arch.pseudo_domid : domid > DOMID_MASK ) - prev_dom = rcu_lock_domain(dom_io); - if ( !prev_dom ) - { -@@ -1582,15 +1601,12 @@ int domain_context_mapping_one( - { - if ( !prev_dom ) - ret = domain_context_unmap_one(domain, iommu, bus, devfn, -- domain->domain_id); -+ DEVICE_DOMID(domain, pdev)); - else if ( prev_dom != domain ) /* Avoid infinite recursion. */ -- { -- hd = dom_iommu(prev_dom); - ret = domain_context_mapping_one(prev_dom, iommu, bus, devfn, pdev, -- domain->domain_id, -- hd->arch.vtd.pgd_maddr, -+ DEVICE_DOMID(prev_dom, pdev), -+ DEVICE_PGTABLE(prev_dom, pdev), - mode & MAP_WITH_RMRR) < 0; -- } - else - ret = 1; - -@@ -1612,7 +1628,7 @@ static int domain_context_mapping(struct - { - const struct acpi_drhd_unit *drhd = acpi_find_matched_drhd_unit(pdev); - const struct acpi_rmrr_unit *rmrr; -- paddr_t pgd_maddr = dom_iommu(domain)->arch.vtd.pgd_maddr; -+ paddr_t pgd_maddr = DEVICE_PGTABLE(domain, pdev); - domid_t orig_domid = pdev->arch.pseudo_domid; - int ret = 0; - unsigned int i, mode = 0; -@@ -1641,7 +1657,7 @@ static int domain_context_mapping(struct - break; - } - -- if ( domain != pdev->domain ) -+ if ( domain != pdev->domain && pdev->domain != dom_io ) - { - if ( pdev->domain->is_dying ) - mode |= MAP_OWNER_DYING; -@@ -1683,8 +1699,8 @@ static int domain_context_mapping(struct - if ( iommu_debug ) - printk(VTDPREFIX "%pd:PCIe: map %pp\n", - domain, &PCI_SBDF3(seg, bus, devfn)); -- ret = domain_context_mapping_one(domain, drhd->iommu, bus, devfn, -- pdev, domain->domain_id, pgd_maddr, -+ ret = domain_context_mapping_one(domain, drhd->iommu, bus, devfn, pdev, -+ DEVICE_DOMID(domain, pdev), pgd_maddr, - mode); - if ( ret > 0 ) - ret = 0; -@@ -1710,8 +1726,8 @@ static int domain_context_mapping(struct - domain, &PCI_SBDF3(seg, bus, devfn)); - - ret = domain_context_mapping_one(domain, drhd->iommu, bus, devfn, -- pdev, domain->domain_id, pgd_maddr, -- mode); -+ pdev, DEVICE_DOMID(domain, pdev), -+ pgd_maddr, mode); - if ( ret < 0 ) - break; - prev_present = ret; -@@ -1739,8 +1755,8 @@ static int domain_context_mapping(struct - */ - if ( ret >= 0 ) - ret = domain_context_mapping_one(domain, drhd->iommu, bus, devfn, -- NULL, domain->domain_id, pgd_maddr, -- mode); -+ NULL, DEVICE_DOMID(domain, pdev), -+ pgd_maddr, mode); - - /* - * Devices behind PCIe-to-PCI/PCIx bridge may generate different -@@ -1755,8 +1771,8 @@ static int domain_context_mapping(struct - if ( !ret && pdev_type(seg, bus, devfn) == DEV_TYPE_PCIe2PCI_BRIDGE && - (secbus != pdev->bus || pdev->devfn != 0) ) - ret = domain_context_mapping_one(domain, drhd->iommu, secbus, 0, -- NULL, domain->domain_id, pgd_maddr, -- mode); -+ NULL, DEVICE_DOMID(domain, pdev), -+ pgd_maddr, mode); - - if ( ret ) - { -@@ -1798,9 +1814,6 @@ int domain_context_unmap_one( - int iommu_domid, rc, ret; - bool_t flush_dev_iotlb; - -- if ( QUARANTINE_SKIP(domain, dom_iommu(domain)->arch.vtd.pgd_maddr) ) -- return 0; -- - ASSERT(pcidevs_locked()); - spin_lock(&iommu->lock); - -@@ -1902,7 +1915,7 @@ static const struct acpi_drhd_unit *doma - printk(VTDPREFIX "%pd:PCIe: unmap %pp\n", - domain, &PCI_SBDF3(seg, bus, devfn)); - ret = domain_context_unmap_one(domain, iommu, bus, devfn, -- domain->domain_id); -+ DEVICE_DOMID(domain, pdev)); - if ( !ret && devfn == pdev->devfn && ats_device(pdev, drhd) > 0 ) - disable_ats_device(pdev); - -@@ -1916,7 +1929,7 @@ static const struct acpi_drhd_unit *doma - printk(VTDPREFIX "%pd:PCI: unmap %pp\n", - domain, &PCI_SBDF3(seg, bus, devfn)); - ret = domain_context_unmap_one(domain, iommu, bus, devfn, -- domain->domain_id); -+ DEVICE_DOMID(domain, pdev)); - if ( ret ) - break; - -@@ -1939,18 +1952,12 @@ static const struct acpi_drhd_unit *doma - break; - } - -+ ret = domain_context_unmap_one(domain, iommu, tmp_bus, tmp_devfn, -+ DEVICE_DOMID(domain, pdev)); - /* PCIe to PCI/PCIx bridge */ -- if ( pdev_type(seg, tmp_bus, tmp_devfn) == DEV_TYPE_PCIe2PCI_BRIDGE ) -- { -- ret = domain_context_unmap_one(domain, iommu, tmp_bus, tmp_devfn, -- domain->domain_id); -- if ( !ret ) -- ret = domain_context_unmap_one(domain, iommu, secbus, 0, -- domain->domain_id); -- } -- else /* Legacy PCI bridge */ -- ret = domain_context_unmap_one(domain, iommu, tmp_bus, tmp_devfn, -- domain->domain_id); -+ if ( !ret && pdev_type(seg, tmp_bus, tmp_devfn) == DEV_TYPE_PCIe2PCI_BRIDGE ) -+ ret = domain_context_unmap_one(domain, iommu, secbus, 0, -+ DEVICE_DOMID(domain, pdev)); - - break; - -@@ -1961,7 +1968,7 @@ static const struct acpi_drhd_unit *doma - } - - if ( !ret && pdev->devfn == devfn && -- !QUARANTINE_SKIP(domain, dom_iommu(domain)->arch.vtd.pgd_maddr) ) -+ !QUARANTINE_SKIP(domain, pdev->arch.vtd.pgd_maddr) ) - check_cleanup_domid_map(domain, pdev, iommu); - - return drhd; -@@ -1994,6 +2001,26 @@ static void iommu_domain_teardown(struct - XFREE(hd->arch.vtd.iommu_bitmap); - } - -+static void quarantine_teardown(struct pci_dev *pdev, -+ const struct acpi_drhd_unit *drhd) -+{ -+ struct domain_iommu *hd = dom_iommu(dom_io); -+ -+ ASSERT(pcidevs_locked()); -+ -+ if ( !pdev->arch.vtd.pgd_maddr ) -+ return; -+ -+ ASSERT(page_list_empty(&hd->arch.pgtables.list)); -+ page_list_move(&hd->arch.pgtables.list, &pdev->arch.pgtables_list); -+ while ( iommu_free_pgtables(dom_io) == -ERESTART ) -+ /* nothing */; -+ pdev->arch.vtd.pgd_maddr = 0; -+ -+ if ( drhd ) -+ cleanup_domid_map(pdev->arch.pseudo_domid, drhd->iommu); -+} -+ - static int __must_check intel_iommu_map_page(struct domain *d, dfn_t dfn, - mfn_t mfn, unsigned int flags, - unsigned int *flush_flags) -@@ -2218,6 +2245,8 @@ static int intel_iommu_remove_device(u8 - rmrr->end_address, 0); - } - -+ quarantine_teardown(pdev, drhd); -+ - if ( drhd ) - { - iommu_free_domid(pdev->arch.pseudo_domid, -@@ -2576,7 +2605,7 @@ static int reassign_device_ownership( - { - int ret; - -- if ( !QUARANTINE_SKIP(target, dom_iommu(target)->arch.vtd.pgd_maddr) ) -+ if ( !QUARANTINE_SKIP(target, pdev->arch.vtd.pgd_maddr) ) - { - if ( !has_arch_pdevs(target) ) - vmx_pi_hooks_assign(target); -@@ -2592,7 +2621,7 @@ static int reassign_device_ownership( - ret = domain_context_mapping(target, devfn, pdev); - - if ( !ret && pdev->devfn == devfn && -- !QUARANTINE_SKIP(source, dom_iommu(source)->arch.vtd.pgd_maddr) ) -+ !QUARANTINE_SKIP(source, pdev->arch.vtd.pgd_maddr) ) - { - const struct acpi_drhd_unit *drhd = acpi_find_matched_drhd_unit(pdev); - -@@ -2913,69 +2942,135 @@ static void vtd_dump_page_tables(struct - agaw_to_level(hd->arch.vtd.agaw), 0, 0); - } - --static int __init intel_iommu_quarantine_init(struct domain *d) -+static int fill_qpt(struct dma_pte *this, unsigned int level, -+ struct page_info *pgs[6]) - { -- struct domain_iommu *hd = dom_iommu(d); -+ struct domain_iommu *hd = dom_iommu(dom_io); -+ unsigned int i; -+ int rc = 0; -+ -+ for ( i = 0; !rc && i < PTE_NUM; ++i ) -+ { -+ struct dma_pte *pte = &this[i], *next; -+ -+ if ( !dma_pte_present(*pte) ) -+ { -+ if ( !pgs[level] ) -+ { -+ /* -+ * The pgtable allocator is fine for the leaf page, as well as -+ * page table pages, and the resulting allocations are always -+ * zeroed. -+ */ -+ pgs[level] = iommu_alloc_pgtable(hd); -+ if ( !pgs[level] ) -+ { -+ rc = -ENOMEM; -+ break; -+ } -+ -+ if ( level ) -+ { -+ next = map_vtd_domain_page(page_to_maddr(pgs[level])); -+ rc = fill_qpt(next, level - 1, pgs); -+ unmap_vtd_domain_page(next); -+ } -+ } -+ -+ dma_set_pte_addr(*pte, page_to_maddr(pgs[level])); -+ dma_set_pte_readable(*pte); -+ dma_set_pte_writable(*pte); -+ } -+ else if ( level && !dma_pte_superpage(*pte) ) -+ { -+ next = map_vtd_domain_page(dma_pte_addr(*pte)); -+ rc = fill_qpt(next, level - 1, pgs); -+ unmap_vtd_domain_page(next); -+ } -+ } -+ -+ return rc; -+} -+ -+static int intel_iommu_quarantine_init(struct pci_dev *pdev, bool scratch_page) -+{ -+ struct domain_iommu *hd = dom_iommu(dom_io); - struct page_info *pg; -- struct dma_pte *parent; - unsigned int agaw = width_to_agaw(DEFAULT_DOMAIN_ADDRESS_WIDTH); - unsigned int level = agaw_to_level(agaw); -- int rc = 0; -+ const struct acpi_drhd_unit *drhd; -+ const struct acpi_rmrr_unit *rmrr; -+ unsigned int i, bdf; -+ bool rmrr_found = false; -+ int rc; - -- spin_lock(&hd->arch.mapping_lock); -+ ASSERT(pcidevs_locked()); -+ ASSERT(!hd->arch.vtd.pgd_maddr); -+ ASSERT(page_list_empty(&hd->arch.pgtables.list)); - -- if ( hd->arch.vtd.pgd_maddr ) -+ if ( pdev->arch.vtd.pgd_maddr ) - { -- ASSERT_UNREACHABLE(); -- goto out; -+ clear_domain_page(pdev->arch.leaf_mfn); -+ return 0; - } - -- pg = iommu_alloc_pgtable(hd); -+ drhd = acpi_find_matched_drhd_unit(pdev); -+ if ( !drhd ) -+ return -ENODEV; - -- rc = -ENOMEM; -+ pg = iommu_alloc_pgtable(hd); - if ( !pg ) -- goto out; -+ return -ENOMEM; - -+ rc = context_set_domain_id(NULL, pdev->arch.pseudo_domid, drhd->iommu); -+ -+ /* Transiently install the root into DomIO, for iommu_identity_mapping(). */ - hd->arch.vtd.pgd_maddr = page_to_maddr(pg); - -- parent = map_vtd_domain_page(hd->arch.vtd.pgd_maddr); -- while ( level ) -+ for_each_rmrr_device ( rmrr, bdf, i ) - { -- uint64_t maddr; -- unsigned int offset; -- -- /* -- * The pgtable allocator is fine for the leaf page, as well as -- * page table pages, and the resulting allocations are always -- * zeroed. -- */ -- pg = iommu_alloc_pgtable(hd); -- -- if ( !pg ) -- goto out; -+ if ( rc ) -+ break; - -- maddr = page_to_maddr(pg); -- for ( offset = 0; offset < PTE_NUM; offset++ ) -+ if ( rmrr->segment == pdev->seg && bdf == pdev->sbdf.bdf ) - { -- struct dma_pte *pte = &parent[offset]; -+ rmrr_found = true; - -- dma_set_pte_addr(*pte, maddr); -- dma_set_pte_readable(*pte); -+ rc = iommu_identity_mapping(dom_io, p2m_access_rw, -+ rmrr->base_address, rmrr->end_address, -+ 0); -+ if ( rc ) -+ printk(XENLOG_ERR VTDPREFIX -+ "%pp: RMRR quarantine mapping failed\n", -+ &pdev->sbdf); - } -- iommu_sync_cache(parent, PAGE_SIZE); -+ } - -- unmap_vtd_domain_page(parent); -- parent = map_vtd_domain_page(maddr); -- level--; -+ iommu_identity_map_teardown(dom_io); -+ hd->arch.vtd.pgd_maddr = 0; -+ pdev->arch.vtd.pgd_maddr = page_to_maddr(pg); -+ -+ if ( !rc && scratch_page ) -+ { -+ struct dma_pte *root; -+ struct page_info *pgs[6] = {}; -+ -+ spin_lock(&hd->arch.mapping_lock); -+ -+ root = map_vtd_domain_page(pdev->arch.vtd.pgd_maddr); -+ rc = fill_qpt(root, level - 1, pgs); -+ unmap_vtd_domain_page(root); -+ -+ pdev->arch.leaf_mfn = page_to_mfn(pgs[0]); -+ -+ spin_unlock(&hd->arch.mapping_lock); - } -- unmap_vtd_domain_page(parent); - -- rc = 0; -+ page_list_move(&pdev->arch.pgtables_list, &hd->arch.pgtables.list); - -- out: -- spin_unlock(&hd->arch.mapping_lock); -+ if ( rc || (!scratch_page && !rmrr_found) ) -+ quarantine_teardown(pdev, drhd); - -- /* Pages may be leaked in failure case */ - return rc; - } - ---- a/xen/drivers/passthrough/vtd/iommu.h -+++ b/xen/drivers/passthrough/vtd/iommu.h -@@ -482,7 +482,7 @@ struct vtd_iommu { - u32 nr_pt_levels; - u64 cap; - u64 ecap; -- spinlock_t lock; /* protect context, domain ids */ -+ spinlock_t lock; /* protect context */ - spinlock_t register_lock; /* protect iommu register handling */ - u64 root_maddr; /* root entry machine address */ - nodeid_t node; ---- a/xen/include/xen/iommu.h -+++ b/xen/include/xen/iommu.h -@@ -233,7 +233,7 @@ typedef int iommu_grdm_t(xen_pfn_t start - struct iommu_ops { - int (*init)(struct domain *d); - void (*hwdom_init)(struct domain *d); -- int (*quarantine_init)(struct domain *d); -+ int (*quarantine_init)(device_t *dev, bool scratch_page); - int (*add_device)(u8 devfn, device_t *dev); - int (*enable_device)(device_t *dev); - int (*remove_device)(u8 devfn, device_t *dev); -@@ -350,6 +350,7 @@ int __must_check iommu_suspend(void); - void iommu_resume(void); - void iommu_crash_shutdown(void); - int iommu_get_reserved_device_memory(iommu_grdm_t *, void *); -+int iommu_quarantine_dev_init(device_t *dev); - - #ifdef CONFIG_HAS_PCI - int iommu_do_pci_domctl(struct xen_domctl *, struct domain *d,