diff --git a/.gitignore b/.gitignore index e0e02a4..7c63a9b 100644 --- a/.gitignore +++ b/.gitignore @@ -6,4 +6,4 @@ lwip-1.3.0.tar.gz pciutils-2.2.9.tar.bz2 zlib-1.2.3.tar.gz polarssl-1.1.4-gpl.tgz -/xen-4.17.2.tar.gz +/xen-4.18.0.tar.gz diff --git a/sources b/sources index 730c9d1..1aaa8a6 100644 --- a/sources +++ b/sources @@ -4,4 +4,4 @@ SHA512 (newlib-1.16.0.tar.gz) = 40eb96bbc6736a16b6399e0cdb73e853d0d90b685c967e77 SHA512 (zlib-1.2.3.tar.gz) = 021b958fcd0d346c4ba761bcf0cc40f3522de6186cf5a0a6ea34a70504ce9622b1c2626fce40675bc8282cf5f5ade18473656abc38050f72f5d6480507a2106e SHA512 (polarssl-1.1.4-gpl.tgz) = 88da614e4d3f4409c4fd3bb3e44c7587ba051e3fed4e33d526069a67e8180212e1ea22da984656f50e290049f60ddca65383e5983c0f8884f648d71f698303ad SHA512 (pciutils-2.2.9.tar.bz2) = 2b3d98d027e46d8c08037366dde6f0781ca03c610ef2b380984639e4ef39899ed8d8b8e4cd9c9dc54df101279b95879bd66bfd4d04ad07fef41e847ea7ae32b5 -SHA512 (xen-4.17.2.tar.gz) = 0bc475483676e4aa27735695f9a8d2821059e7a55984adb8a29badb5c09a4e7cf8ea29cbc9691be616cc0d7a5ee6b6dacc59ba29c2b16e0919ebdf7dfc54201a +SHA512 (xen-4.18.0.tar.gz) = 4cc9fd155144045a173c5f8ecc45f149817f1034eec618cb6f8b0494ef2fb5b95c4c60cf0bf4bec4bef8a622c35b6a3cb7dedc38e6d95e726f1611c73ddb3273 diff --git a/xen.canonicalize.patch b/xen.canonicalize.patch index f3ae37d..45fa724 100644 --- a/xen.canonicalize.patch +++ b/xen.canonicalize.patch @@ -1,45 +1,54 @@ ---- xen-4.9.0-rc1.2/tools/xenstore/xenstored_watch.c.orig 2017-04-12 16:18:57.000000000 +0100 -+++ xen-4.9.0-rc1.2/tools/xenstore/xenstored_watch.c 2017-04-13 21:17:12.255231094 +0100 -@@ -215,7 +215,7 @@ - goto inval; - } else { - *relative = !strstarts(*path, "/"); -- *path = canonicalize(conn, ctx, *path); -+ *path = xenstore_canonicalize(conn, ctx, *path); - if (!*path) - return errno; - if (!is_valid_nodename(*path)) -@@ -305,7 +305,7 @@ +--- xen-4.18.0-rc1/tools/xenstored/watch.c.orig 2023-09-29 09:09:29.000000000 +0100 ++++ xen-4.18.0-rc1/tools/xenstored/watch.c 2023-10-02 
16:12:14.971264769 +0100 +@@ -164,7 +164,7 @@ + const char **path, bool *relative) + { + *relative = !strstarts(*path, "/") && !strstarts(*path, "@"); +- *path = canonicalize(conn, ctx, *path, true); ++ *path = xenstore_canonicalize(conn, ctx, *path, true); + + return *path ? 0 : errno; + } +@@ -250,7 +250,7 @@ if (get_strings(in, vec, ARRAY_SIZE(vec)) != ARRAY_SIZE(vec)) return EINVAL; -- node = canonicalize(conn, ctx, vec[0]); -+ node = xenstore_canonicalize(conn, ctx, vec[0]); +- node = canonicalize(conn, ctx, vec[0], true); ++ node = xenstore_canonicalize(conn, ctx, vec[0], true); if (!node) - return ENOMEM; + return errno; list_for_each_entry(watch, &conn->watches, list) { ---- xen-4.9.0-rc1.2/tools/xenstore/xenstored_core.c.orig 2017-04-12 16:18:57.000000000 +0100 -+++ xen-4.9.0-rc1.2/tools/xenstore/xenstored_core.c 2017-04-13 21:19:35.668429881 +0100 -@@ -777,7 +777,7 @@ +--- xen-4.18.0-rc1/tools/xenstored/core.c.orig 2023-09-29 09:09:29.000000000 +0100 ++++ xen-4.18.0-rc1/tools/xenstored/core.c 2023-10-02 16:12:14.993264626 +0100 +@@ -1249,7 +1249,7 @@ return strings; } --char *canonicalize(struct connection *conn, const void *ctx, const char *node) -+char *xenstore_canonicalize(struct connection *conn, const void *ctx, const char *node) +-const char *canonicalize(struct connection *conn, const void *ctx, ++const char *xenstore_canonicalize(struct connection *conn, const void *ctx, + const char *node, bool allow_special) { - const char *prefix; - -@@ -874,7 +874,7 @@ + const char *name; +@@ -1303,7 +1303,7 @@ + { + struct node *node; - if (!canonical_name) - canonical_name = &tmp_name; -- *canonical_name = canonicalize(conn, ctx, name); -+ *canonical_name = xenstore_canonicalize(conn, ctx, name); +- *canonical_name = canonicalize(conn, ctx, name, allow_special); ++ *canonical_name = xenstore_canonicalize(conn, ctx, name, allow_special); if (!*canonical_name) return NULL; - return get_node(conn, ctx, *canonical_name, perm); ---- 
xen-4.8.0/tools/console/testsuite/console-dom0.c.orig 2016-12-05 12:03:27.000000000 +0000 -+++ xen-4.8.0/tools/console/testsuite/console-dom0.c 2017-02-26 21:52:24.554678631 +0000 + +@@ -1320,7 +1320,7 @@ + const char *tmp_name; + const struct node *node; + +- tmp_name = canonicalize(conn, ctx, name, allow_special); ++ tmp_name = xenstore_canonicalize(conn, ctx, name, allow_special); + if (!tmp_name) + return NULL; + +--- xen-4.18.0-rc1/tools/console/testsuite/console-dom0.c.orig 2023-09-29 09:09:29.000000000 +0100 ++++ xen-4.18.0-rc1/tools/console/testsuite/console-dom0.c 2023-10-02 16:12:15.001264574 +0100 @@ -18,7 +18,7 @@ } } @@ -76,8 +85,8 @@ fprintf(stderr, "%s", line); } while (strcmp(line, "Okay.\n") != 0); ---- xen-4.8.0/tools/console/testsuite/console-domU.c.orig 2016-12-05 12:03:27.000000000 +0000 -+++ xen-4.8.0/tools/console/testsuite/console-domU.c 2017-02-26 21:52:50.320622804 +0000 +--- xen-4.18.0-rc1/tools/console/testsuite/console-domU.c.orig 2023-09-29 09:09:29.000000000 +0100 ++++ xen-4.18.0-rc1/tools/console/testsuite/console-domU.c 2023-10-02 16:12:15.008264528 +0100 @@ -6,7 +6,7 @@ #include #include @@ -96,14 +105,14 @@ seed = strtoul(line, 0, 0); printf("Seed Okay.\n"); fflush(stdout); ---- xen-4.14.1/tools/xenstore/xenstored_core.h.orig 2020-12-17 16:47:25.000000000 +0000 -+++ xen-4.14.1/tools/xenstore/xenstored_core.h 2020-12-17 20:13:10.806887309 +0000 -@@ -153,7 +153,7 @@ +--- xen-4.18.0-rc1/tools/xenstored/core.h.orig 2023-09-29 09:09:29.000000000 +0100 ++++ xen-4.18.0-rc1/tools/xenstored/core.h 2023-10-02 16:12:15.015264482 +0100 +@@ -240,7 +240,7 @@ void send_ack(struct connection *conn, enum xsd_sockmsg_type type); /* Canonicalize this path if possible. 
*/ --char *canonicalize(struct connection *conn, const void *ctx, const char *node); -+char *xenstore_canonicalize(struct connection *conn, const void *ctx, const char *node); +-const char *canonicalize(struct connection *conn, const void *ctx, ++const char *xenstore_canonicalize(struct connection *conn, const void *ctx, + const char *node, bool allow_special); /* Get access permissions. */ - unsigned int perm_for_conn(struct connection *conn, diff --git a/xen.ocaml5.fixes.patch b/xen.ocaml5.fixes.patch index 2191334..bdffd4c 100644 --- a/xen.ocaml5.fixes.patch +++ b/xen.ocaml5.fixes.patch @@ -13,36 +13,6 @@ VERSION := 4.1 ---- xen-4.17.1/tools/ocaml/libs/xentoollog/xentoollog_stubs.c.orig 2023-04-27 06:53:19.000000000 -0600 -+++ xen-4.17.1/tools/ocaml/libs/xentoollog/xentoollog_stubs.c 2023-06-20 09:04:30.375263358 -0600 -@@ -33,7 +33,7 @@ - - /* The following is equal to the CAMLreturn macro, but without the return */ - #define CAMLdone do{ \ --caml_local_roots = caml__frame; \ -+CAML_LOCAL_ROOTS = caml__frame; \ - }while (0) - - #define XTL ((xentoollog_logger *) Xtl_val(handle)) ---- xen-4.17.1/tools/ocaml/libs/xl/xenlight_stubs.c.orig 2023-04-27 06:53:19.000000000 -0600 -+++ xen-4.17.1/tools/ocaml/libs/xl/xenlight_stubs.c 2023-06-20 09:34:09.550454354 -0600 -@@ -43,14 +43,14 @@ - #ifndef CAMLreturnT - #define CAMLreturnT(type, result) do { \ - type caml__temp_result = (result); \ -- caml_local_roots = caml__frame; \ -+ CAML_LOCAL_ROOTS = caml__frame; \ - return (caml__temp_result); \ - } while (0) - #endif - - /* The following is equal to the CAMLreturn macro, but without the return */ - #define CAMLdone do{ \ --caml_local_roots = caml__frame; \ -+CAML_LOCAL_ROOTS = caml__frame; \ - }while (0) - - #define Ctx_val(x)(*((libxl_ctx **) Data_custom_val(x))) --- xen-4.17.1/tools/ocaml/Makefile.rules.orig 2023-04-27 06:53:19.000000000 -0600 +++ xen-4.17.1/tools/ocaml/Makefile.rules 2023-06-20 10:00:58.769235173 -0600 @@ -59,9 +59,9 @@ quiet-command = $(if 
$(V),$1,@printf " % @@ -60,11 +30,11 @@ --- xen-4.17.1/tools/ocaml/xenstored/disk.ml.orig 2023-04-27 06:53:19.000000000 -0600 +++ xen-4.17.1/tools/ocaml/xenstored/disk.ml 2023-06-20 09:49:44.361963710 -0600 @@ -30,7 +30,7 @@ let undec c = - | _ -> raise (Failure "undecify") + | _ -> raise (Failure "undecify") let unhex c = -- let c = Char.lowercase c in -+ let c = Char.lowercase_ascii c in - match c with - | '0' .. '9' -> (Char.code c) - (Char.code '0') - | 'a' .. 'f' -> (Char.code c) - (Char.code 'a') + 10 +- let c = Char.lowercase c in ++ let c = Char.lowercase_ascii c in + match c with + | '0' .. '9' -> (Char.code c) - (Char.code '0') + | 'a' .. 'f' -> (Char.code c) - (Char.code 'a') + 10 diff --git a/xen.spec b/xen.spec index 1845570..07bb3d2 100644 --- a/xen.spec +++ b/xen.spec @@ -50,12 +50,12 @@ %endif # Hypervisor ABI -%define hv_abi 4.17 +%define hv_abi 4.18 Summary: Xen is a virtual machine monitor Name: xen -Version: 4.17.2 -Release: 5%{?dist} +Version: 4.18.0 +Release: 1%{?dist} License: GPLv2+ and LGPLv2+ and BSD URL: http://xen.org/ Source0: https://downloads.xenproject.org/release/xen/%{version}/xen-%{version}.tar.gz @@ -112,35 +112,6 @@ Patch46: xen.efi.build.patch Patch47: xen.gcc13.fixes.patch Patch49: xen.python3.12.patch Patch50: xen.ocaml5.fixes.patch -Patch51: xsa437.patch -Patch52: xsa438-4.17.patch -Patch53: xsa439-0001-x86-AMD-extend-Zenbleed-check-to-models-good-ucode-i.patch -Patch54: xsa439-0002-x86-spec-ctrl-Fix-confusion-between-SPEC_CTRL_EXIT_T.patch -Patch55: xsa439-0003-x86-spec-ctrl-Fold-DO_SPEC_CTRL_EXIT_TO_XEN-into-it-.patch -Patch56: xsa439-0004-x86-spec-ctrl-Turn-the-remaining-SPEC_CTRL_-ENTRY-EX.patch -Patch57: xsa439-0005-x86-spec-ctrl-Improve-all-SPEC_CTRL_-ENTER-EXIT-_-co.patch -Patch58: xsa439-0006-x86-entry-Adjust-restore_all_xen-to-hold-stack_end-i.patch -Patch59: xsa439-0007-x86-entry-Track-the-IST-ness-of-an-entry-for-the-exi.patch -Patch60: xsa439-0008-x86-spec-ctrl-Issue-VERW-during-IST-exit-to-Xen.patch -Patch61: 
xsa439-0009-x86-amd-Introduce-is_zen-1-2-_uarch-predicates.patch -Patch62: xsa439-0010-x86-spec-ctrl-Mitigate-the-Zen1-DIV-leakage.patch -Patch63: xsa440-4.17.patch -Patch64: xsa442-4.17.patch -Patch65: xsa443-4.17-01.patch -Patch66: xsa443-4.17-02.patch -Patch67: xsa443-4.17-03.patch -Patch68: xsa443-4.17-04.patch -Patch69: xsa443-4.17-05.patch -Patch70: xsa443-4.17-06.patch -Patch71: xsa443-4.17-07.patch -Patch72: xsa443-4.17-08.patch -Patch73: xsa443-4.17-09.patch -Patch74: xsa443-4.17-10.patch -Patch75: xsa443-4.17-11.patch -Patch76: xsa444-4.17-1.patch -Patch77: xsa444-4.17-2.patch -Patch78: xsa445-4.17.patch -Patch79: xsa446.patch %if %build_qemutrad @@ -357,35 +328,6 @@ manage Xen virtual machines. %if "%dist" != ".fc38" %patch 50 -p1 %endif -%patch 51 -p1 -%patch 52 -p1 -%patch 53 -p1 -%patch 54 -p1 -%patch 55 -p1 -%patch 56 -p1 -%patch 57 -p1 -%patch 58 -p1 -%patch 59 -p1 -%patch 60 -p1 -%patch 61 -p1 -%patch 62 -p1 -%patch 63 -p1 -%patch 64 -p1 -%patch 65 -p1 -%patch 66 -p1 -%patch 67 -p1 -%patch 68 -p1 -%patch 69 -p1 -%patch 70 -p1 -%patch 71 -p1 -%patch 72 -p1 -%patch 73 -p1 -%patch 74 -p1 -%patch 75 -p1 -%patch 76 -p1 -%patch 77 -p1 -%patch 78 -p1 -%patch 79 -p1 # qemu-xen-traditional patches pushd tools/qemu-xen-traditional @@ -737,7 +679,6 @@ fi #files -f xen-xm.lang %files %doc COPYING README -%{_bindir}/xencons %{python3_sitearch}/%{name} %{python3_sitearch}/xen-*.egg-info @@ -993,6 +934,13 @@ fi %endif %changelog +* Wed Nov 29 2023 Michael Young - 4.18.0-1 +- update to xen-4.18.0 + rebase xen.canonicalize.patch and xen.ocaml5.fixes.patch + remove or adjust patches now included or superseded upstream +- xencons has been dropped + + * Tue Nov 14 2023 Michael Young - 4.17.2-5 - x86/AMD: mismatch in IOMMU quarantine page table levels [XSA-445, CVE-2023-46835] diff --git a/xsa437.patch b/xsa437.patch deleted file mode 100644 index 18c9f8f..0000000 --- a/xsa437.patch +++ /dev/null @@ -1,110 +0,0 @@ -From 7fac5971340a13ca9458195305bcfe14df2e52d2 Mon Sep 
17 00:00:00 2001 -From: Stefano Stabellini -Date: Thu, 17 Aug 2023 13:41:35 +0100 -Subject: [PATCH] xen/arm: page: Handle cache flush of an element at the top of - the address space - -The region that needs to be cleaned/invalidated may be at the top -of the address space. This means that 'end' (i.e. 'p + size') will -be 0 and therefore nothing will be cleaned/invalidated as the check -in the loop will always be false. - -On Arm64, we only support we only support up to 48-bit Virtual -address space. So this is not a concern there. However, for 32-bit, -the mapcache is using the last 2GB of the address space. Therefore -we may not clean/invalidate properly some pages. This could lead -to memory corruption or data leakage (the scrubbed value may -still sit in the cache when the guest could read directly the memory -and therefore read the old content). - -Rework invalidate_dcache_va_range(), clean_dcache_va_range(), -clean_and_invalidate_dcache_va_range() to handle a cache flush -with an element at the top of the address space. - -This is CVE-2023-34321 / XSA-437. 
- -Reported-by: Julien Grall -Signed-off-by: Stefano Stabellini -Signed-off-by: Julien Grall -Acked-by: Bertrand Marquis - ---- - xen/arch/arm/include/asm/page.h | 33 ++++++++++++++++++++------------- - 1 file changed, 20 insertions(+), 13 deletions(-) - -diff --git a/xen/arch/arm/include/asm/page.h b/xen/arch/arm/include/asm/page.h -index e7cd62190c7f..d7fe770a5e49 100644 ---- a/xen/arch/arm/include/asm/page.h -+++ b/xen/arch/arm/include/asm/page.h -@@ -160,26 +160,25 @@ static inline size_t read_dcache_line_bytes(void) - - static inline int invalidate_dcache_va_range(const void *p, unsigned long size) - { -- const void *end = p + size; - size_t cacheline_mask = dcache_line_bytes - 1; - - dsb(sy); /* So the CPU issues all writes to the range */ - - if ( (uintptr_t)p & cacheline_mask ) - { -+ size -= dcache_line_bytes - ((uintptr_t)p & cacheline_mask); - p = (void *)((uintptr_t)p & ~cacheline_mask); - asm volatile (__clean_and_invalidate_dcache_one(0) : : "r" (p)); - p += dcache_line_bytes; - } -- if ( (uintptr_t)end & cacheline_mask ) -- { -- end = (void *)((uintptr_t)end & ~cacheline_mask); -- asm volatile (__clean_and_invalidate_dcache_one(0) : : "r" (end)); -- } - -- for ( ; p < end; p += dcache_line_bytes ) -+ for ( ; size >= dcache_line_bytes; -+ p += dcache_line_bytes, size -= dcache_line_bytes ) - asm volatile (__invalidate_dcache_one(0) : : "r" (p)); - -+ if ( size > 0 ) -+ asm volatile (__clean_and_invalidate_dcache_one(0) : : "r" (p)); -+ - dsb(sy); /* So we know the flushes happen before continuing */ - - return 0; -@@ -187,10 +186,14 @@ static inline int invalidate_dcache_va_range(const void *p, unsigned long size) - - static inline int clean_dcache_va_range(const void *p, unsigned long size) - { -- const void *end = p + size; -+ size_t cacheline_mask = dcache_line_bytes - 1; -+ - dsb(sy); /* So the CPU issues all writes to the range */ -- p = (void *)((uintptr_t)p & ~(dcache_line_bytes - 1)); -- for ( ; p < end; p += dcache_line_bytes ) -+ size += 
(uintptr_t)p & cacheline_mask; -+ size = (size + cacheline_mask) & ~cacheline_mask; -+ p = (void *)((uintptr_t)p & ~cacheline_mask); -+ for ( ; size >= dcache_line_bytes; -+ p += dcache_line_bytes, size -= dcache_line_bytes ) - asm volatile (__clean_dcache_one(0) : : "r" (p)); - dsb(sy); /* So we know the flushes happen before continuing */ - /* ARM callers assume that dcache_* functions cannot fail. */ -@@ -200,10 +203,14 @@ static inline int clean_dcache_va_range(const void *p, unsigned long size) - static inline int clean_and_invalidate_dcache_va_range - (const void *p, unsigned long size) - { -- const void *end = p + size; -+ size_t cacheline_mask = dcache_line_bytes - 1; -+ - dsb(sy); /* So the CPU issues all writes to the range */ -- p = (void *)((uintptr_t)p & ~(dcache_line_bytes - 1)); -- for ( ; p < end; p += dcache_line_bytes ) -+ size += (uintptr_t)p & cacheline_mask; -+ size = (size + cacheline_mask) & ~cacheline_mask; -+ p = (void *)((uintptr_t)p & ~cacheline_mask); -+ for ( ; size >= dcache_line_bytes; -+ p += dcache_line_bytes, size -= dcache_line_bytes ) - asm volatile (__clean_and_invalidate_dcache_one(0) : : "r" (p)); - dsb(sy); /* So we know the flushes happen before continuing */ - /* ARM callers assume that dcache_* functions cannot fail. */ --- -2.40.1 - diff --git a/xsa438-4.17.patch b/xsa438-4.17.patch deleted file mode 100644 index 12d6ec7..0000000 --- a/xsa438-4.17.patch +++ /dev/null @@ -1,416 +0,0 @@ -From: Jan Beulich -Subject: x86/shadow: defer releasing of PV's top-level shadow reference - -sh_set_toplevel_shadow() re-pinning the top-level shadow we may be -running on is not enough (and at the same time unnecessary when the -shadow isn't what we're running on): That shadow becomes eligible for -blowing away (from e.g. shadow_prealloc()) immediately after the -paging lock was dropped. Yet it needs to remain valid until the actual -page table switch occurred. 
- -Propagate up the call chain the shadow entry that needs releasing -eventually, and carry out the release immediately after switching page -tables. Handle update_cr3() failures by switching to idle pagetables. -Note that various further uses of update_cr3() are HVM-only or only act -on paused vCPU-s, in which case sh_set_toplevel_shadow() will not defer -releasing of the reference. - -While changing the update_cr3() hook, also convert the "do_locking" -parameter to boolean. - -This is CVE-2023-34322 / XSA-438. - -Reported-by: Tim Deegan -Signed-off-by: Jan Beulich -Reviewed-by: George Dunlap - ---- a/xen/arch/x86/include/asm/mm.h -+++ b/xen/arch/x86/include/asm/mm.h -@@ -552,7 +552,7 @@ void audit_domains(void); - #endif - - void make_cr3(struct vcpu *v, mfn_t mfn); --void update_cr3(struct vcpu *v); -+pagetable_t update_cr3(struct vcpu *v); - int vcpu_destroy_pagetables(struct vcpu *); - void *do_page_walk(struct vcpu *v, unsigned long addr); - ---- a/xen/arch/x86/include/asm/paging.h -+++ b/xen/arch/x86/include/asm/paging.h -@@ -138,7 +138,7 @@ struct paging_mode { - paddr_t ga, uint32_t *pfec, - unsigned int *page_order); - #endif -- void (*update_cr3 )(struct vcpu *v, int do_locking, -+ pagetable_t (*update_cr3 )(struct vcpu *v, bool do_locking, - bool noflush); - void (*update_paging_modes )(struct vcpu *v); - bool (*flush_tlb )(const unsigned long *vcpu_bitmap); -@@ -310,9 +310,9 @@ static inline unsigned long paging_ga_to - /* Update all the things that are derived from the guest's CR3. 
- * Called when the guest changes CR3; the caller can then use v->arch.cr3 - * as the value to load into the host CR3 to schedule this vcpu */ --static inline void paging_update_cr3(struct vcpu *v, bool noflush) -+static inline pagetable_t paging_update_cr3(struct vcpu *v, bool noflush) - { -- paging_get_hostmode(v)->update_cr3(v, 1, noflush); -+ return paging_get_hostmode(v)->update_cr3(v, 1, noflush); - } - - /* Update all the things that are derived from the guest's CR0/CR3/CR4. ---- a/xen/arch/x86/include/asm/shadow.h -+++ b/xen/arch/x86/include/asm/shadow.h -@@ -99,6 +99,9 @@ int shadow_set_allocation(struct domain - - int shadow_get_allocation_bytes(struct domain *d, uint64_t *size); - -+/* Helper to invoke for deferred releasing of a top-level shadow's reference. */ -+void shadow_put_top_level(struct domain *d, pagetable_t old); -+ - #else /* !CONFIG_SHADOW_PAGING */ - - #define shadow_vcpu_teardown(v) ASSERT(is_pv_vcpu(v)) -@@ -121,6 +124,11 @@ static inline void shadow_prepare_page_t - - static inline void shadow_blow_tables_per_domain(struct domain *d) {} - -+static inline void shadow_put_top_level(struct domain *d, pagetable_t old) -+{ -+ ASSERT_UNREACHABLE(); -+} -+ - static inline int shadow_domctl(struct domain *d, - struct xen_domctl_shadow_op *sc, - XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl) ---- a/xen/arch/x86/mm/hap/hap.c -+++ b/xen/arch/x86/mm/hap/hap.c -@@ -739,11 +739,13 @@ static bool cf_check hap_invlpg(struct v - return 1; - } - --static void cf_check hap_update_cr3( -- struct vcpu *v, int do_locking, bool noflush) -+static pagetable_t cf_check hap_update_cr3( -+ struct vcpu *v, bool do_locking, bool noflush) - { - v->arch.hvm.hw_cr[3] = v->arch.hvm.guest_cr[3]; - hvm_update_guest_cr3(v, noflush); -+ -+ return pagetable_null(); - } - - static bool flush_vcpu(const struct vcpu *v, const unsigned long *vcpu_bitmap) ---- a/xen/arch/x86/mm/shadow/common.c -+++ b/xen/arch/x86/mm/shadow/common.c -@@ -2590,13 +2590,13 @@ void cf_check 
shadow_update_paging_modes - } - - /* Set up the top-level shadow and install it in slot 'slot' of shadow_table */ --void sh_set_toplevel_shadow(struct vcpu *v, -- unsigned int slot, -- mfn_t gmfn, -- unsigned int root_type, -- mfn_t (*make_shadow)(struct vcpu *v, -- mfn_t gmfn, -- uint32_t shadow_type)) -+pagetable_t sh_set_toplevel_shadow(struct vcpu *v, -+ unsigned int slot, -+ mfn_t gmfn, -+ unsigned int root_type, -+ mfn_t (*make_shadow)(struct vcpu *v, -+ mfn_t gmfn, -+ uint32_t shadow_type)) - { - mfn_t smfn; - pagetable_t old_entry, new_entry; -@@ -2653,20 +2653,37 @@ void sh_set_toplevel_shadow(struct vcpu - mfn_x(gmfn), mfn_x(pagetable_get_mfn(new_entry))); - v->arch.paging.shadow.shadow_table[slot] = new_entry; - -- /* Decrement the refcount of the old contents of this slot */ -- if ( !pagetable_is_null(old_entry) ) -+ /* -+ * Decrement the refcount of the old contents of this slot, unless -+ * we're still running on that shadow - in that case it'll need holding -+ * on to until the actual page table switch did occur. -+ */ -+ if ( !pagetable_is_null(old_entry) && (v != current || !is_pv_domain(d)) ) - { -- mfn_t old_smfn = pagetable_get_mfn(old_entry); -- /* Need to repin the old toplevel shadow if it's been unpinned -- * by shadow_prealloc(): in PV mode we're still running on this -- * shadow and it's not safe to free it yet. */ -- if ( !mfn_to_page(old_smfn)->u.sh.pinned && !sh_pin(d, old_smfn) ) -- { -- printk(XENLOG_G_ERR "can't re-pin %"PRI_mfn"\n", mfn_x(old_smfn)); -- domain_crash(d); -- } -- sh_put_ref(d, old_smfn, 0); -+ sh_put_ref(d, pagetable_get_mfn(old_entry), 0); -+ old_entry = pagetable_null(); - } -+ -+ /* -+ * 2- and 3-level shadow mode is used for HVM only. Therefore we never run -+ * on such a shadow, so only call sites requesting an L4 shadow need to pay -+ * attention to the returned value. 
-+ */ -+ ASSERT(pagetable_is_null(old_entry) || root_type == SH_type_l4_64_shadow); -+ -+ return old_entry; -+} -+ -+/* -+ * Helper invoked when releasing of a top-level shadow's reference was -+ * deferred in sh_set_toplevel_shadow() above. -+ */ -+void shadow_put_top_level(struct domain *d, pagetable_t old_entry) -+{ -+ ASSERT(!pagetable_is_null(old_entry)); -+ paging_lock(d); -+ sh_put_ref(d, pagetable_get_mfn(old_entry), 0); -+ paging_unlock(d); - } - - /**************************************************************************/ ---- a/xen/arch/x86/mm/shadow/multi.c -+++ b/xen/arch/x86/mm/shadow/multi.c -@@ -3224,7 +3224,8 @@ static void cf_check sh_detach_old_table - } - } - --static void cf_check sh_update_cr3(struct vcpu *v, int do_locking, bool noflush) -+static pagetable_t cf_check sh_update_cr3(struct vcpu *v, bool do_locking, -+ bool noflush) - /* Updates vcpu->arch.cr3 after the guest has changed CR3. - * Paravirtual guests should set v->arch.guest_table (and guest_table_user, - * if appropriate). 
-@@ -3238,6 +3239,7 @@ static void cf_check sh_update_cr3(struc - { - struct domain *d = v->domain; - mfn_t gmfn; -+ pagetable_t old_entry = pagetable_null(); - #if GUEST_PAGING_LEVELS == 3 - const guest_l3e_t *gl3e; - unsigned int i, guest_idx; -@@ -3247,7 +3249,7 @@ static void cf_check sh_update_cr3(struc - if ( !is_hvm_domain(d) && !v->is_initialised ) - { - ASSERT(v->arch.cr3 == 0); -- return; -+ return old_entry; - } - - if ( do_locking ) paging_lock(v->domain); -@@ -3320,11 +3322,12 @@ static void cf_check sh_update_cr3(struc - #if GUEST_PAGING_LEVELS == 4 - if ( sh_remove_write_access(d, gmfn, 4, 0) != 0 ) - guest_flush_tlb_mask(d, d->dirty_cpumask); -- sh_set_toplevel_shadow(v, 0, gmfn, SH_type_l4_shadow, sh_make_shadow); -+ old_entry = sh_set_toplevel_shadow(v, 0, gmfn, SH_type_l4_shadow, -+ sh_make_shadow); - if ( unlikely(pagetable_is_null(v->arch.paging.shadow.shadow_table[0])) ) - { - ASSERT(d->is_dying || d->is_shutting_down); -- return; -+ return old_entry; - } - if ( !shadow_mode_external(d) && !is_pv_32bit_domain(d) ) - { -@@ -3368,24 +3371,30 @@ static void cf_check sh_update_cr3(struc - gl2gfn = guest_l3e_get_gfn(gl3e[i]); - gl2mfn = get_gfn_query_unlocked(d, gfn_x(gl2gfn), &p2mt); - if ( p2m_is_ram(p2mt) ) -- sh_set_toplevel_shadow(v, i, gl2mfn, SH_type_l2_shadow, -- sh_make_shadow); -+ old_entry = sh_set_toplevel_shadow(v, i, gl2mfn, -+ SH_type_l2_shadow, -+ sh_make_shadow); - else -- sh_set_toplevel_shadow(v, i, INVALID_MFN, 0, -- sh_make_shadow); -+ old_entry = sh_set_toplevel_shadow(v, i, INVALID_MFN, 0, -+ sh_make_shadow); - } - else -- sh_set_toplevel_shadow(v, i, INVALID_MFN, 0, sh_make_shadow); -+ old_entry = sh_set_toplevel_shadow(v, i, INVALID_MFN, 0, -+ sh_make_shadow); -+ -+ ASSERT(pagetable_is_null(old_entry)); - } - } - #elif GUEST_PAGING_LEVELS == 2 - if ( sh_remove_write_access(d, gmfn, 2, 0) != 0 ) - guest_flush_tlb_mask(d, d->dirty_cpumask); -- sh_set_toplevel_shadow(v, 0, gmfn, SH_type_l2_shadow, sh_make_shadow); -+ old_entry 
= sh_set_toplevel_shadow(v, 0, gmfn, SH_type_l2_shadow, -+ sh_make_shadow); -+ ASSERT(pagetable_is_null(old_entry)); - if ( unlikely(pagetable_is_null(v->arch.paging.shadow.shadow_table[0])) ) - { - ASSERT(d->is_dying || d->is_shutting_down); -- return; -+ return old_entry; - } - #else - #error This should never happen -@@ -3473,6 +3482,8 @@ static void cf_check sh_update_cr3(struc - - /* Release the lock, if we took it (otherwise it's the caller's problem) */ - if ( do_locking ) paging_unlock(v->domain); -+ -+ return old_entry; - } - - ---- a/xen/arch/x86/mm/shadow/none.c -+++ b/xen/arch/x86/mm/shadow/none.c -@@ -52,9 +52,11 @@ static unsigned long cf_check _gva_to_gf - } - #endif - --static void cf_check _update_cr3(struct vcpu *v, int do_locking, bool noflush) -+static pagetable_t cf_check _update_cr3(struct vcpu *v, bool do_locking, -+ bool noflush) - { - ASSERT_UNREACHABLE(); -+ return pagetable_null(); - } - - static void cf_check _update_paging_modes(struct vcpu *v) ---- a/xen/arch/x86/mm/shadow/private.h -+++ b/xen/arch/x86/mm/shadow/private.h -@@ -391,13 +391,13 @@ mfn_t shadow_alloc(struct domain *d, - void shadow_free(struct domain *d, mfn_t smfn); - - /* Set up the top-level shadow and install it in slot 'slot' of shadow_table */ --void sh_set_toplevel_shadow(struct vcpu *v, -- unsigned int slot, -- mfn_t gmfn, -- unsigned int root_type, -- mfn_t (*make_shadow)(struct vcpu *v, -- mfn_t gmfn, -- uint32_t shadow_type)); -+pagetable_t sh_set_toplevel_shadow(struct vcpu *v, -+ unsigned int slot, -+ mfn_t gmfn, -+ unsigned int root_type, -+ mfn_t (*make_shadow)(struct vcpu *v, -+ mfn_t gmfn, -+ uint32_t shadow_type)); - - /* Update the shadows in response to a pagetable write from Xen */ - int sh_validate_guest_entry(struct vcpu *v, mfn_t gmfn, void *entry, u32 size); ---- a/xen/arch/x86/mm.c -+++ b/xen/arch/x86/mm.c -@@ -567,15 +567,12 @@ void write_ptbase(struct vcpu *v) - * - * Update ref counts to shadow tables appropriately. 
- */ --void update_cr3(struct vcpu *v) -+pagetable_t update_cr3(struct vcpu *v) - { - mfn_t cr3_mfn; - - if ( paging_mode_enabled(v->domain) ) -- { -- paging_update_cr3(v, false); -- return; -- } -+ return paging_update_cr3(v, false); - - if ( !(v->arch.flags & TF_kernel_mode) ) - cr3_mfn = pagetable_get_mfn(v->arch.guest_table_user); -@@ -583,6 +580,8 @@ void update_cr3(struct vcpu *v) - cr3_mfn = pagetable_get_mfn(v->arch.guest_table); - - make_cr3(v, cr3_mfn); -+ -+ return pagetable_null(); - } - - static inline void set_tlbflush_timestamp(struct page_info *page) -@@ -3285,6 +3284,7 @@ int new_guest_cr3(mfn_t mfn) - struct domain *d = curr->domain; - int rc; - mfn_t old_base_mfn; -+ pagetable_t old_shadow; - - if ( is_pv_32bit_domain(d) ) - { -@@ -3352,9 +3352,22 @@ int new_guest_cr3(mfn_t mfn) - if ( !VM_ASSIST(d, m2p_strict) ) - fill_ro_mpt(mfn); - curr->arch.guest_table = pagetable_from_mfn(mfn); -- update_cr3(curr); -+ old_shadow = update_cr3(curr); -+ -+ /* -+ * In shadow mode update_cr3() can fail, in which case here we're still -+ * running on the prior top-level shadow (which we're about to release). -+ * Switch to the idle page tables in such an event; the guest will have -+ * been crashed already. 
-+ */ -+ if ( likely(!mfn_eq(pagetable_get_mfn(old_shadow), -+ maddr_to_mfn(curr->arch.cr3 & ~X86_CR3_NOFLUSH))) ) -+ write_ptbase(curr); -+ else -+ write_ptbase(idle_vcpu[curr->processor]); - -- write_ptbase(curr); -+ if ( !pagetable_is_null(old_shadow) ) -+ shadow_put_top_level(d, old_shadow); - - if ( likely(mfn_x(old_base_mfn) != 0) ) - { ---- a/xen/arch/x86/pv/domain.c -+++ b/xen/arch/x86/pv/domain.c -@@ -424,10 +424,13 @@ bool __init xpti_pcid_enabled(void) - - static void _toggle_guest_pt(struct vcpu *v) - { -+ bool guest_update; -+ pagetable_t old_shadow; - unsigned long cr3; - - v->arch.flags ^= TF_kernel_mode; -- update_cr3(v); -+ guest_update = v->arch.flags & TF_kernel_mode; -+ old_shadow = update_cr3(v); - - /* - * Don't flush user global mappings from the TLB. Don't tick TLB clock. -@@ -436,13 +439,31 @@ static void _toggle_guest_pt(struct vcpu - * TLB flush (for just the incoming PCID), as the top level page table may - * have changed behind our backs. To be on the safe side, suppress the - * no-flush unconditionally in this case. -+ * -+ * Furthermore in shadow mode update_cr3() can fail, in which case here -+ * we're still running on the prior top-level shadow (which we're about -+ * to release). Switch to the idle page tables in such an event; the -+ * guest will have been crashed already. - */ - cr3 = v->arch.cr3; - if ( shadow_mode_enabled(v->domain) ) -+ { - cr3 &= ~X86_CR3_NOFLUSH; -+ -+ if ( unlikely(mfn_eq(pagetable_get_mfn(old_shadow), -+ maddr_to_mfn(cr3))) ) -+ { -+ cr3 = idle_vcpu[v->processor]->arch.cr3; -+ /* Also suppress runstate/time area updates below. 
*/ -+ guest_update = false; -+ } -+ } - write_cr3(cr3); - -- if ( !(v->arch.flags & TF_kernel_mode) ) -+ if ( !pagetable_is_null(old_shadow) ) -+ shadow_put_top_level(v->domain, old_shadow); -+ -+ if ( !guest_update ) - return; - - if ( v->arch.pv.need_update_runstate_area && update_runstate_area(v) ) diff --git a/xsa439-0001-x86-AMD-extend-Zenbleed-check-to-models-good-ucode-i.patch b/xsa439-0001-x86-AMD-extend-Zenbleed-check-to-models-good-ucode-i.patch deleted file mode 100644 index 96e56ec..0000000 --- a/xsa439-0001-x86-AMD-extend-Zenbleed-check-to-models-good-ucode-i.patch +++ /dev/null @@ -1,49 +0,0 @@ -From d2d2dcae879c6cc05227c9620f0a772f35fe6886 Mon Sep 17 00:00:00 2001 -Message-ID: -From: Jan Beulich -Date: Wed, 23 Aug 2023 09:26:36 +0200 -Subject: [XEN PATCH 01/10] x86/AMD: extend Zenbleed check to models "good" - ucode isn't known for - -Reportedly the AMD Custom APU 0405 found on SteamDeck, models 0x90 and -0x91, (quoting the respective Linux commit) is similarly affected. Put -another instance of our Zen1 vs Zen2 distinction checks in -amd_check_zenbleed(), forcing use of the chickenbit irrespective of -ucode version (building upon real hardware never surfacing a version of -0xffffffff). - -Signed-off-by: Jan Beulich -Reviewed-by: Andrew Cooper -(cherry picked from commit 145a69c0944ac70cfcf9d247c85dee9e99d9d302) ---- - xen/arch/x86/cpu/amd.c | 13 ++++++++++--- - 1 file changed, 10 insertions(+), 3 deletions(-) - -diff --git a/xen/arch/x86/cpu/amd.c b/xen/arch/x86/cpu/amd.c -index 3ea214fc2e..1bb3044be1 100644 ---- a/xen/arch/x86/cpu/amd.c -+++ b/xen/arch/x86/cpu/amd.c -@@ -909,10 +909,17 @@ void amd_check_zenbleed(void) - case 0xa0 ... 0xaf: good_rev = 0x08a00008; break; - default: - /* -- * With the Fam17h check above, parts getting here are Zen1. -- * They're not affected. -+ * With the Fam17h check above, most parts getting here are -+ * Zen1. They're not affected. Assume Zen2 ones making it -+ * here are affected regardless of microcode version. 
-+ * -+ * Zen1 vs Zen2 isn't a simple model number comparison, so use -+ * STIBP as a heuristic to distinguish. - */ -- return; -+ if (!boot_cpu_has(X86_FEATURE_AMD_STIBP)) -+ return; -+ good_rev = ~0U; -+ break; - } - - rdmsrl(MSR_AMD64_DE_CFG, val); --- -2.41.0 - diff --git a/xsa439-0002-x86-spec-ctrl-Fix-confusion-between-SPEC_CTRL_EXIT_T.patch b/xsa439-0002-x86-spec-ctrl-Fix-confusion-between-SPEC_CTRL_EXIT_T.patch deleted file mode 100644 index 8b8e30a..0000000 --- a/xsa439-0002-x86-spec-ctrl-Fix-confusion-between-SPEC_CTRL_EXIT_T.patch +++ /dev/null @@ -1,77 +0,0 @@ -From dc28aba565f226f9bec24cfde993e78478acfb4e Mon Sep 17 00:00:00 2001 -Message-ID: -In-Reply-To: -References: -From: Andrew Cooper -Date: Tue, 12 Sep 2023 15:06:49 +0100 -Subject: [XEN PATCH 02/10] x86/spec-ctrl: Fix confusion between - SPEC_CTRL_EXIT_TO_XEN{,_IST} - -c/s 3fffaf9c13e9 ("x86/entry: Avoid using alternatives in NMI/#MC paths") -dropped the only user, leaving behind the (incorrect) implication that Xen had -split exit paths. - -Delete the unused SPEC_CTRL_EXIT_TO_XEN and rename SPEC_CTRL_EXIT_TO_XEN_IST -to SPEC_CTRL_EXIT_TO_XEN for consistency. - -No functional change. 
- -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich -(cherry picked from commit 1c18d73774533a55ba9d1cbee8bdace03efdb5e7) ---- - xen/arch/x86/include/asm/spec_ctrl_asm.h | 10 ++-------- - xen/arch/x86/x86_64/entry.S | 2 +- - 2 files changed, 3 insertions(+), 9 deletions(-) - -diff --git a/xen/arch/x86/include/asm/spec_ctrl_asm.h b/xen/arch/x86/include/asm/spec_ctrl_asm.h -index f23bb105c5..e8fd01243c 100644 ---- a/xen/arch/x86/include/asm/spec_ctrl_asm.h -+++ b/xen/arch/x86/include/asm/spec_ctrl_asm.h -@@ -79,7 +79,6 @@ - * - SPEC_CTRL_ENTRY_FROM_PV - * - SPEC_CTRL_ENTRY_FROM_INTR - * - SPEC_CTRL_ENTRY_FROM_INTR_IST -- * - SPEC_CTRL_EXIT_TO_XEN_IST - * - SPEC_CTRL_EXIT_TO_XEN - * - SPEC_CTRL_EXIT_TO_PV - * -@@ -268,11 +267,6 @@ - ALTERNATIVE "", __stringify(DO_SPEC_CTRL_ENTRY maybexen=1), \ - X86_FEATURE_SC_MSR_PV - --/* Use when exiting to Xen context. */ --#define SPEC_CTRL_EXIT_TO_XEN \ -- ALTERNATIVE "", \ -- DO_SPEC_CTRL_EXIT_TO_XEN, X86_FEATURE_SC_MSR_PV -- - /* Use when exiting to PV guest context. */ - #define SPEC_CTRL_EXIT_TO_PV \ - ALTERNATIVE "", \ -@@ -339,8 +333,8 @@ UNLIKELY_DISPATCH_LABEL(\@_serialise): - UNLIKELY_END(\@_serialise) - .endm - --/* Use when exiting to Xen in IST context. */ --.macro SPEC_CTRL_EXIT_TO_XEN_IST -+/* Use when exiting to Xen context. */ -+.macro SPEC_CTRL_EXIT_TO_XEN - /* - * Requires %rbx=stack_end - * Clobbers %rax, %rcx, %rdx -diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S -index 7675a59ff0..b45a09823a 100644 ---- a/xen/arch/x86/x86_64/entry.S -+++ b/xen/arch/x86/x86_64/entry.S -@@ -673,7 +673,7 @@ UNLIKELY_START(ne, exit_cr3) - UNLIKELY_END(exit_cr3) - - /* WARNING! `ret`, `call *`, `jmp *` not safe beyond this point. 
*/ -- SPEC_CTRL_EXIT_TO_XEN_IST /* Req: %rbx=end, Clob: acd */ -+ SPEC_CTRL_EXIT_TO_XEN /* Req: %rbx=end, Clob: acd */ - - RESTORE_ALL adj=8 - iretq --- -2.41.0 - diff --git a/xsa439-0003-x86-spec-ctrl-Fold-DO_SPEC_CTRL_EXIT_TO_XEN-into-it-.patch b/xsa439-0003-x86-spec-ctrl-Fold-DO_SPEC_CTRL_EXIT_TO_XEN-into-it-.patch deleted file mode 100644 index 547032e..0000000 --- a/xsa439-0003-x86-spec-ctrl-Fold-DO_SPEC_CTRL_EXIT_TO_XEN-into-it-.patch +++ /dev/null @@ -1,88 +0,0 @@ -From 84690fb82c4f4aecb72a6789d8994efa74841e09 Mon Sep 17 00:00:00 2001 -Message-ID: <84690fb82c4f4aecb72a6789d8994efa74841e09.1695733540.git.m.a.young@durham.ac.uk> -In-Reply-To: -References: -From: Andrew Cooper -Date: Tue, 12 Sep 2023 17:03:16 +0100 -Subject: [XEN PATCH 03/10] x86/spec-ctrl: Fold DO_SPEC_CTRL_EXIT_TO_XEN into - it's single user - -With the SPEC_CTRL_EXIT_TO_XEN{,_IST} confusion fixed, it's now obvious that -there's only a single EXIT_TO_XEN path. Fold DO_SPEC_CTRL_EXIT_TO_XEN into -SPEC_CTRL_EXIT_TO_XEN to simplify further fixes. - -When merging labels, switch the name to .L\@_skip_sc_msr as "skip" on its own -is going to be too generic shortly. - -No functional change. - -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich -(cherry picked from commit 694bb0f280fd08a4377e36e32b84b5062def4de2) ---- - xen/arch/x86/include/asm/spec_ctrl_asm.h | 40 ++++++++++-------------- - 1 file changed, 16 insertions(+), 24 deletions(-) - -diff --git a/xen/arch/x86/include/asm/spec_ctrl_asm.h b/xen/arch/x86/include/asm/spec_ctrl_asm.h -index e8fd01243c..d5f65d80ea 100644 ---- a/xen/arch/x86/include/asm/spec_ctrl_asm.h -+++ b/xen/arch/x86/include/asm/spec_ctrl_asm.h -@@ -211,27 +211,6 @@ - wrmsr - .endm - --.macro DO_SPEC_CTRL_EXIT_TO_XEN --/* -- * Requires %rbx=stack_end -- * Clobbers %rax, %rcx, %rdx -- * -- * When returning to Xen context, look to see whether SPEC_CTRL shadowing is -- * in effect, and reload the shadow value. 
This covers race conditions which -- * exist with an NMI/MCE/etc hitting late in the return-to-guest path. -- */ -- xor %edx, %edx -- -- testb $SCF_use_shadow, STACK_CPUINFO_FIELD(spec_ctrl_flags)(%rbx) -- jz .L\@_skip -- -- mov STACK_CPUINFO_FIELD(shadow_spec_ctrl)(%rbx), %eax -- mov $MSR_SPEC_CTRL, %ecx -- wrmsr -- --.L\@_skip: --.endm -- - .macro DO_SPEC_CTRL_EXIT_TO_GUEST - /* - * Requires %eax=spec_ctrl, %rsp=regs/cpuinfo -@@ -340,11 +319,24 @@ UNLIKELY_DISPATCH_LABEL(\@_serialise): - * Clobbers %rax, %rcx, %rdx - */ - testb $SCF_ist_sc_msr, STACK_CPUINFO_FIELD(spec_ctrl_flags)(%rbx) -- jz .L\@_skip -+ jz .L\@_skip_sc_msr - -- DO_SPEC_CTRL_EXIT_TO_XEN -+ /* -+ * When returning to Xen context, look to see whether SPEC_CTRL shadowing -+ * is in effect, and reload the shadow value. This covers race conditions -+ * which exist with an NMI/MCE/etc hitting late in the return-to-guest -+ * path. -+ */ -+ xor %edx, %edx - --.L\@_skip: -+ testb $SCF_use_shadow, STACK_CPUINFO_FIELD(spec_ctrl_flags)(%rbx) -+ jz .L\@_skip_sc_msr -+ -+ mov STACK_CPUINFO_FIELD(shadow_spec_ctrl)(%rbx), %eax -+ mov $MSR_SPEC_CTRL, %ecx -+ wrmsr -+ -+.L\@_skip_sc_msr: - .endm - - #endif /* __ASSEMBLY__ */ --- -2.41.0 - diff --git a/xsa439-0004-x86-spec-ctrl-Turn-the-remaining-SPEC_CTRL_-ENTRY-EX.patch b/xsa439-0004-x86-spec-ctrl-Turn-the-remaining-SPEC_CTRL_-ENTRY-EX.patch deleted file mode 100644 index 3350750..0000000 --- a/xsa439-0004-x86-spec-ctrl-Turn-the-remaining-SPEC_CTRL_-ENTRY-EX.patch +++ /dev/null @@ -1,86 +0,0 @@ -From 3952c73bdbd05f0e666986fce633a591237b3c88 Mon Sep 17 00:00:00 2001 -Message-ID: <3952c73bdbd05f0e666986fce633a591237b3c88.1695733540.git.m.a.young@durham.ac.uk> -In-Reply-To: -References: -From: Andrew Cooper -Date: Fri, 1 Sep 2023 11:38:44 +0100 -Subject: [XEN PATCH 04/10] x86/spec-ctrl: Turn the remaining - SPEC_CTRL_{ENTRY,EXIT}_* into asm macros - -These have grown more complex over time, with some already having been -converted. 
- -Provide full Requires/Clobbers comments, otherwise missing at this level of -indirection. - -No functional change. - -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich -(cherry picked from commit 7125429aafb9e3c9c88fc93001fc2300e0ac2cc8) ---- - xen/arch/x86/include/asm/spec_ctrl_asm.h | 37 ++++++++++++++++++------ - 1 file changed, 28 insertions(+), 9 deletions(-) - -diff --git a/xen/arch/x86/include/asm/spec_ctrl_asm.h b/xen/arch/x86/include/asm/spec_ctrl_asm.h -index d5f65d80ea..c6d5f2ad01 100644 ---- a/xen/arch/x86/include/asm/spec_ctrl_asm.h -+++ b/xen/arch/x86/include/asm/spec_ctrl_asm.h -@@ -231,26 +231,45 @@ - .endm - - /* Use after an entry from PV context (syscall/sysenter/int80/int82/etc). */ --#define SPEC_CTRL_ENTRY_FROM_PV \ -+.macro SPEC_CTRL_ENTRY_FROM_PV -+/* -+ * Requires %rsp=regs/cpuinfo, %rdx=0 -+ * Clobbers %rax, %rcx, %rdx -+ */ - ALTERNATIVE "", __stringify(DO_SPEC_CTRL_COND_IBPB maybexen=0), \ -- X86_FEATURE_IBPB_ENTRY_PV; \ -- ALTERNATIVE "", DO_OVERWRITE_RSB, X86_FEATURE_SC_RSB_PV; \ -+ X86_FEATURE_IBPB_ENTRY_PV -+ -+ ALTERNATIVE "", DO_OVERWRITE_RSB, X86_FEATURE_SC_RSB_PV -+ - ALTERNATIVE "", __stringify(DO_SPEC_CTRL_ENTRY maybexen=0), \ - X86_FEATURE_SC_MSR_PV -+.endm - - /* Use in interrupt/exception context. May interrupt Xen or PV context. */ --#define SPEC_CTRL_ENTRY_FROM_INTR \ -+.macro SPEC_CTRL_ENTRY_FROM_INTR -+/* -+ * Requires %rsp=regs, %r14=stack_end, %rdx=0 -+ * Clobbers %rax, %rcx, %rdx -+ */ - ALTERNATIVE "", __stringify(DO_SPEC_CTRL_COND_IBPB maybexen=1), \ -- X86_FEATURE_IBPB_ENTRY_PV; \ -- ALTERNATIVE "", DO_OVERWRITE_RSB, X86_FEATURE_SC_RSB_PV; \ -+ X86_FEATURE_IBPB_ENTRY_PV -+ -+ ALTERNATIVE "", DO_OVERWRITE_RSB, X86_FEATURE_SC_RSB_PV -+ - ALTERNATIVE "", __stringify(DO_SPEC_CTRL_ENTRY maybexen=1), \ - X86_FEATURE_SC_MSR_PV -+.endm - - /* Use when exiting to PV guest context. 
*/ --#define SPEC_CTRL_EXIT_TO_PV \ -- ALTERNATIVE "", \ -- DO_SPEC_CTRL_EXIT_TO_GUEST, X86_FEATURE_SC_MSR_PV; \ -+.macro SPEC_CTRL_EXIT_TO_PV -+/* -+ * Requires %rax=spec_ctrl, %rsp=regs/info -+ * Clobbers %rcx, %rdx -+ */ -+ ALTERNATIVE "", DO_SPEC_CTRL_EXIT_TO_GUEST, X86_FEATURE_SC_MSR_PV -+ - DO_SPEC_CTRL_COND_VERW -+.endm - - /* - * Use in IST interrupt/exception context. May interrupt Xen or PV context. --- -2.41.0 - diff --git a/xsa439-0005-x86-spec-ctrl-Improve-all-SPEC_CTRL_-ENTER-EXIT-_-co.patch b/xsa439-0005-x86-spec-ctrl-Improve-all-SPEC_CTRL_-ENTER-EXIT-_-co.patch deleted file mode 100644 index dda088a..0000000 --- a/xsa439-0005-x86-spec-ctrl-Improve-all-SPEC_CTRL_-ENTER-EXIT-_-co.patch +++ /dev/null @@ -1,109 +0,0 @@ -From ba023e93d0b1e60b80251bf080bab694efb9f8e3 Mon Sep 17 00:00:00 2001 -Message-ID: -In-Reply-To: -References: -From: Andrew Cooper -Date: Wed, 30 Aug 2023 20:11:50 +0100 -Subject: [XEN PATCH 05/10] x86/spec-ctrl: Improve all SPEC_CTRL_{ENTER,EXIT}_* - comments - -... to better explain how they're used. - -Doing so highlights that SPEC_CTRL_EXIT_TO_XEN is missing a VERW flush for the -corner case when e.g. an NMI hits late in an exit-to-guest path. - -Leave a TODO, which will be addressed in subsequent patches which arrange for -VERW flushing to be safe within SPEC_CTRL_EXIT_TO_XEN. - -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich -(cherry picked from commit 45f00557350dc7d0756551069803fc49c29184ca) ---- - xen/arch/x86/include/asm/spec_ctrl_asm.h | 36 ++++++++++++++++++++---- - 1 file changed, 31 insertions(+), 5 deletions(-) - -diff --git a/xen/arch/x86/include/asm/spec_ctrl_asm.h b/xen/arch/x86/include/asm/spec_ctrl_asm.h -index c6d5f2ad01..97c4db31cd 100644 ---- a/xen/arch/x86/include/asm/spec_ctrl_asm.h -+++ b/xen/arch/x86/include/asm/spec_ctrl_asm.h -@@ -230,7 +230,10 @@ - wrmsr - .endm - --/* Use after an entry from PV context (syscall/sysenter/int80/int82/etc). 
*/ -+/* -+ * Used after an entry from PV context: SYSCALL, SYSENTER, INT, -+ * etc. There is always a guest speculation state in context. -+ */ - .macro SPEC_CTRL_ENTRY_FROM_PV - /* - * Requires %rsp=regs/cpuinfo, %rdx=0 -@@ -245,7 +248,11 @@ - X86_FEATURE_SC_MSR_PV - .endm - --/* Use in interrupt/exception context. May interrupt Xen or PV context. */ -+/* -+ * Used after an exception or maskable interrupt, hitting Xen or PV context. -+ * There will either be a guest speculation context, or (barring fatal -+ * exceptions) a well-formed Xen speculation context. -+ */ - .macro SPEC_CTRL_ENTRY_FROM_INTR - /* - * Requires %rsp=regs, %r14=stack_end, %rdx=0 -@@ -260,7 +267,10 @@ - X86_FEATURE_SC_MSR_PV - .endm - --/* Use when exiting to PV guest context. */ -+/* -+ * Used when exiting from any entry context, back to PV context. This -+ * includes from an IST entry which moved onto the primary stack. -+ */ - .macro SPEC_CTRL_EXIT_TO_PV - /* - * Requires %rax=spec_ctrl, %rsp=regs/info -@@ -272,7 +282,13 @@ - .endm - - /* -- * Use in IST interrupt/exception context. May interrupt Xen or PV context. -+ * Used after an IST entry hitting Xen or PV context. Special care is needed, -+ * because when hitting Xen context, there may not be a well-formed -+ * speculation context. (i.e. it can hit in the middle of -+ * SPEC_CTRL_{ENTRY,EXIT}_* regions.) -+ * -+ * An IST entry which hits PV context moves onto the primary stack and leaves -+ * via SPEC_CTRL_EXIT_TO_PV, *not* SPEC_CTRL_EXIT_TO_XEN. - */ - .macro SPEC_CTRL_ENTRY_FROM_INTR_IST - /* -@@ -331,7 +347,14 @@ UNLIKELY_DISPATCH_LABEL(\@_serialise): - UNLIKELY_END(\@_serialise) - .endm - --/* Use when exiting to Xen context. */ -+/* -+ * Use when exiting from any entry context, back to Xen context. This -+ * includes returning to other SPEC_CTRL_{ENTRY,EXIT}_* regions with an -+ * incomplete speculation context. 
-+ * -+ * Because we might have interrupted Xen beyond SPEC_CTRL_EXIT_TO_$GUEST, we -+ * need to treat this as if it were an EXIT_TO_$GUEST case too. -+ */ - .macro SPEC_CTRL_EXIT_TO_XEN - /* - * Requires %rbx=stack_end -@@ -356,6 +379,9 @@ UNLIKELY_DISPATCH_LABEL(\@_serialise): - wrmsr - - .L\@_skip_sc_msr: -+ -+ /* TODO VERW */ -+ - .endm - - #endif /* __ASSEMBLY__ */ --- -2.41.0 - diff --git a/xsa439-0006-x86-entry-Adjust-restore_all_xen-to-hold-stack_end-i.patch b/xsa439-0006-x86-entry-Adjust-restore_all_xen-to-hold-stack_end-i.patch deleted file mode 100644 index e44998e..0000000 --- a/xsa439-0006-x86-entry-Adjust-restore_all_xen-to-hold-stack_end-i.patch +++ /dev/null @@ -1,77 +0,0 @@ -From 5f7efd47c8273fde972637d0360851802f76eca9 Mon Sep 17 00:00:00 2001 -Message-ID: <5f7efd47c8273fde972637d0360851802f76eca9.1695733540.git.m.a.young@durham.ac.uk> -In-Reply-To: -References: -From: Andrew Cooper -Date: Wed, 13 Sep 2023 13:48:16 +0100 -Subject: [XEN PATCH 06/10] x86/entry: Adjust restore_all_xen to hold stack_end - in %r14 - -All other SPEC_CTRL_{ENTRY,EXIT}_* helpers hold stack_end in %r14. Adjust it -for consistency. 
- -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich -(cherry picked from commit 7aa28849a1155d856e214e9a80a7e65fffdc3e58) ---- - xen/arch/x86/include/asm/spec_ctrl_asm.h | 8 ++++---- - xen/arch/x86/x86_64/entry.S | 8 ++++---- - 2 files changed, 8 insertions(+), 8 deletions(-) - -diff --git a/xen/arch/x86/include/asm/spec_ctrl_asm.h b/xen/arch/x86/include/asm/spec_ctrl_asm.h -index 97c4db31cd..66c706496f 100644 ---- a/xen/arch/x86/include/asm/spec_ctrl_asm.h -+++ b/xen/arch/x86/include/asm/spec_ctrl_asm.h -@@ -357,10 +357,10 @@ UNLIKELY_DISPATCH_LABEL(\@_serialise): - */ - .macro SPEC_CTRL_EXIT_TO_XEN - /* -- * Requires %rbx=stack_end -+ * Requires %r14=stack_end - * Clobbers %rax, %rcx, %rdx - */ -- testb $SCF_ist_sc_msr, STACK_CPUINFO_FIELD(spec_ctrl_flags)(%rbx) -+ testb $SCF_ist_sc_msr, STACK_CPUINFO_FIELD(spec_ctrl_flags)(%r14) - jz .L\@_skip_sc_msr - - /* -@@ -371,10 +371,10 @@ UNLIKELY_DISPATCH_LABEL(\@_serialise): - */ - xor %edx, %edx - -- testb $SCF_use_shadow, STACK_CPUINFO_FIELD(spec_ctrl_flags)(%rbx) -+ testb $SCF_use_shadow, STACK_CPUINFO_FIELD(spec_ctrl_flags)(%r14) - jz .L\@_skip_sc_msr - -- mov STACK_CPUINFO_FIELD(shadow_spec_ctrl)(%rbx), %eax -+ mov STACK_CPUINFO_FIELD(shadow_spec_ctrl)(%r14), %eax - mov $MSR_SPEC_CTRL, %ecx - wrmsr - -diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S -index b45a09823a..92279a225d 100644 ---- a/xen/arch/x86/x86_64/entry.S -+++ b/xen/arch/x86/x86_64/entry.S -@@ -665,15 +665,15 @@ restore_all_xen: - * Check whether we need to switch to the per-CPU page tables, in - * case we return to late PV exit code (from an NMI or #MC). - */ -- GET_STACK_END(bx) -- cmpb $0, STACK_CPUINFO_FIELD(use_pv_cr3)(%rbx) -+ GET_STACK_END(14) -+ cmpb $0, STACK_CPUINFO_FIELD(use_pv_cr3)(%r14) - UNLIKELY_START(ne, exit_cr3) -- mov STACK_CPUINFO_FIELD(pv_cr3)(%rbx), %rax -+ mov STACK_CPUINFO_FIELD(pv_cr3)(%r14), %rax - mov %rax, %cr3 - UNLIKELY_END(exit_cr3) - - /* WARNING! 
`ret`, `call *`, `jmp *` not safe beyond this point. */ -- SPEC_CTRL_EXIT_TO_XEN /* Req: %rbx=end, Clob: acd */ -+ SPEC_CTRL_EXIT_TO_XEN /* Req: %r14=end, Clob: acd */ - - RESTORE_ALL adj=8 - iretq --- -2.41.0 - diff --git a/xsa439-0007-x86-entry-Track-the-IST-ness-of-an-entry-for-the-exi.patch b/xsa439-0007-x86-entry-Track-the-IST-ness-of-an-entry-for-the-exi.patch deleted file mode 100644 index 2e36bcc..0000000 --- a/xsa439-0007-x86-entry-Track-the-IST-ness-of-an-entry-for-the-exi.patch +++ /dev/null @@ -1,112 +0,0 @@ -From e4a71bc0da0baf7464bb0d8e33053f330e5ea366 Mon Sep 17 00:00:00 2001 -Message-ID: -In-Reply-To: -References: -From: Andrew Cooper -Date: Wed, 13 Sep 2023 12:20:12 +0100 -Subject: [XEN PATCH 07/10] x86/entry: Track the IST-ness of an entry for the - exit paths - -Use %r12 to hold an ist_exit boolean. This register is zero elsewhere in the -entry/exit asm, so it only needs setting in the IST path. - -As this is subtle and fragile, add check_ist_exit() to be used in debugging -builds to cross-check that the ist_exit boolean matches the entry vector. - -Write check_ist_exit() it in C, because it's debug only and the logic more -complicated than I care to maintain in asm. - -For now, we only need to use this signal in the exit-to-Xen path, but some -exit-to-guest paths happen in IST context too. Check the correctness in all -exit paths to avoid the logic bit-rotting. - -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich -(cherry picked from commit 21bdc25b05a0f8ab6bc73520a9ca01327360732c) - -x86/entry: Partially revert IST-exit checks - -The patch adding check_ist_exit() didn't account for the fact that -reset_stack_and_jump() is not an ABI-preserving boundary. The IST-ness in -%r12 doesn't survive into the next context, and is a stale value C. 
- -This shows up in Gitlab CI for the Clang build: - - https://gitlab.com/xen-project/people/andyhhp/xen/-/jobs/5112783827 - -and in OSSTest for GCC 8: - - http://logs.test-lab.xenproject.org/osstest/logs/183045/test-amd64-amd64-xl-qemuu-debianhvm-amd64/serial-pinot0.log - -There's no straightforward way to reconstruct the IST-exit-ness on the -exit-to-guest path after a context switch. For now, we only need IST-exit on -the return-to-Xen path. - -Fixes: 21bdc25b05a0 ("x86/entry: Track the IST-ness of an entry for the exit paths") -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich -(cherry picked from commit 9b57c800b79b96769ea3dcd6468578fa664d19f9) ---- - xen/arch/x86/traps.c | 13 +++++++++++++ - xen/arch/x86/x86_64/entry.S | 13 ++++++++++++- - 2 files changed, 25 insertions(+), 1 deletion(-) - -diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c -index d12004b1c6..e65cc60041 100644 ---- a/xen/arch/x86/traps.c -+++ b/xen/arch/x86/traps.c -@@ -2315,6 +2315,19 @@ void asm_domain_crash_synchronous(unsigned long addr) - do_softirq(); - } - -+#ifdef CONFIG_DEBUG -+void check_ist_exit(const struct cpu_user_regs *regs, bool ist_exit) -+{ -+ const unsigned int ist_mask = -+ (1U << X86_EXC_NMI) | (1U << X86_EXC_DB) | -+ (1U << X86_EXC_DF) | (1U << X86_EXC_MC); -+ uint8_t ev = regs->entry_vector; -+ bool is_ist = (ev < TRAP_nr) && ((1U << ev) & ist_mask); -+ -+ ASSERT(is_ist == ist_exit); -+} -+#endif -+ - /* - * Local variables: - * mode: C -diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S -index 92279a225d..4cebc4fbe3 100644 ---- a/xen/arch/x86/x86_64/entry.S -+++ b/xen/arch/x86/x86_64/entry.S -@@ -659,8 +659,15 @@ ENTRY(early_page_fault) - .section .text.entry, "ax", @progbits - - ALIGN --/* No special register assumptions. 
*/ -+/* %r12=ist_exit */ - restore_all_xen: -+ -+#ifdef CONFIG_DEBUG -+ mov %rsp, %rdi -+ mov %r12, %rsi -+ call check_ist_exit -+#endif -+ - /* - * Check whether we need to switch to the per-CPU page tables, in - * case we return to late PV exit code (from an NMI or #MC). -@@ -1091,6 +1098,10 @@ handle_ist_exception: - .L_ist_dispatch_done: - mov %r15, STACK_CPUINFO_FIELD(xen_cr3)(%r14) - mov %bl, STACK_CPUINFO_FIELD(use_pv_cr3)(%r14) -+ -+ /* This is an IST exit */ -+ mov $1, %r12d -+ - cmpb $TRAP_nmi,UREGS_entry_vector(%rsp) - jne ret_from_intr - --- -2.41.0 - diff --git a/xsa439-0008-x86-spec-ctrl-Issue-VERW-during-IST-exit-to-Xen.patch b/xsa439-0008-x86-spec-ctrl-Issue-VERW-during-IST-exit-to-Xen.patch deleted file mode 100644 index 6e00ca6..0000000 --- a/xsa439-0008-x86-spec-ctrl-Issue-VERW-during-IST-exit-to-Xen.patch +++ /dev/null @@ -1,92 +0,0 @@ -From 2e2c3efcfc9f183674a8de6ed954ffbe7188b70d Mon Sep 17 00:00:00 2001 -Message-ID: <2e2c3efcfc9f183674a8de6ed954ffbe7188b70d.1695733540.git.m.a.young@durham.ac.uk> -In-Reply-To: -References: -From: Andrew Cooper -Date: Wed, 13 Sep 2023 13:53:33 +0100 -Subject: [XEN PATCH 08/10] x86/spec-ctrl: Issue VERW during IST exit to Xen - -There is a corner case where e.g. an NMI hitting an exit-to-guest path after -SPEC_CTRL_EXIT_TO_* would have run the entire NMI handler *after* the VERW -flush to scrub potentially sensitive data from uarch buffers. - -In order to compensate, issue VERW when exiting to Xen from an IST entry. - -SPEC_CTRL_EXIT_TO_XEN already has two reads of spec_ctrl_flags off the stack, -and we're about to add a third. Load the field into %ebx, and list the -register as clobbered. - -%r12 has been arranged to be the ist_exit signal, so add this as an input -dependency and use it to identify when to issue a VERW. 
- -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich -(cherry picked from commit 3ee6066bcd737756b0990d417d94eddc0b0d2585) ---- - xen/arch/x86/include/asm/spec_ctrl_asm.h | 20 +++++++++++++++----- - xen/arch/x86/x86_64/entry.S | 2 +- - 2 files changed, 16 insertions(+), 6 deletions(-) - -diff --git a/xen/arch/x86/include/asm/spec_ctrl_asm.h b/xen/arch/x86/include/asm/spec_ctrl_asm.h -index 66c706496f..28a75796e6 100644 ---- a/xen/arch/x86/include/asm/spec_ctrl_asm.h -+++ b/xen/arch/x86/include/asm/spec_ctrl_asm.h -@@ -357,10 +357,12 @@ UNLIKELY_DISPATCH_LABEL(\@_serialise): - */ - .macro SPEC_CTRL_EXIT_TO_XEN - /* -- * Requires %r14=stack_end -- * Clobbers %rax, %rcx, %rdx -+ * Requires %r12=ist_exit, %r14=stack_end -+ * Clobbers %rax, %rbx, %rcx, %rdx - */ -- testb $SCF_ist_sc_msr, STACK_CPUINFO_FIELD(spec_ctrl_flags)(%r14) -+ movzbl STACK_CPUINFO_FIELD(spec_ctrl_flags)(%r14), %ebx -+ -+ testb $SCF_ist_sc_msr, %bl - jz .L\@_skip_sc_msr - - /* -@@ -371,7 +373,7 @@ UNLIKELY_DISPATCH_LABEL(\@_serialise): - */ - xor %edx, %edx - -- testb $SCF_use_shadow, STACK_CPUINFO_FIELD(spec_ctrl_flags)(%r14) -+ testb $SCF_use_shadow, %bl - jz .L\@_skip_sc_msr - - mov STACK_CPUINFO_FIELD(shadow_spec_ctrl)(%r14), %eax -@@ -380,8 +382,16 @@ UNLIKELY_DISPATCH_LABEL(\@_serialise): - - .L\@_skip_sc_msr: - -- /* TODO VERW */ -+ test %r12, %r12 -+ jz .L\@_skip_ist_exit -+ -+ /* Logically DO_SPEC_CTRL_COND_VERW but without the %rsp=cpuinfo dependency */ -+ testb $SCF_verw, %bl -+ jz .L\@_skip_verw -+ verw STACK_CPUINFO_FIELD(verw_sel)(%r14) -+.L\@_skip_verw: - -+.L\@_skip_ist_exit: - .endm - - #endif /* __ASSEMBLY__ */ -diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S -index 4cebc4fbe3..c12e011b4d 100644 ---- a/xen/arch/x86/x86_64/entry.S -+++ b/xen/arch/x86/x86_64/entry.S -@@ -680,7 +680,7 @@ UNLIKELY_START(ne, exit_cr3) - UNLIKELY_END(exit_cr3) - - /* WARNING! `ret`, `call *`, `jmp *` not safe beyond this point. 
*/ -- SPEC_CTRL_EXIT_TO_XEN /* Req: %r14=end, Clob: acd */ -+ SPEC_CTRL_EXIT_TO_XEN /* Req: %r12=ist_exit %r14=end, Clob: abcd */ - - RESTORE_ALL adj=8 - iretq --- -2.41.0 - diff --git a/xsa439-0009-x86-amd-Introduce-is_zen-1-2-_uarch-predicates.patch b/xsa439-0009-x86-amd-Introduce-is_zen-1-2-_uarch-predicates.patch deleted file mode 100644 index 5f063b1..0000000 --- a/xsa439-0009-x86-amd-Introduce-is_zen-1-2-_uarch-predicates.patch +++ /dev/null @@ -1,94 +0,0 @@ -From 19ee1e1faa32b79274b3484cb1170a5970f1e602 Mon Sep 17 00:00:00 2001 -Message-ID: <19ee1e1faa32b79274b3484cb1170a5970f1e602.1695733540.git.m.a.young@durham.ac.uk> -In-Reply-To: -References: -From: Andrew Cooper -Date: Fri, 15 Sep 2023 12:13:51 +0100 -Subject: [XEN PATCH 09/10] x86/amd: Introduce is_zen{1,2}_uarch() predicates - -We already have 3 cases using STIBP as a Zen1/2 heuristic, and are about to -introduce a 4th. Wrap the heuristic into a pair of predicates rather than -opencoding it, and the explanation of the heuristic, at each usage site. - -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich -(cherry picked from commit de1d265001397f308c5c3c5d3ffc30e7ef8c0705) ---- - xen/arch/x86/cpu/amd.c | 18 ++++-------------- - xen/arch/x86/include/asm/amd.h | 11 +++++++++++ - 2 files changed, 15 insertions(+), 14 deletions(-) - -diff --git a/xen/arch/x86/cpu/amd.c b/xen/arch/x86/cpu/amd.c -index 1bb3044be1..e94ba5a0e0 100644 ---- a/xen/arch/x86/cpu/amd.c -+++ b/xen/arch/x86/cpu/amd.c -@@ -855,15 +855,13 @@ void amd_set_legacy_ssbd(bool enable) - * non-branch instructions to be ignored. It is to be set unilaterally in - * newer microcode. - * -- * This chickenbit is something unrelated on Zen1, and Zen1 vs Zen2 isn't a -- * simple model number comparison, so use STIBP as a heuristic to separate the -- * two uarches in Fam17h(AMD)/18h(Hygon). -+ * This chickenbit is something unrelated on Zen1. 
- */ - void amd_init_spectral_chicken(void) - { - uint64_t val, chickenbit = 1 << 1; - -- if (cpu_has_hypervisor || !boot_cpu_has(X86_FEATURE_AMD_STIBP)) -+ if (cpu_has_hypervisor || !is_zen2_uarch()) - return; - - if (rdmsr_safe(MSR_AMD64_DE_CFG2, val) == 0 && !(val & chickenbit)) -@@ -912,11 +910,8 @@ void amd_check_zenbleed(void) - * With the Fam17h check above, most parts getting here are - * Zen1. They're not affected. Assume Zen2 ones making it - * here are affected regardless of microcode version. -- * -- * Zen1 vs Zen2 isn't a simple model number comparison, so use -- * STIBP as a heuristic to distinguish. - */ -- if (!boot_cpu_has(X86_FEATURE_AMD_STIBP)) -+ if (is_zen1_uarch()) - return; - good_rev = ~0U; - break; -@@ -1277,12 +1272,7 @@ static int __init cf_check zen2_c6_errata_check(void) - */ - s_time_t delta; - -- /* -- * Zen1 vs Zen2 isn't a simple model number comparison, so use STIBP as -- * a heuristic to separate the two uarches in Fam17h. -- */ -- if (cpu_has_hypervisor || boot_cpu_data.x86 != 0x17 || -- !boot_cpu_has(X86_FEATURE_AMD_STIBP)) -+ if (cpu_has_hypervisor || boot_cpu_data.x86 != 0x17 || !is_zen2_uarch()) - return 0; - - /* -diff --git a/xen/arch/x86/include/asm/amd.h b/xen/arch/x86/include/asm/amd.h -index a975d3de26..82324110ab 100644 ---- a/xen/arch/x86/include/asm/amd.h -+++ b/xen/arch/x86/include/asm/amd.h -@@ -140,6 +140,17 @@ - AMD_MODEL_RANGE(0x11, 0x0, 0x0, 0xff, 0xf), \ - AMD_MODEL_RANGE(0x12, 0x0, 0x0, 0xff, 0xf)) - -+/* -+ * The Zen1 and Zen2 microarchitectures are implemented by AMD (Fam17h) and -+ * Hygon (Fam18h) but without simple model number rules. Instead, use STIBP -+ * as a heuristic that distinguishes the two. -+ * -+ * The caller is required to perform the appropriate vendor/family checks -+ * first. 
-+ */ -+#define is_zen1_uarch() (!boot_cpu_has(X86_FEATURE_AMD_STIBP)) -+#define is_zen2_uarch() boot_cpu_has(X86_FEATURE_AMD_STIBP) -+ - struct cpuinfo_x86; - int cpu_has_amd_erratum(const struct cpuinfo_x86 *, int, ...); - --- -2.41.0 - diff --git a/xsa439-0010-x86-spec-ctrl-Mitigate-the-Zen1-DIV-leakage.patch b/xsa439-0010-x86-spec-ctrl-Mitigate-the-Zen1-DIV-leakage.patch deleted file mode 100644 index 0dc6780..0000000 --- a/xsa439-0010-x86-spec-ctrl-Mitigate-the-Zen1-DIV-leakage.patch +++ /dev/null @@ -1,231 +0,0 @@ -From 9ac2f49f5fa3a5159409241d4f74fb0d721dd4c5 Mon Sep 17 00:00:00 2001 -Message-ID: <9ac2f49f5fa3a5159409241d4f74fb0d721dd4c5.1695733540.git.m.a.young@durham.ac.uk> -In-Reply-To: -References: -From: Andrew Cooper -Date: Wed, 30 Aug 2023 20:24:25 +0100 -Subject: [XEN PATCH 10/10] x86/spec-ctrl: Mitigate the Zen1 DIV leakage - -In the Zen1 microarchitecure, there is one divider in the pipeline which -services uops from both threads. In the case of #DE, the latched result from -the previous DIV to execute will be forwarded speculatively. - -This is an interesting covert channel that allows two threads to communicate -without any system calls. In also allows userspace to obtain the result of -the most recent DIV instruction executed (even speculatively) in the core, -which can be from a higher privilege context. - -Scrub the result from the divider by executing a non-faulting divide. This -needs performing on the exit-to-guest paths, and ist_exit-to-Xen. - -Alternatives in IST context is believed safe now that it's done in NMI -context. - -This is XSA-439 / CVE-2023-20588. 
- -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich -(cherry picked from commit b5926c6ecf05c28ee99c6248c42d691ccbf0c315) ---- - docs/misc/xen-command-line.pandoc | 6 ++- - xen/arch/x86/hvm/svm/entry.S | 1 + - xen/arch/x86/include/asm/cpufeatures.h | 2 +- - xen/arch/x86/include/asm/spec_ctrl_asm.h | 17 +++++++++ - xen/arch/x86/spec_ctrl.c | 48 +++++++++++++++++++++++- - 5 files changed, 71 insertions(+), 3 deletions(-) - -diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc -index d9dae740cc..b92c8f969c 100644 ---- a/docs/misc/xen-command-line.pandoc -+++ b/docs/misc/xen-command-line.pandoc -@@ -2315,7 +2315,7 @@ By default SSBD will be mitigated at runtime (i.e `ssbd=runtime`). - > {msr-sc,rsb,md-clear,ibpb-entry}=|{pv,hvm}=, - > bti-thunk=retpoline|lfence|jmp, {ibrs,ibpb,ssbd,psfd, - > eager-fpu,l1d-flush,branch-harden,srb-lock, --> unpriv-mmio,gds-mit}= ]` -+> unpriv-mmio,gds-mit,div-scrub}= ]` - - Controls for speculative execution sidechannel mitigations. By default, Xen - will pick the most appropriate mitigations based on compiled in support, -@@ -2437,6 +2437,10 @@ has elected not to lock the configuration, Xen will use GDS_CTRL to mitigate - GDS with. Otherwise, Xen will mitigate by disabling AVX, which blocks the use - of the AVX2 Gather instructions. - -+On all hardware, the `div-scrub=` option can be used to force or prevent Xen -+from mitigating the DIV-leakage vulnerability. By default, Xen will mitigate -+DIV-leakage on hardware believed to be vulnerable. -+ - ### sync_console - > `= ` - -diff --git a/xen/arch/x86/hvm/svm/entry.S b/xen/arch/x86/hvm/svm/entry.S -index 981cd82e7c..934f12cf5c 100644 ---- a/xen/arch/x86/hvm/svm/entry.S -+++ b/xen/arch/x86/hvm/svm/entry.S -@@ -74,6 +74,7 @@ __UNLIKELY_END(nsvm_hap) - 1: /* No Spectre v1 concerns. Execution will hit VMRUN imminently. 
*/ - .endm - ALTERNATIVE "", svm_vmentry_spec_ctrl, X86_FEATURE_SC_MSR_HVM -+ ALTERNATIVE "", DO_SPEC_CTRL_DIV, X86_FEATURE_SC_DIV - - pop %r15 - pop %r14 -diff --git a/xen/arch/x86/include/asm/cpufeatures.h b/xen/arch/x86/include/asm/cpufeatures.h -index da0593de85..c3aad21c3b 100644 ---- a/xen/arch/x86/include/asm/cpufeatures.h -+++ b/xen/arch/x86/include/asm/cpufeatures.h -@@ -35,7 +35,7 @@ XEN_CPUFEATURE(SC_RSB_HVM, X86_SYNTH(19)) /* RSB overwrite needed for HVM - XEN_CPUFEATURE(XEN_SELFSNOOP, X86_SYNTH(20)) /* SELFSNOOP gets used by Xen itself */ - XEN_CPUFEATURE(SC_MSR_IDLE, X86_SYNTH(21)) /* Clear MSR_SPEC_CTRL on idle */ - XEN_CPUFEATURE(XEN_LBR, X86_SYNTH(22)) /* Xen uses MSR_DEBUGCTL.LBR */ --/* Bits 23 unused. */ -+XEN_CPUFEATURE(SC_DIV, X86_SYNTH(23)) /* DIV scrub needed */ - XEN_CPUFEATURE(SC_RSB_IDLE, X86_SYNTH(24)) /* RSB overwrite needed for idle. */ - XEN_CPUFEATURE(SC_VERW_IDLE, X86_SYNTH(25)) /* VERW used by Xen for idle */ - XEN_CPUFEATURE(XEN_SHSTK, X86_SYNTH(26)) /* Xen uses CET Shadow Stacks */ -diff --git a/xen/arch/x86/include/asm/spec_ctrl_asm.h b/xen/arch/x86/include/asm/spec_ctrl_asm.h -index 28a75796e6..f4b8b9d956 100644 ---- a/xen/arch/x86/include/asm/spec_ctrl_asm.h -+++ b/xen/arch/x86/include/asm/spec_ctrl_asm.h -@@ -177,6 +177,19 @@ - .L\@_verw_skip: - .endm - -+.macro DO_SPEC_CTRL_DIV -+/* -+ * Requires nothing -+ * Clobbers %rax -+ * -+ * Issue a DIV for its flushing side effect (Zen1 uarch specific). Any -+ * non-faulting DIV will do; a byte DIV has least latency, and doesn't clobber -+ * %rdx. 
-+ */ -+ mov $1, %eax -+ div %al -+.endm -+ - .macro DO_SPEC_CTRL_ENTRY maybexen:req - /* - * Requires %rsp=regs (also cpuinfo if !maybexen) -@@ -279,6 +292,8 @@ - ALTERNATIVE "", DO_SPEC_CTRL_EXIT_TO_GUEST, X86_FEATURE_SC_MSR_PV - - DO_SPEC_CTRL_COND_VERW -+ -+ ALTERNATIVE "", DO_SPEC_CTRL_DIV, X86_FEATURE_SC_DIV - .endm - - /* -@@ -391,6 +406,8 @@ UNLIKELY_DISPATCH_LABEL(\@_serialise): - verw STACK_CPUINFO_FIELD(verw_sel)(%r14) - .L\@_skip_verw: - -+ ALTERNATIVE "", DO_SPEC_CTRL_DIV, X86_FEATURE_SC_DIV -+ - .L\@_skip_ist_exit: - .endm - -diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c -index 79b98f0fe7..0ff3c895ac 100644 ---- a/xen/arch/x86/spec_ctrl.c -+++ b/xen/arch/x86/spec_ctrl.c -@@ -79,6 +79,7 @@ static int8_t __initdata opt_srb_lock = -1; - static bool __initdata opt_unpriv_mmio; - static bool __ro_after_init opt_fb_clear_mmio; - static int8_t __initdata opt_gds_mit = -1; -+static int8_t __initdata opt_div_scrub = -1; - - static int __init cf_check parse_spec_ctrl(const char *s) - { -@@ -133,6 +134,7 @@ static int __init cf_check parse_spec_ctrl(const char *s) - opt_srb_lock = 0; - opt_unpriv_mmio = false; - opt_gds_mit = 0; -+ opt_div_scrub = 0; - } - else if ( val > 0 ) - rc = -EINVAL; -@@ -285,6 +287,8 @@ static int __init cf_check parse_spec_ctrl(const char *s) - opt_unpriv_mmio = val; - else if ( (val = parse_boolean("gds-mit", s, ss)) >= 0 ) - opt_gds_mit = val; -+ else if ( (val = parse_boolean("div-scrub", s, ss)) >= 0 ) -+ opt_div_scrub = val; - else - rc = -EINVAL; - -@@ -485,7 +489,7 @@ static void __init print_details(enum ind_thunk thunk) - "\n"); - - /* Settings for Xen's protection, irrespective of guests. */ -- printk(" Xen settings: BTI-Thunk %s, SPEC_CTRL: %s%s%s%s%s, Other:%s%s%s%s%s\n", -+ printk(" Xen settings: BTI-Thunk %s, SPEC_CTRL: %s%s%s%s%s, Other:%s%s%s%s%s%s\n", - thunk == THUNK_NONE ? "N/A" : - thunk == THUNK_RETPOLINE ? "RETPOLINE" : - thunk == THUNK_LFENCE ? 
"LFENCE" : -@@ -510,6 +514,7 @@ static void __init print_details(enum ind_thunk thunk) - opt_l1d_flush ? " L1D_FLUSH" : "", - opt_md_clear_pv || opt_md_clear_hvm || - opt_fb_clear_mmio ? " VERW" : "", -+ opt_div_scrub ? " DIV" : "", - opt_branch_harden ? " BRANCH_HARDEN" : ""); - - /* L1TF diagnostics, printed if vulnerable or PV shadowing is in use. */ -@@ -967,6 +972,45 @@ static void __init srso_calculations(bool hw_smt_enabled) - setup_force_cpu_cap(X86_FEATURE_SRSO_NO); - } - -+/* -+ * The Div leakage issue is specific to the AMD Zen1 microarchitecure. -+ * -+ * However, there's no $FOO_NO bit defined, so if we're virtualised we have no -+ * hope of spotting the case where we might move to vulnerable hardware. We -+ * also can't make any useful conclusion about SMT-ness. -+ * -+ * Don't check the hypervisor bit, so at least we do the safe thing when -+ * booting on something that looks like a Zen1 CPU. -+ */ -+static bool __init has_div_vuln(void) -+{ -+ if ( !(boot_cpu_data.x86_vendor & -+ (X86_VENDOR_AMD | X86_VENDOR_HYGON)) ) -+ return false; -+ -+ if ( boot_cpu_data.x86 != 0x17 && boot_cpu_data.x86 != 0x18 ) -+ return false; -+ -+ return is_zen1_uarch(); -+} -+ -+static void __init div_calculations(bool hw_smt_enabled) -+{ -+ bool cpu_bug_div = has_div_vuln(); -+ -+ if ( opt_div_scrub == -1 ) -+ opt_div_scrub = cpu_bug_div; -+ -+ if ( opt_div_scrub ) -+ setup_force_cpu_cap(X86_FEATURE_SC_DIV); -+ -+ if ( opt_smt == -1 && !cpu_has_hypervisor && cpu_bug_div && hw_smt_enabled ) -+ warning_add( -+ "Booted on leaky-DIV hardware with SMT/Hyperthreading\n" -+ "enabled. Please assess your configuration and choose an\n" -+ "explicit 'smt=' setting. See XSA-439.\n"); -+} -+ - static void __init ibpb_calculations(void) - { - bool def_ibpb_entry = false; -@@ -1726,6 +1770,8 @@ void __init init_speculation_mitigations(void) - - ibpb_calculations(); - -+ div_calculations(hw_smt_enabled); -+ - /* Check whether Eager FPU should be enabled by default. 
*/ - if ( opt_eager_fpu == -1 ) - opt_eager_fpu = should_use_eager_fpu(); --- -2.41.0 - diff --git a/xsa440-4.17.patch b/xsa440-4.17.patch deleted file mode 100644 index 4941afc..0000000 --- a/xsa440-4.17.patch +++ /dev/null @@ -1,58 +0,0 @@ -From 5d8b3d1ec98e56155d9650d7f4a70cd8ba9dc27d Mon Sep 17 00:00:00 2001 -From: Julien Grall -Date: Fri, 22 Sep 2023 11:32:16 +0100 -Subject: tools/xenstored: domain_entry_fix(): Handle conflicting transaction - -The function domain_entry_fix() will be initially called to check if the -quota is correct before attempt to commit any nodes. So it would be -possible that accounting is temporarily negative. This is the case -in the following sequence: - - 1) Create 50 nodes - 2) Start two transactions - 3) Delete all the nodes in each transaction - 4) Commit the two transactions - -Because the first transaction will have succeed and updated the -accounting, there is no guarantee that 'd->nbentry + num' will still -be above 0. So the assert() would be triggered. -The assert() was introduced in dbef1f748289 ("tools/xenstore: simplify -and fix per domain node accounting") with the assumption that the -value can't be negative. As this is not true revert to the original -check but restricted to the path where we don't update. Take the -opportunity to explain the rationale behind the check. - -This CVE-2023-34323 / XSA-440. 
- -Reported-by: Stanislav Uschakow -Fixes: dbef1f748289 ("tools/xenstore: simplify and fix per domain node accounting") -Signed-off-by: Julien Grall -Reviewed-by: Juergen Gross - -diff --git a/tools/xenstore/xenstored_domain.c b/tools/xenstore/xenstored_domain.c -index aa86892fed9e..6074df210c6e 100644 ---- a/tools/xenstore/xenstored_domain.c -+++ b/tools/xenstore/xenstored_domain.c -@@ -1094,10 +1094,20 @@ int domain_entry_fix(unsigned int domid, int num, bool update) - } - - cnt = d->nbentry + num; -- assert(cnt >= 0); - -- if (update) -+ if (update) { -+ assert(cnt >= 0); - d->nbentry = cnt; -+ } else if (cnt < 0) { -+ /* -+ * In a transaction when a node is being added/removed AND -+ * the same node has been added/removed outside the -+ * transaction in parallel, the result value may be negative. -+ * This is no problem, as the transaction will fail due to -+ * the resulting conflict. So override 'cnt'. -+ */ -+ cnt = 0; -+ } - - return domid_is_unprivileged(domid) ? cnt : 0; - } diff --git a/xsa442-4.17.patch b/xsa442-4.17.patch deleted file mode 100644 index a78bfdd..0000000 --- a/xsa442-4.17.patch +++ /dev/null @@ -1,185 +0,0 @@ -From 5b2ccb60ff22fbff44dd66214c2956a434ee6271 Mon Sep 17 00:00:00 2001 -From: Roger Pau Monne -Date: Tue, 13 Jun 2023 15:01:05 +0200 -Subject: [PATCH] iommu/amd-vi: flush IOMMU TLB when flushing the DTE -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -The caching invalidation guidelines from the AMD-Vi specification (48882—Rev -3.07-PUB—Oct 2022) seem to be misleading on some hardware, as devices will -malfunction (see stale DMA mappings) if some fields of the DTE are updated but -the IOMMU TLB is not flushed. This has been observed in practice on AMD -systems. Due to the lack of guidance from the currently published -specification this patch aims to increase the flushing done in order to prevent -device malfunction. 
- -In order to fix, issue an INVALIDATE_IOMMU_PAGES command from -amd_iommu_flush_device(), flushing all the address space. Note this requires -callers to be adjusted in order to pass the DomID on the DTE previous to the -modification. - -Some call sites don't provide a valid DomID to amd_iommu_flush_device() in -order to avoid the flush. That's because the device had address translations -disabled and hence the previous DomID on the DTE is not valid. Note the -current logic relies on the entity disabling address translations to also flush -the TLB of the in use DomID. - -Device I/O TLB flushing when ATS are enabled is not covered by the current -change, as ATS usage is not security supported. - -This is XSA-442 / CVE-2023-34326 - -Signed-off-by: Roger Pau Monné -Reviewed-by: Jan Beulich ---- - xen/drivers/passthrough/amd/iommu.h | 3 ++- - xen/drivers/passthrough/amd/iommu_cmd.c | 10 +++++++++- - xen/drivers/passthrough/amd/iommu_guest.c | 5 +++-- - xen/drivers/passthrough/amd/iommu_init.c | 6 +++++- - xen/drivers/passthrough/amd/pci_amd_iommu.c | 14 ++++++++++---- - 5 files changed, 29 insertions(+), 9 deletions(-) - -diff --git a/xen/drivers/passthrough/amd/iommu.h b/xen/drivers/passthrough/amd/iommu.h -index 5429ada58ef5..a58be28bf96d 100644 ---- a/xen/drivers/passthrough/amd/iommu.h -+++ b/xen/drivers/passthrough/amd/iommu.h -@@ -283,7 +283,8 @@ void amd_iommu_flush_pages(struct domain *d, unsigned long dfn, - unsigned int order); - void amd_iommu_flush_iotlb(u8 devfn, const struct pci_dev *pdev, - uint64_t gaddr, unsigned int order); --void amd_iommu_flush_device(struct amd_iommu *iommu, uint16_t bdf); -+void amd_iommu_flush_device(struct amd_iommu *iommu, uint16_t bdf, -+ domid_t domid); - void amd_iommu_flush_intremap(struct amd_iommu *iommu, uint16_t bdf); - void amd_iommu_flush_all_caches(struct amd_iommu *iommu); - -diff --git a/xen/drivers/passthrough/amd/iommu_cmd.c b/xen/drivers/passthrough/amd/iommu_cmd.c -index 40ddf366bb4d..cb28b36abc38 100644 ---- 
a/xen/drivers/passthrough/amd/iommu_cmd.c -+++ b/xen/drivers/passthrough/amd/iommu_cmd.c -@@ -363,10 +363,18 @@ void amd_iommu_flush_pages(struct domain *d, - _amd_iommu_flush_pages(d, __dfn_to_daddr(dfn), order); - } - --void amd_iommu_flush_device(struct amd_iommu *iommu, uint16_t bdf) -+void amd_iommu_flush_device(struct amd_iommu *iommu, uint16_t bdf, -+ domid_t domid) - { - invalidate_dev_table_entry(iommu, bdf); - flush_command_buffer(iommu, 0); -+ -+ /* Also invalidate IOMMU TLB entries when flushing the DTE. */ -+ if ( domid != DOMID_INVALID ) -+ { -+ invalidate_iommu_pages(iommu, INV_IOMMU_ALL_PAGES_ADDRESS, domid, 0); -+ flush_command_buffer(iommu, 0); -+ } - } - - void amd_iommu_flush_intremap(struct amd_iommu *iommu, uint16_t bdf) -diff --git a/xen/drivers/passthrough/amd/iommu_guest.c b/xen/drivers/passthrough/amd/iommu_guest.c -index 80a331f546ed..be86bce6fb03 100644 ---- a/xen/drivers/passthrough/amd/iommu_guest.c -+++ b/xen/drivers/passthrough/amd/iommu_guest.c -@@ -385,7 +385,7 @@ static int do_completion_wait(struct domain *d, cmd_entry_t *cmd) - - static int do_invalidate_dte(struct domain *d, cmd_entry_t *cmd) - { -- uint16_t gbdf, mbdf, req_id, gdom_id, hdom_id; -+ uint16_t gbdf, mbdf, req_id, gdom_id, hdom_id, prev_domid; - struct amd_iommu_dte *gdte, *mdte, *dte_base; - struct amd_iommu *iommu = NULL; - struct guest_iommu *g_iommu; -@@ -445,13 +445,14 @@ static int do_invalidate_dte(struct domain *d, cmd_entry_t *cmd) - req_id = get_dma_requestor_id(iommu->seg, mbdf); - dte_base = iommu->dev_table.buffer; - mdte = &dte_base[req_id]; -+ prev_domid = mdte->domain_id; - - spin_lock_irqsave(&iommu->lock, flags); - dte_set_gcr3_table(mdte, hdom_id, gcr3_mfn << PAGE_SHIFT, gv, glx); - - spin_unlock_irqrestore(&iommu->lock, flags); - -- amd_iommu_flush_device(iommu, req_id); -+ amd_iommu_flush_device(iommu, req_id, prev_domid); - - return 0; - } -diff --git a/xen/drivers/passthrough/amd/iommu_init.c b/xen/drivers/passthrough/amd/iommu_init.c -index 
166570648d26..101a60ce1794 100644 ---- a/xen/drivers/passthrough/amd/iommu_init.c -+++ b/xen/drivers/passthrough/amd/iommu_init.c -@@ -1547,7 +1547,11 @@ static int cf_check _invalidate_all_devices( - req_id = ivrs_mappings[bdf].dte_requestor_id; - if ( iommu ) - { -- amd_iommu_flush_device(iommu, req_id); -+ /* -+ * IOMMU TLB flush performed separately (see -+ * invalidate_all_domain_pages()). -+ */ -+ amd_iommu_flush_device(iommu, req_id, DOMID_INVALID); - amd_iommu_flush_intremap(iommu, req_id); - } - } -diff --git a/xen/drivers/passthrough/amd/pci_amd_iommu.c b/xen/drivers/passthrough/amd/pci_amd_iommu.c -index 94e37755064b..8641b84712a0 100644 ---- a/xen/drivers/passthrough/amd/pci_amd_iommu.c -+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c -@@ -192,10 +192,13 @@ static int __must_check amd_iommu_setup_domain_device( - - spin_unlock_irqrestore(&iommu->lock, flags); - -- amd_iommu_flush_device(iommu, req_id); -+ /* DTE didn't have DMA translations enabled, do not flush the TLB. */ -+ amd_iommu_flush_device(iommu, req_id, DOMID_INVALID); - } - else if ( dte->pt_root != mfn_x(page_to_mfn(root_pg)) ) - { -+ domid_t prev_domid = dte->domain_id; -+ - /* - * Strictly speaking if the device is the only one with this requestor - * ID, it could be allowed to be re-assigned regardless of unity map -@@ -252,7 +255,7 @@ static int __must_check amd_iommu_setup_domain_device( - - spin_unlock_irqrestore(&iommu->lock, flags); - -- amd_iommu_flush_device(iommu, req_id); -+ amd_iommu_flush_device(iommu, req_id, prev_domid); - } - else - spin_unlock_irqrestore(&iommu->lock, flags); -@@ -421,6 +424,8 @@ static void amd_iommu_disable_domain_device(const struct domain *domain, - spin_lock_irqsave(&iommu->lock, flags); - if ( dte->tv || dte->v ) - { -+ domid_t prev_domid = dte->domain_id; -+ - /* See the comment in amd_iommu_setup_device_table(). 
*/ - dte->int_ctl = IOMMU_DEV_TABLE_INT_CONTROL_ABORTED; - smp_wmb(); -@@ -439,7 +444,7 @@ static void amd_iommu_disable_domain_device(const struct domain *domain, - - spin_unlock_irqrestore(&iommu->lock, flags); - -- amd_iommu_flush_device(iommu, req_id); -+ amd_iommu_flush_device(iommu, req_id, prev_domid); - - AMD_IOMMU_DEBUG("Disable: device id = %#x, " - "domain = %d, paging mode = %d\n", -@@ -610,7 +615,8 @@ static int cf_check amd_iommu_add_device(u8 devfn, struct pci_dev *pdev) - - spin_unlock_irqrestore(&iommu->lock, flags); - -- amd_iommu_flush_device(iommu, bdf); -+ /* DTE didn't have DMA translations enabled, do not flush the TLB. */ -+ amd_iommu_flush_device(iommu, bdf, DOMID_INVALID); - } - - if ( amd_iommu_reserve_domain_unity_map( --- -2.42.0 - diff --git a/xsa443-4.17-01.patch b/xsa443-4.17-01.patch deleted file mode 100644 index d9ca3f8..0000000 --- a/xsa443-4.17-01.patch +++ /dev/null @@ -1,70 +0,0 @@ -From 7e48562bf34e90f907491a0595782d2daa1ff3ad Mon Sep 17 00:00:00 2001 -From: Alejandro Vallejo -Date: Thu, 14 Sep 2023 13:22:50 +0100 -Subject: [PATCH 01/11] libfsimage/xfs: Remove dead code - -xfs_info.agnolog (and related code) and XFS_INO_AGBNO_BITS are dead code -that serve no purpose. 
- -This is part of XSA-443 / CVE-2023-34325 - -Signed-off-by: Alejandro Vallejo -Reviewed-by: Jan Beulich ---- - tools/libfsimage/xfs/fsys_xfs.c | 18 ------------------ - 1 file changed, 18 deletions(-) - -diff --git a/tools/libfsimage/xfs/fsys_xfs.c b/tools/libfsimage/xfs/fsys_xfs.c -index d735a88e55f3..2800699f5985 100644 ---- a/tools/libfsimage/xfs/fsys_xfs.c -+++ b/tools/libfsimage/xfs/fsys_xfs.c -@@ -37,7 +37,6 @@ struct xfs_info { - int blklog; - int inopblog; - int agblklog; -- int agnolog; - unsigned int nextents; - xfs_daddr_t next; - xfs_daddr_t daddr; -@@ -65,9 +64,7 @@ static struct xfs_info xfs; - - #define XFS_INO_MASK(k) ((xfs_uint32_t)((1ULL << (k)) - 1)) - #define XFS_INO_OFFSET_BITS xfs.inopblog --#define XFS_INO_AGBNO_BITS xfs.agblklog - #define XFS_INO_AGINO_BITS (xfs.agblklog + xfs.inopblog) --#define XFS_INO_AGNO_BITS xfs.agnolog - - static inline xfs_agblock_t - agino2agbno (xfs_agino_t agino) -@@ -149,20 +146,6 @@ xt_len (xfs_bmbt_rec_32_t *r) - return le32(r->l3) & mask32lo(21); - } - --static inline int --xfs_highbit32(xfs_uint32_t v) --{ -- int i; -- -- if (--v) { -- for (i = 0; i < 31; i++, v >>= 1) { -- if (v == 0) -- return i; -- } -- } -- return 0; --} -- - static int - isinxt (xfs_fileoff_t key, xfs_fileoff_t offset, xfs_filblks_t len) - { -@@ -472,7 +455,6 @@ xfs_mount (fsi_file_t *ffi, const char *options) - - xfs.inopblog = super.sb_inopblog; - xfs.agblklog = super.sb_agblklog; -- xfs.agnolog = xfs_highbit32 (le32(super.sb_agcount)); - - xfs.btnode_ptr0_off = - ((xfs.bsize - sizeof(xfs_btree_block_t)) / --- -2.42.0 - diff --git a/xsa443-4.17-02.patch b/xsa443-4.17-02.patch deleted file mode 100644 index 0f2edaf..0000000 --- a/xsa443-4.17-02.patch +++ /dev/null @@ -1,32 +0,0 @@ -From c26327795b78c93f6fa6d5d46e34f59dc4046601 Mon Sep 17 00:00:00 2001 -From: Alejandro Vallejo -Date: Thu, 14 Sep 2023 13:22:51 +0100 -Subject: [PATCH 02/11] libfsimage/xfs: Amend mask32lo() to allow the value 32 - -agblklog could plausibly be 32, but that 
would overflow this shift. -Perform the shift as ULL and cast to u32 at the end instead. - -This is part of XSA-443 / CVE-2023-34325 - -Signed-off-by: Alejandro Vallejo -Acked-by: Jan Beulich ---- - tools/libfsimage/xfs/fsys_xfs.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/tools/libfsimage/xfs/fsys_xfs.c b/tools/libfsimage/xfs/fsys_xfs.c -index 2800699f5985..4720bb4505c8 100644 ---- a/tools/libfsimage/xfs/fsys_xfs.c -+++ b/tools/libfsimage/xfs/fsys_xfs.c -@@ -60,7 +60,7 @@ static struct xfs_info xfs; - #define inode ((xfs_dinode_t *)((char *)FSYS_BUF + 8192)) - #define icore (inode->di_core) - --#define mask32lo(n) (((xfs_uint32_t)1 << (n)) - 1) -+#define mask32lo(n) ((xfs_uint32_t)((1ull << (n)) - 1)) - - #define XFS_INO_MASK(k) ((xfs_uint32_t)((1ULL << (k)) - 1)) - #define XFS_INO_OFFSET_BITS xfs.inopblog --- -2.42.0 - diff --git a/xsa443-4.17-03.patch b/xsa443-4.17-03.patch deleted file mode 100644 index b89721a..0000000 --- a/xsa443-4.17-03.patch +++ /dev/null @@ -1,137 +0,0 @@ -From 199f0538bbec052028679a55ea512437170854c9 Mon Sep 17 00:00:00 2001 -From: Alejandro Vallejo -Date: Thu, 14 Sep 2023 13:22:52 +0100 -Subject: [PATCH 03/11] libfsimage/xfs: Sanity-check the superblock during - mounts - -Sanity-check the XFS superblock for wellformedness at the mount handler. -This forces pygrub to abort parsing a potentially malformed filesystem and -ensures the invariants assumed throughout the rest of the code hold. - -Also, derive parameters from previously sanitized parameters where possible -(rather than reading them off the superblock) - -The code doesn't try to avoid overflowing the end of the disk, because -that's an unlikely and benign error. Parameters used in calculations of -xfs_daddr_t (like the root inode index) aren't in critical need of being -sanitized. 
- -The sanitization of agblklog is basically checking that no obvious -overflows happen on agblklog, and then ensuring agblocks is contained in -the range (2^(sb_agblklog-1), 2^sb_agblklog]. - -This is part of XSA-443 / CVE-2023-34325 - -Reported-by: Ferdinand Nölscher -Signed-off-by: Alejandro Vallejo -Reviewed-by: Jan Beulich ---- - tools/libfsimage/xfs/fsys_xfs.c | 48 ++++++++++++++++++++++++++------- - tools/libfsimage/xfs/xfs.h | 12 +++++++++ - 2 files changed, 50 insertions(+), 10 deletions(-) - -diff --git a/tools/libfsimage/xfs/fsys_xfs.c b/tools/libfsimage/xfs/fsys_xfs.c -index 4720bb4505c8..e4eb7e1ee26f 100644 ---- a/tools/libfsimage/xfs/fsys_xfs.c -+++ b/tools/libfsimage/xfs/fsys_xfs.c -@@ -17,6 +17,7 @@ - * along with this program; If not, see . - */ - -+#include - #include - #include "xfs.h" - -@@ -433,29 +434,56 @@ first_dentry (fsi_file_t *ffi, xfs_ino_t *ino) - return next_dentry (ffi, ino); - } - -+static bool -+xfs_sb_is_invalid (const xfs_sb_t *super) -+{ -+ return (le32(super->sb_magicnum) != XFS_SB_MAGIC) -+ || ((le16(super->sb_versionnum) & XFS_SB_VERSION_NUMBITS) != -+ XFS_SB_VERSION_4) -+ || (super->sb_inodelog < XFS_SB_INODELOG_MIN) -+ || (super->sb_inodelog > XFS_SB_INODELOG_MAX) -+ || (super->sb_blocklog < XFS_SB_BLOCKLOG_MIN) -+ || (super->sb_blocklog > XFS_SB_BLOCKLOG_MAX) -+ || (super->sb_blocklog < super->sb_inodelog) -+ || (super->sb_agblklog > XFS_SB_AGBLKLOG_MAX) -+ || ((1ull << super->sb_agblklog) < le32(super->sb_agblocks)) -+ || (((1ull << super->sb_agblklog) >> 1) >= -+ le32(super->sb_agblocks)) -+ || ((super->sb_blocklog + super->sb_dirblklog) >= -+ XFS_SB_DIRBLK_NUMBITS); -+} -+ - static int - xfs_mount (fsi_file_t *ffi, const char *options) - { - xfs_sb_t super; - - if (!devread (ffi, 0, 0, sizeof(super), (char *)&super) -- || (le32(super.sb_magicnum) != XFS_SB_MAGIC) -- || ((le16(super.sb_versionnum) -- & XFS_SB_VERSION_NUMBITS) != XFS_SB_VERSION_4) ) { -+ || xfs_sb_is_invalid(&super)) { - return 0; - } - -- xfs.bsize = 
le32 (super.sb_blocksize); -- xfs.blklog = super.sb_blocklog; -- xfs.bdlog = xfs.blklog - SECTOR_BITS; -+ /* -+ * Not sanitized. It's exclusively used to generate disk addresses, -+ * so it's not important from a security standpoint. -+ */ - xfs.rootino = le64 (super.sb_rootino); -- xfs.isize = le16 (super.sb_inodesize); -- xfs.agblocks = le32 (super.sb_agblocks); -- xfs.dirbsize = xfs.bsize << super.sb_dirblklog; - -- xfs.inopblog = super.sb_inopblog; -+ /* -+ * Sanitized to be consistent with each other, only used to -+ * generate disk addresses, so it's safe -+ */ -+ xfs.agblocks = le32 (super.sb_agblocks); - xfs.agblklog = super.sb_agblklog; - -+ /* Derived from sanitized parameters */ -+ xfs.bsize = 1 << super.sb_blocklog; -+ xfs.blklog = super.sb_blocklog; -+ xfs.bdlog = super.sb_blocklog - SECTOR_BITS; -+ xfs.isize = 1 << super.sb_inodelog; -+ xfs.dirbsize = 1 << (super.sb_blocklog + super.sb_dirblklog); -+ xfs.inopblog = super.sb_blocklog - super.sb_inodelog; -+ - xfs.btnode_ptr0_off = - ((xfs.bsize - sizeof(xfs_btree_block_t)) / - (sizeof (xfs_bmbt_key_t) + sizeof (xfs_bmbt_ptr_t))) -diff --git a/tools/libfsimage/xfs/xfs.h b/tools/libfsimage/xfs/xfs.h -index 40699281e44d..b87e37d3d7e9 100644 ---- a/tools/libfsimage/xfs/xfs.h -+++ b/tools/libfsimage/xfs/xfs.h -@@ -134,6 +134,18 @@ typedef struct xfs_sb - xfs_uint8_t sb_dummy[7]; /* padding */ - } xfs_sb_t; - -+/* Bound taken from xfs.c in GRUB2. It doesn't exist in the spec */ -+#define XFS_SB_DIRBLK_NUMBITS 27 -+/* Implied by the XFS specification. The minimum block size is 512 octets */ -+#define XFS_SB_BLOCKLOG_MIN 9 -+/* Implied by the XFS specification. The maximum block size is 65536 octets */ -+#define XFS_SB_BLOCKLOG_MAX 16 -+/* Implied by the XFS specification. The minimum inode size is 256 octets */ -+#define XFS_SB_INODELOG_MIN 8 -+/* Implied by the XFS specification. 
The maximum inode size is 2048 octets */ -+#define XFS_SB_INODELOG_MAX 11 -+/* High bound for sb_agblklog */ -+#define XFS_SB_AGBLKLOG_MAX 32 - - /* those are from xfs_btree.h */ - --- -2.42.0 - diff --git a/xsa443-4.17-04.patch b/xsa443-4.17-04.patch deleted file mode 100644 index dde095e..0000000 --- a/xsa443-4.17-04.patch +++ /dev/null @@ -1,61 +0,0 @@ -From c66fd01277939634c624c8340838682d9d4fd839 Mon Sep 17 00:00:00 2001 -From: Alejandro Vallejo -Date: Thu, 14 Sep 2023 13:22:53 +0100 -Subject: [PATCH 04/11] libfsimage/xfs: Add compile-time check to libfsimage - -Adds the common tools include folder to the -I compile flags -of libfsimage. This allows us to use: - xen-tools/common-macros.h:BUILD_BUG_ON() - -With it, statically assert a sanitized "blocklog - SECTOR_BITS" cannot -underflow. - -This is part of XSA-443 / CVE-2023-34325 - -Signed-off-by: Alejandro Vallejo -Reviewed-by: Jan Beulich ---- - tools/libfsimage/common.mk | 2 +- - tools/libfsimage/xfs/fsys_xfs.c | 4 +++- - 2 files changed, 4 insertions(+), 2 deletions(-) - -diff --git a/tools/libfsimage/common.mk b/tools/libfsimage/common.mk -index 4fc8c6679599..e4336837d045 100644 ---- a/tools/libfsimage/common.mk -+++ b/tools/libfsimage/common.mk -@@ -1,7 +1,7 @@ - include $(XEN_ROOT)/tools/Rules.mk - - FSDIR := $(libdir)/xenfsimage --CFLAGS += -Wno-unknown-pragmas -I$(XEN_ROOT)/tools/libfsimage/common/ -DFSIMAGE_FSDIR=\"$(FSDIR)\" -+CFLAGS += -Wno-unknown-pragmas -I$(XEN_ROOT)/tools/libfsimage/common/ $(CFLAGS_xeninclude) -DFSIMAGE_FSDIR=\"$(FSDIR)\" - CFLAGS += -D_GNU_SOURCE - LDFLAGS += -L../common/ - -diff --git a/tools/libfsimage/xfs/fsys_xfs.c b/tools/libfsimage/xfs/fsys_xfs.c -index e4eb7e1ee26f..4a8dd6f2397b 100644 ---- a/tools/libfsimage/xfs/fsys_xfs.c -+++ b/tools/libfsimage/xfs/fsys_xfs.c -@@ -19,6 +19,7 @@ - - #include - #include -+#include - #include "xfs.h" - - #define MAX_LINK_COUNT 8 -@@ -477,9 +478,10 @@ xfs_mount (fsi_file_t *ffi, const char *options) - xfs.agblklog = super.sb_agblklog; - 
- /* Derived from sanitized parameters */ -+ BUILD_BUG_ON(XFS_SB_BLOCKLOG_MIN < SECTOR_BITS); -+ xfs.bdlog = super.sb_blocklog - SECTOR_BITS; - xfs.bsize = 1 << super.sb_blocklog; - xfs.blklog = super.sb_blocklog; -- xfs.bdlog = super.sb_blocklog - SECTOR_BITS; - xfs.isize = 1 << super.sb_inodelog; - xfs.dirbsize = 1 << (super.sb_blocklog + super.sb_dirblklog); - xfs.inopblog = super.sb_blocklog - super.sb_inodelog; --- -2.42.0 - diff --git a/xsa443-4.17-05.patch b/xsa443-4.17-05.patch deleted file mode 100644 index b2f5daa..0000000 --- a/xsa443-4.17-05.patch +++ /dev/null @@ -1,59 +0,0 @@ -From ad5d0db5e68e5d4e79255fa85d9cb0069bb1c5d5 Mon Sep 17 00:00:00 2001 -From: Alejandro Vallejo -Date: Mon, 25 Sep 2023 18:32:21 +0100 -Subject: [PATCH 05/11] tools/pygrub: Remove unnecessary hypercall - -There's a hypercall being issued in order to determine whether PV64 is -supported, but since Xen 4.3 that's strictly true so it's not required. - -Plus, this way we can avoid mapping the privcmd interface altogether in the -depriv pygrub. - -This is part of XSA-443 / CVE-2023-34325 - -Signed-off-by: Alejandro Vallejo -Reviewed-by: Andrew Cooper ---- - tools/pygrub/src/pygrub | 12 +----------- - 1 file changed, 1 insertion(+), 11 deletions(-) - -diff --git a/tools/pygrub/src/pygrub b/tools/pygrub/src/pygrub -index ce7ab0eb8cf3..ce4e07d3e823 100755 ---- a/tools/pygrub/src/pygrub -+++ b/tools/pygrub/src/pygrub -@@ -18,7 +18,6 @@ import os, sys, string, struct, tempfile, re, traceback, stat, errno - import copy - import logging - import platform --import xen.lowlevel.xc - - import curses, _curses, curses.textpad, curses.ascii - import getopt -@@ -668,14 +667,6 @@ def run_grub(file, entry, fs, cfg_args): - - return grubcfg - --def supports64bitPVguest(): -- xc = xen.lowlevel.xc.xc() -- caps = xc.xeninfo()['xen_caps'].split(" ") -- for cap in caps: -- if cap == "xen-3.0-x86_64": -- return True -- return False -- - # If nothing has been specified, look for a Solaris domU. 
If found, perform the - # necessary tweaks. - def sniff_solaris(fs, cfg): -@@ -684,8 +675,7 @@ def sniff_solaris(fs, cfg): - return cfg - - if not cfg["kernel"]: -- if supports64bitPVguest() and \ -- fs.file_exists("/platform/i86xpv/kernel/amd64/unix"): -+ if fs.file_exists("/platform/i86xpv/kernel/amd64/unix"): - cfg["kernel"] = "/platform/i86xpv/kernel/amd64/unix" - cfg["ramdisk"] = "/platform/i86pc/amd64/boot_archive" - elif fs.file_exists("/platform/i86xpv/kernel/unix"): --- -2.42.0 - diff --git a/xsa443-4.17-06.patch b/xsa443-4.17-06.patch deleted file mode 100644 index 22af109..0000000 --- a/xsa443-4.17-06.patch +++ /dev/null @@ -1,65 +0,0 @@ -From d3ceb0b314005a656dd2ca4b2821575a36f8426d Mon Sep 17 00:00:00 2001 -From: Alejandro Vallejo -Date: Mon, 25 Sep 2023 18:32:22 +0100 -Subject: [PATCH 06/11] tools/pygrub: Small refactors - -Small tidy up to ensure output_directory always has a trailing '/' to ease -concatenating paths and that `output` can only be a filename or None. - -This is part of XSA-443 / CVE-2023-34325 - -Signed-off-by: Alejandro Vallejo -Acked-by: Andrew Cooper ---- - tools/pygrub/src/pygrub | 10 +++++----- - 1 file changed, 5 insertions(+), 5 deletions(-) - -diff --git a/tools/pygrub/src/pygrub b/tools/pygrub/src/pygrub -index ce4e07d3e823..1042c05b8676 100755 ---- a/tools/pygrub/src/pygrub -+++ b/tools/pygrub/src/pygrub -@@ -793,7 +793,7 @@ if __name__ == "__main__": - debug = False - not_really = False - output_format = "sxp" -- output_directory = "/var/run/xen/pygrub" -+ output_directory = "/var/run/xen/pygrub/" - - # what was passed in - incfg = { "kernel": None, "ramdisk": None, "args": "" } -@@ -815,7 +815,8 @@ if __name__ == "__main__": - usage() - sys.exit() - elif o in ("--output",): -- output = a -+ if a != "-": -+ output = a - elif o in ("--kernel",): - incfg["kernel"] = a - elif o in ("--ramdisk",): -@@ -847,12 +848,11 @@ if __name__ == "__main__": - if not os.path.isdir(a): - print("%s is not an existing directory" % a) - 
sys.exit(1) -- output_directory = a -+ output_directory = a + '/' - - if debug: - logging.basicConfig(level=logging.DEBUG) - -- - try: - os.makedirs(output_directory, 0o700) - except OSError as e: -@@ -861,7 +861,7 @@ if __name__ == "__main__": - else: - raise - -- if output is None or output == "-": -+ if output is None: - fd = sys.stdout.fileno() - else: - fd = os.open(output, os.O_WRONLY) --- -2.42.0 - diff --git a/xsa443-4.17-07.patch b/xsa443-4.17-07.patch deleted file mode 100644 index 94da883..0000000 --- a/xsa443-4.17-07.patch +++ /dev/null @@ -1,105 +0,0 @@ -From 9e80cfecde338cea0db136c2fb5ed78d6081e05f Mon Sep 17 00:00:00 2001 -From: Alejandro Vallejo -Date: Mon, 25 Sep 2023 18:32:23 +0100 -Subject: [PATCH 07/11] tools/pygrub: Open the output files earlier - -This patch allows pygrub to get ahold of every RW file descriptor it needs -early on. A later patch will clamp the filesystem it can access so it can't -obtain any others. - -This is part of XSA-443 / CVE-2023-34325 - -Signed-off-by: Alejandro Vallejo -Acked-by: Andrew Cooper ---- - tools/pygrub/src/pygrub | 37 ++++++++++++++++++++++--------------- - 1 file changed, 22 insertions(+), 15 deletions(-) - -diff --git a/tools/pygrub/src/pygrub b/tools/pygrub/src/pygrub -index 1042c05b8676..91e2ec2ab105 100755 ---- a/tools/pygrub/src/pygrub -+++ b/tools/pygrub/src/pygrub -@@ -738,8 +738,7 @@ if __name__ == "__main__": - def usage(): - print("Usage: %s [-q|--quiet] [-i|--interactive] [-l|--list-entries] [-n|--not-really] [--output=] [--kernel=] [--ramdisk=] [--args=] [--entry=] [--output-directory=] [--output-format=sxp|simple|simple0] [--offset=] " %(sys.argv[0],), file=sys.stderr) - -- def copy_from_image(fs, file_to_read, file_type, output_directory, -- not_really): -+ def copy_from_image(fs, file_to_read, file_type, fd_dst, path_dst, not_really): - if not_really: - if fs.file_exists(file_to_read): - return "<%s:%s>" % (file_type, file_to_read) -@@ -750,21 +749,18 @@ if __name__ == "__main__": - except 
Exception as e: - print(e, file=sys.stderr) - sys.exit("Error opening %s in guest" % file_to_read) -- (tfd, ret) = tempfile.mkstemp(prefix="boot_"+file_type+".", -- dir=output_directory) - dataoff = 0 - while True: - data = datafile.read(FS_READ_MAX, dataoff) - if len(data) == 0: -- os.close(tfd) -+ os.close(fd_dst) - del datafile -- return ret -+ return - try: -- os.write(tfd, data) -+ os.write(fd_dst, data) - except Exception as e: - print(e, file=sys.stderr) -- os.close(tfd) -- os.unlink(ret) -+ os.unlink(path_dst) - del datafile - sys.exit("Error writing temporary copy of "+file_type) - dataoff += len(data) -@@ -861,6 +857,14 @@ if __name__ == "__main__": - else: - raise - -+ if not_really: -+ fd_kernel = path_kernel = fd_ramdisk = path_ramdisk = None -+ else: -+ (fd_kernel, path_kernel) = tempfile.mkstemp(prefix="boot_kernel.", -+ dir=output_directory) -+ (fd_ramdisk, path_ramdisk) = tempfile.mkstemp(prefix="boot_ramdisk.", -+ dir=output_directory) -+ - if output is None: - fd = sys.stdout.fileno() - else: -@@ -920,20 +924,23 @@ if __name__ == "__main__": - if fs is None: - raise RuntimeError("Unable to find partition containing kernel") - -- bootcfg["kernel"] = copy_from_image(fs, chosencfg["kernel"], "kernel", -- output_directory, not_really) -+ copy_from_image(fs, chosencfg["kernel"], "kernel", -+ fd_kernel, path_kernel, not_really) -+ bootcfg["kernel"] = path_kernel - - if chosencfg["ramdisk"]: - try: -- bootcfg["ramdisk"] = copy_from_image(fs, chosencfg["ramdisk"], -- "ramdisk", output_directory, -- not_really) -+ copy_from_image(fs, chosencfg["ramdisk"], "ramdisk", -+ fd_ramdisk, path_ramdisk, not_really) - except: - if not not_really: -- os.unlink(bootcfg["kernel"]) -+ os.unlink(path_kernel) - raise -+ bootcfg["ramdisk"] = path_ramdisk - else: - initrd = None -+ if not not_really: -+ os.unlink(path_ramdisk) - - args = None - if chosencfg["args"]: --- -2.42.0 - diff --git a/xsa443-4.17-08.patch b/xsa443-4.17-08.patch deleted file mode 100644 index 
bd7de1d..0000000 --- a/xsa443-4.17-08.patch +++ /dev/null @@ -1,126 +0,0 @@ -From 2fb4cdcedd8720f78c4bd44739a5d30dd1a7d9a5 Mon Sep 17 00:00:00 2001 -From: Alejandro Vallejo -Date: Mon, 25 Sep 2023 18:32:24 +0100 -Subject: [PATCH 08/11] tools/libfsimage: Export a new function to preload all - plugins - -This is work required in order to let pygrub operate in highly deprivileged -chroot mode. This patch adds a function that preloads every plugin, hence -ensuring that a on function exit, every shared library is loaded in memory. - -The new "init" function is supposed to be used before depriv, but that's -fine because it's not acting on untrusted data. - -This is part of XSA-443 / CVE-2023-34325 - -Signed-off-by: Alejandro Vallejo -Acked-by: Andrew Cooper ---- - tools/libfsimage/common/fsimage_plugin.c | 4 ++-- - tools/libfsimage/common/mapfile-GNU | 1 + - tools/libfsimage/common/mapfile-SunOS | 1 + - tools/libfsimage/common/xenfsimage.h | 8 ++++++++ - tools/pygrub/src/fsimage/fsimage.c | 15 +++++++++++++++ - 5 files changed, 27 insertions(+), 2 deletions(-) - -diff --git a/tools/libfsimage/common/fsimage_plugin.c b/tools/libfsimage/common/fsimage_plugin.c -index de1412b4233a..d0cb9e96a654 100644 ---- a/tools/libfsimage/common/fsimage_plugin.c -+++ b/tools/libfsimage/common/fsimage_plugin.c -@@ -119,7 +119,7 @@ fail: - return (-1); - } - --static int load_plugins(void) -+int fsi_init(void) - { - const char *fsdir = getenv("XEN_FSIMAGE_FSDIR"); - struct dirent *dp = NULL; -@@ -180,7 +180,7 @@ int find_plugin(fsi_t *fsi, const char *path, const char *options) - fsi_plugin_t *fp; - int ret = 0; - -- if (plugins == NULL && (ret = load_plugins()) != 0) -+ if (plugins == NULL && (ret = fsi_init()) != 0) - goto out; - - for (fp = plugins; fp != NULL; fp = fp->fp_next) { -diff --git a/tools/libfsimage/common/mapfile-GNU b/tools/libfsimage/common/mapfile-GNU -index 26d4d7a69ec7..2d54d527d7f5 100644 ---- a/tools/libfsimage/common/mapfile-GNU -+++ 
b/tools/libfsimage/common/mapfile-GNU -@@ -1,6 +1,7 @@ - VERSION { - libfsimage.so.1.0 { - global: -+ fsi_init; - fsi_open_fsimage; - fsi_close_fsimage; - fsi_file_exists; -diff --git a/tools/libfsimage/common/mapfile-SunOS b/tools/libfsimage/common/mapfile-SunOS -index e99b90b65077..48deedb4252f 100644 ---- a/tools/libfsimage/common/mapfile-SunOS -+++ b/tools/libfsimage/common/mapfile-SunOS -@@ -1,5 +1,6 @@ - libfsimage.so.1.0 { - global: -+ fsi_init; - fsi_open_fsimage; - fsi_close_fsimage; - fsi_file_exists; -diff --git a/tools/libfsimage/common/xenfsimage.h b/tools/libfsimage/common/xenfsimage.h -index 201abd54f23a..341883b2d71a 100644 ---- a/tools/libfsimage/common/xenfsimage.h -+++ b/tools/libfsimage/common/xenfsimage.h -@@ -35,6 +35,14 @@ extern C { - typedef struct fsi fsi_t; - typedef struct fsi_file fsi_file_t; - -+/* -+ * Optional initialization function. If invoked it loads the associated -+ * dynamic libraries for the backends ahead of time. This is required if -+ * the library is to run as part of a highly deprivileged executable, as -+ * the libraries may not be reachable after depriv. 
-+ */ -+int fsi_init(void); -+ - fsi_t *fsi_open_fsimage(const char *, uint64_t, const char *); - void fsi_close_fsimage(fsi_t *); - -diff --git a/tools/pygrub/src/fsimage/fsimage.c b/tools/pygrub/src/fsimage/fsimage.c -index 2ebbbe35df92..92fbf2851f01 100644 ---- a/tools/pygrub/src/fsimage/fsimage.c -+++ b/tools/pygrub/src/fsimage/fsimage.c -@@ -286,6 +286,15 @@ fsimage_getbootstring(PyObject *o, PyObject *args) - return Py_BuildValue("s", bootstring); - } - -+static PyObject * -+fsimage_init(PyObject *o, PyObject *args) -+{ -+ if (!PyArg_ParseTuple(args, "")) -+ return (NULL); -+ -+ return Py_BuildValue("i", fsi_init()); -+} -+ - PyDoc_STRVAR(fsimage_open__doc__, - "open(name, [offset=off]) - Open the given file as a filesystem image.\n" - "\n" -@@ -297,7 +306,13 @@ PyDoc_STRVAR(fsimage_getbootstring__doc__, - "getbootstring(fs) - Return the boot string needed for this file system " - "or NULL if none is needed.\n"); - -+PyDoc_STRVAR(fsimage_init__doc__, -+ "init() - Loads every dynamic library contained in xenfsimage " -+ "into memory so that it can be used in chrooted environments.\n"); -+ - static struct PyMethodDef fsimage_module_methods[] = { -+ { "init", (PyCFunction)fsimage_init, -+ METH_VARARGS, fsimage_init__doc__ }, - { "open", (PyCFunction)fsimage_open, - METH_VARARGS|METH_KEYWORDS, fsimage_open__doc__ }, - { "getbootstring", (PyCFunction)fsimage_getbootstring, --- -2.42.0 - diff --git a/xsa443-4.17-09.patch b/xsa443-4.17-09.patch deleted file mode 100644 index 2e3ebd8..0000000 --- a/xsa443-4.17-09.patch +++ /dev/null @@ -1,307 +0,0 @@ -From 150771ce86a07e469e34941a63c56e2cf242223b Mon Sep 17 00:00:00 2001 -From: Alejandro Vallejo -Date: Mon, 25 Sep 2023 18:32:25 +0100 -Subject: [PATCH 09/11] tools/pygrub: Deprivilege pygrub - -Introduce a --runas= flag to deprivilege pygrub on Linux and *BSDs. It -also implicitly creates a chroot env where it drops a deprivileged forked -process. The chroot itself is cleaned up at the end. 
- -If the --runas arg is present, then pygrub forks, leaving the child to -deprivilege itself, and waiting for it to complete. When the child exists, -the parent performs cleanup and exits with the same error code. - -This is roughly what the child does: - 1. Initialize libfsimage (this loads every .so in memory so the chroot - can avoid bind-mounting /{,usr}/lib* - 2. Create a temporary empty chroot directory - 3. Mount tmpfs in it - 4. Bind mount the disk inside, because libfsimage expects a path, not a - file descriptor. - 5. Remount the root tmpfs to be stricter (ro,nosuid,nodev) - 6. Set RLIMIT_FSIZE to a sensibly high amount (128 MiB) - 7. Depriv gid, groups and uid - -With this scheme in place, the "output" files are writable (up to -RLIMIT_FSIZE octets) and the exposed filesystem is immutable and contains -the single only file we can't easily get rid of (the disk). - -If running on Linux, the child process also unshares mount, IPC, and -network namespaces before dropping its privileges. 
- -This is part of XSA-443 / CVE-2023-34325 - -Signed-off-by: Alejandro Vallejo -Acked-by: Andrew Cooper ---- - tools/pygrub/setup.py | 2 +- - tools/pygrub/src/pygrub | 162 +++++++++++++++++++++++++++++++++++++--- - 2 files changed, 154 insertions(+), 10 deletions(-) - -diff --git a/tools/pygrub/setup.py b/tools/pygrub/setup.py -index 0e4e3d02d372..06b96733d020 100644 ---- a/tools/pygrub/setup.py -+++ b/tools/pygrub/setup.py -@@ -17,7 +17,7 @@ xenfsimage = Extension("xenfsimage", - pkgs = [ 'grub' ] - - setup(name='pygrub', -- version='0.6', -+ version='0.7', - description='Boot loader that looks a lot like grub for Xen', - author='Jeremy Katz', - author_email='katzj@redhat.com', -diff --git a/tools/pygrub/src/pygrub b/tools/pygrub/src/pygrub -index 91e2ec2ab105..7cea496ade08 100755 ---- a/tools/pygrub/src/pygrub -+++ b/tools/pygrub/src/pygrub -@@ -16,8 +16,11 @@ from __future__ import print_function - - import os, sys, string, struct, tempfile, re, traceback, stat, errno - import copy -+import ctypes, ctypes.util - import logging - import platform -+import resource -+import subprocess - - import curses, _curses, curses.textpad, curses.ascii - import getopt -@@ -27,10 +30,135 @@ import grub.GrubConf - import grub.LiloConf - import grub.ExtLinuxConf - --PYGRUB_VER = 0.6 -+PYGRUB_VER = 0.7 - FS_READ_MAX = 1024 * 1024 - SECTOR_SIZE = 512 - -+# Unless provided through the env variable PYGRUB_MAX_FILE_SIZE_MB, then -+# this is the maximum filesize allowed for files written by the depriv -+# pygrub -+LIMIT_FSIZE = 128 << 20 -+ -+CLONE_NEWNS = 0x00020000 # mount namespace -+CLONE_NEWNET = 0x40000000 # network namespace -+CLONE_NEWIPC = 0x08000000 # IPC namespace -+ -+def unshare(flags): -+ if not sys.platform.startswith("linux"): -+ print("skip_unshare reason=not_linux platform=%s", sys.platform, file=sys.stderr) -+ return -+ -+ libc = ctypes.CDLL(ctypes.util.find_library('c'), use_errno=True) -+ unshare_prototype = ctypes.CFUNCTYPE(ctypes.c_int, ctypes.c_int, 
use_errno=True) -+ unshare = unshare_prototype(('unshare', libc)) -+ -+ if unshare(flags) < 0: -+ raise OSError(ctypes.get_errno(), os.strerror(ctypes.get_errno())) -+ -+def bind_mount(src, dst, options): -+ open(dst, "a").close() # touch -+ -+ rc = subprocess.call(["mount", "--bind", "-o", options, src, dst]) -+ if rc != 0: -+ raise RuntimeError("bad_mount: src=%s dst=%s opts=%s" % -+ (src, dst, options)) -+ -+def downgrade_rlimits(): -+ # Wipe the authority to use unrequired resources -+ resource.setrlimit(resource.RLIMIT_NPROC, (0, 0)) -+ resource.setrlimit(resource.RLIMIT_CORE, (0, 0)) -+ resource.setrlimit(resource.RLIMIT_MEMLOCK, (0, 0)) -+ -+ # py2's resource module doesn't know about resource.RLIMIT_MSGQUEUE -+ # -+ # TODO: Use resource.RLIMIT_MSGQUEUE after python2 is deprecated -+ if sys.platform.startswith('linux'): -+ RLIMIT_MSGQUEUE = 12 -+ resource.setrlimit(RLIMIT_MSGQUEUE, (0, 0)) -+ -+ # The final look of the filesystem for this process is fully RO, but -+ # note we have some file descriptor already open (notably, kernel and -+ # ramdisk). In order to avoid a compromised pygrub from filling up the -+ # filesystem we set RLIMIT_FSIZE to a high bound, so that the file -+ # write permissions are bound. -+ fsize = LIMIT_FSIZE -+ if "PYGRUB_MAX_FILE_SIZE_MB" in os.environ.keys(): -+ fsize = os.environ["PYGRUB_MAX_FILE_SIZE_MB"] << 20 -+ -+ resource.setrlimit(resource.RLIMIT_FSIZE, (fsize, fsize)) -+ -+def depriv(output_directory, output, device, uid, path_kernel, path_ramdisk): -+ # The only point of this call is to force the loading of libfsimage. 
-+ # That way, we don't need to bind-mount it into the chroot -+ rc = xenfsimage.init() -+ if rc != 0: -+ os.unlink(path_ramdisk) -+ os.unlink(path_kernel) -+ raise RuntimeError("bad_xenfsimage: rc=%d" % rc) -+ -+ # Create a temporary directory for the chroot -+ chroot = tempfile.mkdtemp(prefix=str(uid)+'-', dir=output_directory) + '/' -+ device_path = '/device' -+ -+ pid = os.fork() -+ if pid: -+ # parent -+ _, rc = os.waitpid(pid, 0) -+ -+ for path in [path_kernel, path_ramdisk]: -+ # If the child didn't write anything, just get rid of it, -+ # otherwise we end up consuming a 0-size file when parsing -+ # systems without a ramdisk that the ultimate caller of pygrub -+ # may just be unaware of -+ if rc != 0 or os.path.getsize(path) == 0: -+ os.unlink(path) -+ -+ # Normally, unshare(CLONE_NEWNS) will ensure this is not required. -+ # However, this syscall doesn't exist in *BSD systems and doesn't -+ # auto-unmount everything on older Linux kernels (At least as of -+ # Linux 4.19, but it seems fixed in 5.15). Either way, -+ # recursively unmount everything if needed. Quietly. -+ with open('/dev/null', 'w') as devnull: -+ subprocess.call(["umount", "-f", chroot + device_path], -+ stdout=devnull, stderr=devnull) -+ subprocess.call(["umount", "-f", chroot], -+ stdout=devnull, stderr=devnull) -+ os.rmdir(chroot) -+ -+ sys.exit(rc) -+ -+ # By unsharing the namespace we're making sure it's all bulk-released -+ # at the end, when the namespaces disappear. This means the kernel does -+ # (almost) all the cleanup for us and the parent just has to remove the -+ # temporary directory. -+ unshare(CLONE_NEWNS | CLONE_NEWIPC | CLONE_NEWNET) -+ -+ # Set sensible limits using the setrlimit interface -+ downgrade_rlimits() -+ -+ # We'll mount tmpfs on the chroot to ensure the deprivileged child -+ # cannot affect the persistent state. It's RW now in order to -+ # bind-mount the device, but note it's remounted RO after that. 
-+ rc = subprocess.call(["mount", "-t", "tmpfs", "none", chroot]) -+ if rc != 0: -+ raise RuntimeError("mount_tmpfs rc=%d dst=\"%s\"" % (rc, chroot)) -+ -+ # Bind the untrusted device RO -+ bind_mount(device, chroot + device_path, "ro,nosuid,noexec") -+ -+ rc = subprocess.call(["mount", "-t", "tmpfs", "-o", "remount,ro,nosuid,noexec,nodev", "none", chroot]) -+ if rc != 0: -+ raise RuntimeError("remount_tmpfs rc=%d dst=\"%s\"" % (rc, chroot)) -+ -+ # Drop superpowers! -+ os.chroot(chroot) -+ os.chdir('/') -+ os.setgid(uid) -+ os.setgroups([uid]) -+ os.setuid(uid) -+ -+ return device_path -+ - def read_size_roundup(fd, size): - if platform.system() != 'FreeBSD': - return size -@@ -736,7 +864,7 @@ if __name__ == "__main__": - sel = None - - def usage(): -- print("Usage: %s [-q|--quiet] [-i|--interactive] [-l|--list-entries] [-n|--not-really] [--output=] [--kernel=] [--ramdisk=] [--args=] [--entry=] [--output-directory=] [--output-format=sxp|simple|simple0] [--offset=] " %(sys.argv[0],), file=sys.stderr) -+ print("Usage: %s [-q|--quiet] [-i|--interactive] [-l|--list-entries] [-n|--not-really] [--output=] [--kernel=] [--ramdisk=] [--args=] [--entry=] [--output-directory=] [--output-format=sxp|simple|simple0] [--runas=] [--offset=] " %(sys.argv[0],), file=sys.stderr) - - def copy_from_image(fs, file_to_read, file_type, fd_dst, path_dst, not_really): - if not_really: -@@ -760,7 +888,8 @@ if __name__ == "__main__": - os.write(fd_dst, data) - except Exception as e: - print(e, file=sys.stderr) -- os.unlink(path_dst) -+ if path_dst: -+ os.unlink(path_dst) - del datafile - sys.exit("Error writing temporary copy of "+file_type) - dataoff += len(data) -@@ -769,7 +898,7 @@ if __name__ == "__main__": - opts, args = getopt.gnu_getopt(sys.argv[1:], 'qilnh::', - ["quiet", "interactive", "list-entries", "not-really", "help", - "output=", "output-format=", "output-directory=", "offset=", -- "entry=", "kernel=", -+ "runas=", "entry=", "kernel=", - "ramdisk=", "args=", "isconfig", 
"debug"]) - except getopt.GetoptError: - usage() -@@ -790,6 +919,7 @@ if __name__ == "__main__": - not_really = False - output_format = "sxp" - output_directory = "/var/run/xen/pygrub/" -+ uid = None - - # what was passed in - incfg = { "kernel": None, "ramdisk": None, "args": "" } -@@ -813,6 +943,13 @@ if __name__ == "__main__": - elif o in ("--output",): - if a != "-": - output = a -+ elif o in ("--runas",): -+ try: -+ uid = int(a) -+ except ValueError: -+ print("runas value must be an integer user id") -+ usage() -+ sys.exit(1) - elif o in ("--kernel",): - incfg["kernel"] = a - elif o in ("--ramdisk",): -@@ -849,6 +986,10 @@ if __name__ == "__main__": - if debug: - logging.basicConfig(level=logging.DEBUG) - -+ if interactive and uid: -+ print("In order to use --runas, you must also set --entry or -q", file=sys.stderr) -+ sys.exit(1) -+ - try: - os.makedirs(output_directory, 0o700) - except OSError as e: -@@ -870,6 +1011,9 @@ if __name__ == "__main__": - else: - fd = os.open(output, os.O_WRONLY) - -+ if uid: -+ file = depriv(output_directory, output, file, uid, path_kernel, path_ramdisk) -+ - # debug - if isconfig: - chosencfg = run_grub(file, entry, fs, incfg["args"]) -@@ -925,21 +1069,21 @@ if __name__ == "__main__": - raise RuntimeError("Unable to find partition containing kernel") - - copy_from_image(fs, chosencfg["kernel"], "kernel", -- fd_kernel, path_kernel, not_really) -+ fd_kernel, None if uid else path_kernel, not_really) - bootcfg["kernel"] = path_kernel - - if chosencfg["ramdisk"]: - try: - copy_from_image(fs, chosencfg["ramdisk"], "ramdisk", -- fd_ramdisk, path_ramdisk, not_really) -+ fd_ramdisk, None if uid else path_ramdisk, not_really) - except: -- if not not_really: -- os.unlink(path_kernel) -+ if not uid and not not_really: -+ os.unlink(path_kernel) - raise - bootcfg["ramdisk"] = path_ramdisk - else: - initrd = None -- if not not_really: -+ if not uid and not not_really: - os.unlink(path_ramdisk) - - args = None --- -2.42.0 - diff --git 
a/xsa443-4.17-10.patch b/xsa443-4.17-10.patch deleted file mode 100644 index 7c91f32..0000000 --- a/xsa443-4.17-10.patch +++ /dev/null @@ -1,250 +0,0 @@ -From 698b451473a6d868ca0f60a124fc4f31d81cd7b1 Mon Sep 17 00:00:00 2001 -From: Roger Pau Monne -Date: Mon, 25 Sep 2023 14:30:20 +0200 -Subject: [PATCH 10/11] libxl: add support for running bootloader in restricted - mode -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Much like the device model depriv mode, add the same kind of support for the -bootloader. Such feature allows passing a UID as a parameter for the -bootloader to run as, together with the bootloader itself taking the necessary -actions to isolate. - -Note that the user to run the bootloader as must have the right permissions to -access the guest disk image (in read mode only), and that the bootloader will -be run in non-interactive mode when restricted. - -If enabled bootloader restrict mode will attempt to re-use the user(s) from the -QEMU depriv implementation if no user is provided on the configuration file or -the environment. See docs/features/qemu-deprivilege.pandoc for more -information about how to setup those users. - -Bootloader restrict mode is not enabled by default as it requires certain -setup to be done first (setup of the user(s) to use in restrict mode). 
- -This is part of XSA-443 / CVE-2023-34325 - -Signed-off-by: Roger Pau Monné -Reviewed-by: Anthony PERARD ---- - docs/man/xl.1.pod.in | 33 +++++++++++ - tools/libs/light/libxl_bootloader.c | 89 ++++++++++++++++++++++++++++- - tools/libs/light/libxl_dm.c | 8 +-- - tools/libs/light/libxl_internal.h | 8 +++ - 4 files changed, 131 insertions(+), 7 deletions(-) - -diff --git a/docs/man/xl.1.pod.in b/docs/man/xl.1.pod.in -index 101e14241d1c..4831e122427d 100644 ---- a/docs/man/xl.1.pod.in -+++ b/docs/man/xl.1.pod.in -@@ -1957,6 +1957,39 @@ ignored: - - =back - -+=head1 ENVIRONMENT VARIABLES -+ -+The following environment variables shall affect the execution of xl: -+ -+=over 4 -+ -+=item LIBXL_BOOTLOADER_RESTRICT -+ -+Attempt to restrict the bootloader after startup, to limit the -+consequences of security vulnerabilities due to parsing guest -+owned image files. -+ -+See docs/features/qemu-deprivilege.pandoc for more information -+on how to setup the unprivileged users. -+ -+Note that running the bootloader in restricted mode also implies using -+non-interactive mode, and the disk image must be readable by the -+restricted user. -+ -+Having this variable set is equivalent to enabling the option, even if the -+value is 0. -+ -+=item LIBXL_BOOTLOADER_USER -+ -+When using bootloader_restrict, run the bootloader as this user. If -+not set the default QEMU restrict users will be used. -+ -+NOTE: Each domain MUST have a SEPARATE username. -+ -+See docs/features/qemu-deprivilege.pandoc for more information. 
-+ -+=back -+ - =head1 SEE ALSO - - The following man pages: -diff --git a/tools/libs/light/libxl_bootloader.c b/tools/libs/light/libxl_bootloader.c -index 108329b4a5bb..23c0ef3e8935 100644 ---- a/tools/libs/light/libxl_bootloader.c -+++ b/tools/libs/light/libxl_bootloader.c -@@ -14,6 +14,7 @@ - - #include "libxl_osdeps.h" /* must come before any other headers */ - -+#include - #include - #ifdef HAVE_UTMP_H - #include -@@ -42,8 +43,71 @@ static void bootloader_arg(libxl__bootloader_state *bl, const char *arg) - bl->args[bl->nargs++] = arg; - } - --static void make_bootloader_args(libxl__gc *gc, libxl__bootloader_state *bl, -- const char *bootloader_path) -+static int bootloader_uid(libxl__gc *gc, domid_t guest_domid, -+ const char *user, uid_t *intended_uid) -+{ -+ struct passwd *user_base, user_pwbuf; -+ int rc; -+ -+ if (user) { -+ rc = userlookup_helper_getpwnam(gc, user, &user_pwbuf, &user_base); -+ if (rc) return rc; -+ -+ if (!user_base) { -+ LOGD(ERROR, guest_domid, "Couldn't find user %s", user); -+ return ERROR_INVAL; -+ } -+ -+ *intended_uid = user_base->pw_uid; -+ return 0; -+ } -+ -+ /* Re-use QEMU user range for the bootloader. 
*/ -+ rc = userlookup_helper_getpwnam(gc, LIBXL_QEMU_USER_RANGE_BASE, -+ &user_pwbuf, &user_base); -+ if (rc) return rc; -+ -+ if (user_base) { -+ struct passwd *user_clash, user_clash_pwbuf; -+ uid_t temp_uid = user_base->pw_uid + guest_domid; -+ -+ rc = userlookup_helper_getpwuid(gc, temp_uid, &user_clash_pwbuf, -+ &user_clash); -+ if (rc) return rc; -+ -+ if (user_clash) { -+ LOGD(ERROR, guest_domid, -+ "wanted to use uid %ld (%s + %d) but that is user %s !", -+ (long)temp_uid, LIBXL_QEMU_USER_RANGE_BASE, -+ guest_domid, user_clash->pw_name); -+ return ERROR_INVAL; -+ } -+ -+ *intended_uid = temp_uid; -+ return 0; -+ } -+ -+ rc = userlookup_helper_getpwnam(gc, LIBXL_QEMU_USER_SHARED, &user_pwbuf, -+ &user_base); -+ if (rc) return rc; -+ -+ if (user_base) { -+ LOGD(WARN, guest_domid, "Could not find user %s, falling back to %s", -+ LIBXL_QEMU_USER_RANGE_BASE, LIBXL_QEMU_USER_SHARED); -+ *intended_uid = user_base->pw_uid; -+ -+ return 0; -+ } -+ -+ LOGD(ERROR, guest_domid, -+ "Could not find user %s or range base pseudo-user %s, cannot restrict", -+ LIBXL_QEMU_USER_SHARED, LIBXL_QEMU_USER_RANGE_BASE); -+ -+ return ERROR_INVAL; -+} -+ -+static int make_bootloader_args(libxl__gc *gc, libxl__bootloader_state *bl, -+ const char *bootloader_path) - { - const libxl_domain_build_info *info = bl->info; - -@@ -61,6 +125,23 @@ static void make_bootloader_args(libxl__gc *gc, libxl__bootloader_state *bl, - ARG(GCSPRINTF("--ramdisk=%s", info->ramdisk)); - if (info->cmdline && *info->cmdline != '\0') - ARG(GCSPRINTF("--args=%s", info->cmdline)); -+ if (getenv("LIBXL_BOOTLOADER_RESTRICT") || -+ getenv("LIBXL_BOOTLOADER_USER")) { -+ uid_t uid = -1; -+ int rc = bootloader_uid(gc, bl->domid, getenv("LIBXL_BOOTLOADER_USER"), -+ &uid); -+ -+ if (rc) return rc; -+ -+ assert(uid != -1); -+ if (!uid) { -+ LOGD(ERROR, bl->domid, "bootloader restrict UID is 0 (root)!"); -+ return ERROR_INVAL; -+ } -+ LOGD(DEBUG, bl->domid, "using uid %ld", (long)uid); -+ ARG(GCSPRINTF("--runas=%ld", 
(long)uid)); -+ ARG("--quiet"); -+ } - - ARG(GCSPRINTF("--output=%s", bl->outputpath)); - ARG("--output-format=simple0"); -@@ -79,6 +160,7 @@ static void make_bootloader_args(libxl__gc *gc, libxl__bootloader_state *bl, - /* Sentinel for execv */ - ARG(NULL); - -+ return 0; - #undef ARG - } - -@@ -443,7 +525,8 @@ static void bootloader_disk_attached_cb(libxl__egc *egc, - bootloader = bltmp; - } - -- make_bootloader_args(gc, bl, bootloader); -+ rc = make_bootloader_args(gc, bl, bootloader); -+ if (rc) goto out; - - bl->openpty.ao = ao; - bl->openpty.callback = bootloader_gotptys; -diff --git a/tools/libs/light/libxl_dm.c b/tools/libs/light/libxl_dm.c -index fc264a3a13a6..14b593110f7c 100644 ---- a/tools/libs/light/libxl_dm.c -+++ b/tools/libs/light/libxl_dm.c -@@ -80,10 +80,10 @@ static int libxl__create_qemu_logfile(libxl__gc *gc, char *name) - * On error, return a libxl-style error code. - */ - #define DEFINE_USERLOOKUP_HELPER(NAME,SPEC_TYPE,STRUCTNAME,SYSCONF) \ -- static int userlookup_helper_##NAME(libxl__gc *gc, \ -- SPEC_TYPE spec, \ -- struct STRUCTNAME *resultbuf, \ -- struct STRUCTNAME **out) \ -+ int userlookup_helper_##NAME(libxl__gc *gc, \ -+ SPEC_TYPE spec, \ -+ struct STRUCTNAME *resultbuf, \ -+ struct STRUCTNAME **out) \ - { \ - struct STRUCTNAME *resultp = NULL; \ - char *buf = NULL; \ -diff --git a/tools/libs/light/libxl_internal.h b/tools/libs/light/libxl_internal.h -index 7ad38de30e0b..f1e3a9a15b13 100644 ---- a/tools/libs/light/libxl_internal.h -+++ b/tools/libs/light/libxl_internal.h -@@ -4873,6 +4873,14 @@ struct libxl__cpu_policy { - struct xc_msr *msr; - }; - -+struct passwd; -+_hidden int userlookup_helper_getpwnam(libxl__gc*, const char *user, -+ struct passwd *res, -+ struct passwd **out); -+_hidden int userlookup_helper_getpwuid(libxl__gc*, uid_t uid, -+ struct passwd *res, -+ struct passwd **out); -+ - #endif - - /* --- -2.42.0 - diff --git a/xsa443-4.17-11.patch b/xsa443-4.17-11.patch deleted file mode 100644 index 27e6f78..0000000 --- 
a/xsa443-4.17-11.patch +++ /dev/null @@ -1,157 +0,0 @@ -From 9d480426bfa2c68843ac8395b512e06fbdbcf53e Mon Sep 17 00:00:00 2001 -From: Roger Pau Monne -Date: Thu, 28 Sep 2023 12:22:35 +0200 -Subject: [PATCH 11/11] libxl: limit bootloader execution in restricted mode -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Introduce a timeout for bootloader execution when running in restricted mode. - -Allow overwriting the default time out with an environment provided value. - -This is part of XSA-443 / CVE-2023-34325 - -Signed-off-by: Roger Pau Monné -Reviewed-by: Anthony PERARD ---- - docs/man/xl.1.pod.in | 8 ++++++ - tools/libs/light/libxl_bootloader.c | 40 +++++++++++++++++++++++++++++ - tools/libs/light/libxl_internal.h | 2 ++ - 3 files changed, 50 insertions(+) - -diff --git a/docs/man/xl.1.pod.in b/docs/man/xl.1.pod.in -index 4831e122427d..c3eb6570ab8b 100644 ---- a/docs/man/xl.1.pod.in -+++ b/docs/man/xl.1.pod.in -@@ -1988,6 +1988,14 @@ NOTE: Each domain MUST have a SEPARATE username. - - See docs/features/qemu-deprivilege.pandoc for more information. - -+=item LIBXL_BOOTLOADER_TIMEOUT -+ -+Timeout in seconds for bootloader execution when running in restricted mode. -+Otherwise the build time default in LIBXL_BOOTLOADER_TIMEOUT will be used. -+ -+If defined the value must be an unsigned integer between 0 and INT_MAX, -+otherwise behavior is undefined. Setting to 0 disables the timeout. 
-+ - =back - - =head1 SEE ALSO -diff --git a/tools/libs/light/libxl_bootloader.c b/tools/libs/light/libxl_bootloader.c -index 23c0ef3e8935..ee26d08f3765 100644 ---- a/tools/libs/light/libxl_bootloader.c -+++ b/tools/libs/light/libxl_bootloader.c -@@ -30,6 +30,8 @@ static void bootloader_keystrokes_copyfail(libxl__egc *egc, - libxl__datacopier_state *dc, int rc, int onwrite, int errnoval); - static void bootloader_display_copyfail(libxl__egc *egc, - libxl__datacopier_state *dc, int rc, int onwrite, int errnoval); -+static void bootloader_timeout(libxl__egc *egc, libxl__ev_time *ev, -+ const struct timeval *requested_abs, int rc); - static void bootloader_domaindeath(libxl__egc*, libxl__domaindeathcheck *dc, - int rc); - static void bootloader_finished(libxl__egc *egc, libxl__ev_child *child, -@@ -297,6 +299,7 @@ void libxl__bootloader_init(libxl__bootloader_state *bl) - bl->ptys[0].master = bl->ptys[0].slave = 0; - bl->ptys[1].master = bl->ptys[1].slave = 0; - libxl__ev_child_init(&bl->child); -+ libxl__ev_time_init(&bl->time); - libxl__domaindeathcheck_init(&bl->deathcheck); - bl->keystrokes.ao = bl->ao; libxl__datacopier_init(&bl->keystrokes); - bl->display.ao = bl->ao; libxl__datacopier_init(&bl->display); -@@ -314,6 +317,7 @@ static void bootloader_cleanup(libxl__egc *egc, libxl__bootloader_state *bl) - libxl__domaindeathcheck_stop(gc,&bl->deathcheck); - libxl__datacopier_kill(&bl->keystrokes); - libxl__datacopier_kill(&bl->display); -+ libxl__ev_time_deregister(gc, &bl->time); - for (i=0; i<2; i++) { - libxl__carefd_close(bl->ptys[i].master); - libxl__carefd_close(bl->ptys[i].slave); -@@ -375,6 +379,7 @@ static void bootloader_stop(libxl__egc *egc, - - libxl__datacopier_kill(&bl->keystrokes); - libxl__datacopier_kill(&bl->display); -+ libxl__ev_time_deregister(gc, &bl->time); - if (libxl__ev_child_inuse(&bl->child)) { - r = kill(bl->child.pid, SIGTERM); - if (r) LOGED(WARN, bl->domid, "%sfailed to kill bootloader [%lu]", -@@ -637,6 +642,25 @@ static void 
bootloader_gotptys(libxl__egc *egc, libxl__openpty_state *op) - - struct termios termattr; - -+ if (getenv("LIBXL_BOOTLOADER_RESTRICT") || -+ getenv("LIBXL_BOOTLOADER_USER")) { -+ const char *timeout_env = getenv("LIBXL_BOOTLOADER_TIMEOUT"); -+ int timeout = timeout_env ? atoi(timeout_env) -+ : LIBXL_BOOTLOADER_TIMEOUT; -+ -+ if (timeout) { -+ /* Set execution timeout */ -+ rc = libxl__ev_time_register_rel(ao, &bl->time, -+ bootloader_timeout, -+ timeout * 1000); -+ if (rc) { -+ LOGED(ERROR, bl->domid, -+ "unable to register timeout for bootloader execution"); -+ goto out; -+ } -+ } -+ } -+ - pid_t pid = libxl__ev_child_fork(gc, &bl->child, bootloader_finished); - if (pid == -1) { - rc = ERROR_FAIL; -@@ -702,6 +726,21 @@ static void bootloader_display_copyfail(libxl__egc *egc, - libxl__bootloader_state *bl = CONTAINER_OF(dc, *bl, display); - bootloader_copyfail(egc, "bootloader output", bl, 1, rc,onwrite,errnoval); - } -+static void bootloader_timeout(libxl__egc *egc, libxl__ev_time *ev, -+ const struct timeval *requested_abs, int rc) -+{ -+ libxl__bootloader_state *bl = CONTAINER_OF(ev, *bl, time); -+ STATE_AO_GC(bl->ao); -+ -+ libxl__ev_time_deregister(gc, &bl->time); -+ -+ assert(libxl__ev_child_inuse(&bl->child)); -+ LOGD(ERROR, bl->domid, "killing bootloader because of timeout"); -+ -+ libxl__ev_child_kill_deregister(ao, &bl->child, SIGKILL); -+ -+ bootloader_callback(egc, bl, rc); -+} - - static void bootloader_domaindeath(libxl__egc *egc, - libxl__domaindeathcheck *dc, -@@ -718,6 +757,7 @@ static void bootloader_finished(libxl__egc *egc, libxl__ev_child *child, - STATE_AO_GC(bl->ao); - int rc; - -+ libxl__ev_time_deregister(gc, &bl->time); - libxl__datacopier_kill(&bl->keystrokes); - libxl__datacopier_kill(&bl->display); - -diff --git a/tools/libs/light/libxl_internal.h b/tools/libs/light/libxl_internal.h -index f1e3a9a15b13..d05783617ff5 100644 ---- a/tools/libs/light/libxl_internal.h -+++ b/tools/libs/light/libxl_internal.h -@@ -102,6 +102,7 @@ - #define 
LIBXL_QMP_CMD_TIMEOUT 10 - #define LIBXL_STUBDOM_START_TIMEOUT 30 - #define LIBXL_QEMU_BODGE_TIMEOUT 2 -+#define LIBXL_BOOTLOADER_TIMEOUT 120 - #define LIBXL_XENCONSOLE_LIMIT 1048576 - #define LIBXL_XENCONSOLE_PROTOCOL "vt100" - #define LIBXL_MAXMEM_CONSTANT 1024 -@@ -3744,6 +3745,7 @@ struct libxl__bootloader_state { - libxl__openpty_state openpty; - libxl__openpty_result ptys[2]; /* [0] is for bootloader */ - libxl__ev_child child; -+ libxl__ev_time time; - libxl__domaindeathcheck deathcheck; - int nargs, argsspace; - const char **args; --- -2.42.0 - diff --git a/xsa444-4.17-1.patch b/xsa444-4.17-1.patch deleted file mode 100644 index 5a4b2e5..0000000 --- a/xsa444-4.17-1.patch +++ /dev/null @@ -1,93 +0,0 @@ -From: Andrew Cooper -Subject: x86/svm: Fix asymmetry with AMD DR MASK context switching - -The handling of MSR_DR{0..3}_MASK is asymmetric between PV and HVM guests. - -HVM guests context switch in based on the guest view of DBEXT, whereas PV -guest switch in base on the host capability. Both guest types leave the -context dirty for the next vCPU. - -This leads to the following issue: - - * PV or HVM guest has debugging active (%dr7 + mask) - * Switch-out deactivates %dr7 but leaves other state stale in hardware - * Another HVM guest with masks unavailable has debugging active - * Switch in loads %dr7 but leaves the mask MSRs alone - -Now, the second guest's vCPU is operating in the context of the prior vCPU's -mask MSR, while the environment the vCPU can see says there are no mask MSRs. - -As a stopgap, adjust the HVM path to switch in the masks based on host -capabilities rather than guest visibility (i.e. like the PV path). Adjustment -of the intercepts still needs to be dependent on the guest visibility of -DBEXT. 
- -This is part of XSA-444 / CVE-2023-34327 - -Fixes: c097f54912d3 ("x86/SVM: support data breakpoint extension registers") -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich - -diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c -index e8f50e7c5ec7..fd32600ae364 100644 ---- a/xen/arch/x86/hvm/svm/svm.c -+++ b/xen/arch/x86/hvm/svm/svm.c -@@ -339,6 +339,10 @@ static void svm_save_dr(struct vcpu *v) - v->arch.hvm.flag_dr_dirty = 0; - vmcb_set_dr_intercepts(vmcb, ~0u); - -+ /* -+ * The guest can only have changed the mask MSRs if we previous dropped -+ * intercepts. Re-read them from hardware. -+ */ - if ( v->domain->arch.cpuid->extd.dbext ) - { - svm_intercept_msr(v, MSR_AMD64_DR0_ADDRESS_MASK, MSR_INTERCEPT_RW); -@@ -370,17 +374,25 @@ static void __restore_debug_registers(struct vmcb_struct *vmcb, struct vcpu *v) - - ASSERT(v == current); - -- if ( v->domain->arch.cpuid->extd.dbext ) -+ /* -+ * Both the PV and HVM paths leave stale DR_MASK values in hardware on -+ * context-switch-out. If we're activating %dr7 for the guest, we must -+ * sync the DR_MASKs too, whether or not the guest can see them. 
-+ */ -+ if ( boot_cpu_has(X86_FEATURE_DBEXT) ) - { -- svm_intercept_msr(v, MSR_AMD64_DR0_ADDRESS_MASK, MSR_INTERCEPT_NONE); -- svm_intercept_msr(v, MSR_AMD64_DR1_ADDRESS_MASK, MSR_INTERCEPT_NONE); -- svm_intercept_msr(v, MSR_AMD64_DR2_ADDRESS_MASK, MSR_INTERCEPT_NONE); -- svm_intercept_msr(v, MSR_AMD64_DR3_ADDRESS_MASK, MSR_INTERCEPT_NONE); -- - wrmsrl(MSR_AMD64_DR0_ADDRESS_MASK, v->arch.msrs->dr_mask[0]); - wrmsrl(MSR_AMD64_DR1_ADDRESS_MASK, v->arch.msrs->dr_mask[1]); - wrmsrl(MSR_AMD64_DR2_ADDRESS_MASK, v->arch.msrs->dr_mask[2]); - wrmsrl(MSR_AMD64_DR3_ADDRESS_MASK, v->arch.msrs->dr_mask[3]); -+ -+ if ( v->domain->arch.cpuid->extd.dbext ) -+ { -+ svm_intercept_msr(v, MSR_AMD64_DR0_ADDRESS_MASK, MSR_INTERCEPT_NONE); -+ svm_intercept_msr(v, MSR_AMD64_DR1_ADDRESS_MASK, MSR_INTERCEPT_NONE); -+ svm_intercept_msr(v, MSR_AMD64_DR2_ADDRESS_MASK, MSR_INTERCEPT_NONE); -+ svm_intercept_msr(v, MSR_AMD64_DR3_ADDRESS_MASK, MSR_INTERCEPT_NONE); -+ } - } - - write_debugreg(0, v->arch.dr[0]); -diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c -index e65cc6004148..06c4f3868b7a 100644 ---- a/xen/arch/x86/traps.c -+++ b/xen/arch/x86/traps.c -@@ -2281,6 +2281,11 @@ void activate_debugregs(const struct vcpu *curr) - if ( curr->arch.dr7 & DR7_ACTIVE_MASK ) - write_debugreg(7, curr->arch.dr7); - -+ /* -+ * Both the PV and HVM paths leave stale DR_MASK values in hardware on -+ * context-switch-out. If we're activating %dr7 for the guest, we must -+ * sync the DR_MASKs too, whether or not the guest can see them. -+ */ - if ( boot_cpu_has(X86_FEATURE_DBEXT) ) - { - wrmsrl(MSR_AMD64_DR0_ADDRESS_MASK, curr->arch.msrs->dr_mask[0]); diff --git a/xsa444-4.17-2.patch b/xsa444-4.17-2.patch deleted file mode 100644 index 2687bd1..0000000 --- a/xsa444-4.17-2.patch +++ /dev/null @@ -1,72 +0,0 @@ -From: Andrew Cooper -Subject: x86/pv: Correct the auditing of guest breakpoint addresses - -The use of access_ok() is buggy, because it permits access to the compat -translation area. 
64bit PV guests don't use the XLAT area, but on AMD -hardware, the DBEXT feature allows a breakpoint to match up to a 4G aligned -region, allowing the breakpoint to reach outside of the XLAT area. - -Prior to c/s cda16c1bb223 ("x86: mirror compat argument translation area for -32-bit PV"), the live GDT was within 4G of the XLAT area. - -All together, this allowed a malicious 64bit PV guest on AMD hardware to place -a breakpoint over the live GDT, and trigger a #DB livelock (CVE-2015-8104). - -Introduce breakpoint_addr_ok() and explain why __addr_ok() happens to be an -appropriate check in this case. - -For Xen 4.14 and later, this is a latent bug because the XLAT area has moved -to be on its own with nothing interesting adjacent. For Xen 4.13 and older on -AMD hardware, this fixes a PV-trigger-able DoS. - -This is part of XSA-444 / CVE-2023-34328. - -Fixes: 65e355490817 ("x86/PV: support data breakpoint extension registers") -Signed-off-by: Andrew Cooper -Reviewed-by: Roger Pau Monné -Reviewed-by: Jan Beulich - -diff --git a/xen/arch/x86/include/asm/debugreg.h b/xen/arch/x86/include/asm/debugreg.h -index c57914efc6e8..cc298265244b 100644 ---- a/xen/arch/x86/include/asm/debugreg.h -+++ b/xen/arch/x86/include/asm/debugreg.h -@@ -77,6 +77,26 @@ - asm volatile ( "mov %%db" #reg ",%0" : "=r" (__val) ); \ - __val; \ - }) -+ -+/* -+ * Architecturally, %dr{0..3} can have any arbitrary value. However, Xen -+ * can't allow the guest to breakpoint the Xen address range, so we limit the -+ * guest to the lower canonical half, or above the Xen range in the higher -+ * canonical half. -+ * -+ * Breakpoint lengths are specified to mask the low order address bits, -+ * meaning all breakpoints are naturally aligned. With %dr7, the widest -+ * breakpoint is 8 bytes. With DBEXT, the widest breakpoint is 4G. Both of -+ * the Xen boundaries have >4G alignment. 
-+ * -+ * In principle we should account for HYPERVISOR_COMPAT_VIRT_START(d), but -+ * 64bit Xen has never enforced this for compat guests, and there's no problem -+ * (to Xen) if the guest breakpoints it's alias of the M2P. Skipping this -+ * aspect simplifies the logic, and causes us not to reject a migrating guest -+ * which operated fine on prior versions of Xen. -+ */ -+#define breakpoint_addr_ok(a) __addr_ok(a) -+ - long set_debugreg(struct vcpu *, unsigned int reg, unsigned long value); - void activate_debugregs(const struct vcpu *); - -diff --git a/xen/arch/x86/pv/misc-hypercalls.c b/xen/arch/x86/pv/misc-hypercalls.c -index aaaf70eb6330..f8636de907ae 100644 ---- a/xen/arch/x86/pv/misc-hypercalls.c -+++ b/xen/arch/x86/pv/misc-hypercalls.c -@@ -72,7 +72,7 @@ long set_debugreg(struct vcpu *v, unsigned int reg, unsigned long value) - switch ( reg ) - { - case 0 ... 3: -- if ( !access_ok(value, sizeof(long)) ) -+ if ( !breakpoint_addr_ok(value) ) - return -EPERM; - - v->arch.dr[reg] = value; diff --git a/xsa445-4.17.patch b/xsa445-4.17.patch deleted file mode 100644 index db66d7c..0000000 --- a/xsa445-4.17.patch +++ /dev/null @@ -1,63 +0,0 @@ -From a43127d4f1f9a364334fe16b6239c211b35fd238 Mon Sep 17 00:00:00 2001 -From: Roger Pau Monne -Date: Wed, 11 Oct 2023 13:14:21 +0200 -Subject: [PATCH] iommu/amd-vi: use correct level for quarantine domain page - tables -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -The current setup of the quarantine page tables assumes that the quarantine -domain (dom_io) has been initialized with an address width of -DEFAULT_DOMAIN_ADDRESS_WIDTH (48). - -However dom_io being a PV domain gets the AMD-Vi IOMMU page tables levels based -on the maximum (hot pluggable) RAM address, and hence on systems with no RAM -above the 512GB mark only 3 page-table levels are configured in the IOMMU. 
- -On systems without RAM above the 512GB boundary amd_iommu_quarantine_init() -will setup page tables for the scratch page with 4 levels, while the IOMMU will -be configured to use 3 levels only. The page destined to be used as level 1, -and to contain a directory of PTEs ends up being the address in a PTE itself, -and thus level 1 page becomes the leaf page. Without the level mismatch it's -level 0 page that should be the leaf page instead. - -The level 1 page won't be used as such, and hence it's not possible to use it -to gain access to other memory on the system. However that page is not cleared -in amd_iommu_quarantine_init() as part of re-initialization of the device -quarantine page tables, and hence data on the level 1 page can be leaked -between device usages. - -Fix this by making sure the paging levels setup by amd_iommu_quarantine_init() -match the number configured on the IOMMUs. - -Note that IVMD regions are not affected by this issue, as those areas are -mapped taking the configured paging levels into account. 
- -This is XSA-445 / CVE-2023-46835 - -Fixes: ea38867831da ('x86 / iommu: set up a scratch page in the quarantine domain') -Signed-off-by: Roger Pau Monné -Reviewed-by: Jan Beulich ---- - xen/drivers/passthrough/amd/iommu_map.c | 4 +--- - 1 file changed, 1 insertion(+), 3 deletions(-) - -diff --git a/xen/drivers/passthrough/amd/iommu_map.c b/xen/drivers/passthrough/amd/iommu_map.c -index 993bac6f8878..e0f4fe736a8d 100644 ---- a/xen/drivers/passthrough/amd/iommu_map.c -+++ b/xen/drivers/passthrough/amd/iommu_map.c -@@ -837,9 +837,7 @@ static int fill_qpt(union amd_iommu_pte *this, unsigned int level, - int cf_check amd_iommu_quarantine_init(struct pci_dev *pdev, bool scratch_page) - { - struct domain_iommu *hd = dom_iommu(dom_io); -- unsigned long end_gfn = -- 1ul << (DEFAULT_DOMAIN_ADDRESS_WIDTH - PAGE_SHIFT); -- unsigned int level = amd_iommu_get_paging_mode(end_gfn); -+ unsigned int level = hd->arch.amd.paging_mode; - unsigned int req_id = get_dma_requestor_id(pdev->seg, pdev->sbdf.bdf); - const struct ivrs_mappings *ivrs_mappings = get_ivrs_mappings(pdev->seg); - int rc; --- -2.42.0 - diff --git a/xsa446.patch b/xsa446.patch deleted file mode 100644 index acf1d0f..0000000 --- a/xsa446.patch +++ /dev/null @@ -1,115 +0,0 @@ -From 80d5aada598c3a800a350003d5d582931545e13c Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Thu, 26 Oct 2023 14:37:38 +0100 -Subject: [PATCH] x86/spec-ctrl: Remove conditional IRQs-on-ness for INT - $0x80/0x82 paths -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Before speculation defences, some paths in Xen could genuinely get away with -being IRQs-on at entry. But XPTI invalidated this property on most paths, and -attempting to maintain it on the remaining paths was a mistake. - -Fast forward, and DO_SPEC_CTRL_COND_IBPB (protection for AMD BTC/SRSO) is not -IRQ-safe, running with IRQs enabled in some cases. The other actions taken on -these paths happen to be IRQ-safe. 
- -Make entry_int82() and int80_direct_trap() unconditionally Interrupt Gates -rather than Trap Gates. Remove the conditional re-adjustment of -int80_direct_trap() in smp_prepare_cpus(), and have entry_int82() explicitly -enable interrupts when safe to do so. - -In smp_prepare_cpus(), with the conditional re-adjustment removed, the -clearing of pv_cr3 is the only remaining action gated on XPTI, and it is out -of place anyway, repeating work already done by smp_prepare_boot_cpu(). Drop -the entire if() condition to avoid leaving an incorrect vestigial remnant. - -Also drop comments which make incorrect statements about when its safe to -enable interrupts. - -This is XSA-446 / CVE-2023-46836 - -Signed-off-by: Andrew Cooper -Reviewed-by: Roger Pau Monné ---- - xen/arch/x86/pv/traps.c | 4 ++-- - xen/arch/x86/smpboot.c | 14 -------------- - xen/arch/x86/x86_64/compat/entry.S | 2 ++ - xen/arch/x86/x86_64/entry.S | 1 - - 4 files changed, 4 insertions(+), 17 deletions(-) - -diff --git a/xen/arch/x86/pv/traps.c b/xen/arch/x86/pv/traps.c -index 74f333da7e1c..240d1a2db7a3 100644 ---- a/xen/arch/x86/pv/traps.c -+++ b/xen/arch/x86/pv/traps.c -@@ -139,11 +139,11 @@ void __init pv_trap_init(void) - #ifdef CONFIG_PV32 - /* The 32-on-64 hypercall vector is only accessible from ring 1. */ - _set_gate(idt_table + HYPERCALL_VECTOR, -- SYS_DESC_trap_gate, 1, entry_int82); -+ SYS_DESC_irq_gate, 1, entry_int82); - #endif - - /* Fast trap for int80 (faster than taking the #GP-fixup path). 
*/ -- _set_gate(idt_table + LEGACY_SYSCALL_VECTOR, SYS_DESC_trap_gate, 3, -+ _set_gate(idt_table + LEGACY_SYSCALL_VECTOR, SYS_DESC_irq_gate, 3, - &int80_direct_trap); - - open_softirq(NMI_SOFTIRQ, nmi_softirq); -diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c -index 3a1a659082c6..4c54ecbc91d7 100644 ---- a/xen/arch/x86/smpboot.c -+++ b/xen/arch/x86/smpboot.c -@@ -1158,20 +1158,6 @@ void __init smp_prepare_cpus(void) - - stack_base[0] = (void *)((unsigned long)stack_start & ~(STACK_SIZE - 1)); - -- if ( opt_xpti_hwdom || opt_xpti_domu ) -- { -- get_cpu_info()->pv_cr3 = 0; -- --#ifdef CONFIG_PV -- /* -- * All entry points which may need to switch page tables have to start -- * with interrupts off. Re-write what pv_trap_init() has put there. -- */ -- _set_gate(idt_table + LEGACY_SYSCALL_VECTOR, SYS_DESC_irq_gate, 3, -- &int80_direct_trap); --#endif -- } -- - set_nr_sockets(); - - socket_cpumask = xzalloc_array(cpumask_t *, nr_sockets); -diff --git a/xen/arch/x86/x86_64/compat/entry.S b/xen/arch/x86/x86_64/compat/entry.S -index bd5abd8040bd..fcc3a721f147 100644 ---- a/xen/arch/x86/x86_64/compat/entry.S -+++ b/xen/arch/x86/x86_64/compat/entry.S -@@ -21,6 +21,8 @@ ENTRY(entry_int82) - SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, %rdx=0, Clob: acd */ - /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */ - -+ sti -+ - CR4_PV32_RESTORE - - GET_CURRENT(bx) -diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S -index 5ca74f5f62b2..9a7b129aa7e4 100644 ---- a/xen/arch/x86/x86_64/entry.S -+++ b/xen/arch/x86/x86_64/entry.S -@@ -327,7 +327,6 @@ ENTRY(sysenter_entry) - #ifdef CONFIG_XEN_SHSTK - ALTERNATIVE "", "setssbsy", X86_FEATURE_XEN_SHSTK - #endif -- /* sti could live here when we don't switch page tables below. */ - pushq $FLAT_USER_SS - pushq $0 - pushfq - -base-commit: 7befef87cc9b1bb8ca15d866ce1ecd9165ccb58c -prerequisite-patch-id: 142a87c707411d49e136c3fb76f1b14963ec6dc8 --- -2.30.2 -