diff --git a/.gitignore b/.gitignore index 348376d..8bcbc57 100644 --- a/.gitignore +++ b/.gitignore @@ -6,4 +6,4 @@ lwip-1.3.0.tar.gz pciutils-2.2.9.tar.bz2 zlib-1.2.3.tar.gz polarssl-1.1.4-gpl.tgz -/xen-4.9.0.tar.gz +/xen-4.9.1.tar.gz diff --git a/qemu.git-3d90c6254863693a6b13d918d2b8682e08bbc681.patch b/qemu.git-3d90c6254863693a6b13d918d2b8682e08bbc681.patch deleted file mode 100644 index 02261da..0000000 --- a/qemu.git-3d90c6254863693a6b13d918d2b8682e08bbc681.patch +++ /dev/null @@ -1,497 +0,0 @@ -From 3d90c6254863693a6b13d918d2b8682e08bbc681 Mon Sep 17 00:00:00 2001 -From: Gerd Hoffmann -Date: Mon, 28 Aug 2017 14:29:06 +0200 -Subject: [PATCH] vga: stop passing pointers to vga_draw_line* functions - -Instead pass around the address (aka offset into vga memory). -Add vga_read_* helper functions which apply vbe_size_mask to -the address, to make sure the address stays within the valid -range, similar to the cirrus blitter fixes (commits ffaf857778 -and 026aeffcb4). - -Impact: DoS for privileged guest users. qemu crashes with -a segfault, when hitting the guard page after vga memory -allocation, while reading vga memory for display updates. - -Fixes: CVE-2017-13672 -Cc: P J P -Reported-by: David Buchanan -Signed-off-by: Gerd Hoffmann -Message-id: 20170828122906.18993-1-kraxel@redhat.com ---- - hw/display/vga-helpers.h | 202 ++++++++++++++++++++++++++--------------------- - hw/display/vga.c | 5 +- - hw/display/vga_int.h | 1 + - 3 files changed, 114 insertions(+), 94 deletions(-) - -diff --git a/hw/display/vga-helpers.h b/hw/display/vga-helpers.h -index 94f6de2..5a752b3 100644 ---- a/hw/display/vga-helpers.h -+++ b/hw/display/vga-helpers.h -@@ -95,20 +95,46 @@ static void vga_draw_glyph9(uint8_t *d, int linesize, - } while (--h); - } - -+static inline uint8_t vga_read_byte(VGACommonState *vga, uint32_t addr) -+{ -+ return vga->vram_ptr[addr & vga->vbe_size_mask]; -+} -+ -+static inline uint16_t vga_read_word_le(VGACommonState *vga, uint32_t addr) -+{ -+ uint32_t offset = addr & vga->vbe_size_mask & ~1; -+ uint16_t *ptr = (uint16_t *)(vga->vram_ptr + offset); -+ return lduw_le_p(ptr); -+} -+ -+static inline uint16_t vga_read_word_be(VGACommonState *vga, uint32_t addr) -+{ -+ uint32_t offset = addr & vga->vbe_size_mask & ~1; -+ uint16_t *ptr = (uint16_t *)(vga->vram_ptr + offset); -+ return lduw_be_p(ptr); -+} -+ -+static inline uint32_t vga_read_dword_le(VGACommonState *vga, uint32_t addr) -+{ -+ uint32_t offset = addr & vga->vbe_size_mask & ~3; -+ uint32_t *ptr = (uint32_t *)(vga->vram_ptr + offset); -+ return ldl_le_p(ptr); -+} -+ - /* - * 4 color mode - */ --static void vga_draw_line2(VGACommonState *s1, uint8_t *d, -- const uint8_t *s, int width) -+static void vga_draw_line2(VGACommonState *vga, uint8_t *d, -+ uint32_t addr, int width) - { - uint32_t plane_mask, *palette, data, v; - int x; - -- palette = s1->last_palette; -- plane_mask = mask16[s1->ar[VGA_ATC_PLANE_ENABLE] & 0xf]; -+ palette = vga->last_palette; -+ plane_mask = mask16[vga->ar[VGA_ATC_PLANE_ENABLE] & 0xf]; - width >>= 3; - for(x = 0; x < width; x++) { -- data = ((uint32_t *)s)[0]; -+ data = vga_read_dword_le(vga, addr); - data &= plane_mask; - v = expand2[GET_PLANE(data, 0)]; - v |= expand2[GET_PLANE(data, 2)] << 2; -@@ -124,7 +150,7 @@ static void vga_draw_line2(VGACommonState *s1, uint8_t *d, - ((uint32_t *)d)[6] = palette[(v >> 4) & 0xf]; - ((uint32_t *)d)[7] = palette[(v >> 0) & 0xf]; - d += 32; -- s += 4; -+ addr += 4; - } - } - -@@ -134,17 +160,17 @@ static void vga_draw_line2(VGACommonState *s1, uint8_t *d, - /* - * 4 color mode, dup2 horizontal - */ --static void vga_draw_line2d2(VGACommonState *s1, uint8_t *d, -- const uint8_t *s, int width) -+static void vga_draw_line2d2(VGACommonState *vga, uint8_t *d, -+ uint32_t addr, int width) - { - uint32_t plane_mask, *palette, data, v; - int x; - -- palette = s1->last_palette; -- plane_mask = mask16[s1->ar[VGA_ATC_PLANE_ENABLE] & 0xf]; -+ palette = vga->last_palette; -+ plane_mask = mask16[vga->ar[VGA_ATC_PLANE_ENABLE] & 0xf]; - width >>= 3; - for(x = 0; x < width; x++) { -- data = ((uint32_t *)s)[0]; -+ data = vga_read_dword_le(vga, addr); - data &= plane_mask; - v = expand2[GET_PLANE(data, 0)]; - v |= expand2[GET_PLANE(data, 2)] << 2; -@@ -160,24 +186,24 @@ static void vga_draw_line2d2(VGACommonState *s1, uint8_t *d, - PUT_PIXEL2(d, 6, palette[(v >> 4) & 0xf]); - PUT_PIXEL2(d, 7, palette[(v >> 0) & 0xf]); - d += 64; -- s += 4; -+ addr += 4; - } - } - - /* - * 16 color mode - */ --static void vga_draw_line4(VGACommonState *s1, uint8_t *d, -- const uint8_t *s, int width) -+static void vga_draw_line4(VGACommonState *vga, uint8_t *d, -+ uint32_t addr, int width) - { - uint32_t plane_mask, data, v, *palette; - int x; - -- palette = s1->last_palette; -- plane_mask = mask16[s1->ar[VGA_ATC_PLANE_ENABLE] & 0xf]; -+ palette = vga->last_palette; -+ plane_mask = mask16[vga->ar[VGA_ATC_PLANE_ENABLE] & 0xf]; - width >>= 3; - for(x = 0; x < width; x++) { -- data = ((uint32_t *)s)[0]; -+ data = vga_read_dword_le(vga, addr); - data &= plane_mask; - v = expand4[GET_PLANE(data, 0)]; - v |= expand4[GET_PLANE(data, 1)] << 1; -@@ -192,24 +218,24 @@ static void vga_draw_line4(VGACommonState *s1, uint8_t *d, - ((uint32_t *)d)[6] = palette[(v >> 4) & 0xf]; - ((uint32_t *)d)[7] = palette[(v >> 0) & 0xf]; - d += 32; -- s += 4; -+ addr += 4; - } - } - - /* - * 16 color mode, dup2 horizontal - */ --static void vga_draw_line4d2(VGACommonState *s1, uint8_t *d, -- const uint8_t *s, int width) -+static void vga_draw_line4d2(VGACommonState *vga, uint8_t *d, -+ uint32_t addr, int width) - { - uint32_t plane_mask, data, v, *palette; - int x; - -- palette = s1->last_palette; -- plane_mask = mask16[s1->ar[VGA_ATC_PLANE_ENABLE] & 0xf]; -+ palette = vga->last_palette; -+ plane_mask = mask16[vga->ar[VGA_ATC_PLANE_ENABLE] & 0xf]; - width >>= 3; - for(x = 0; x < width; x++) { -- data = ((uint32_t *)s)[0]; -+ data = vga_read_dword_le(vga, addr); - data &= plane_mask; - v = expand4[GET_PLANE(data, 0)]; - v |= expand4[GET_PLANE(data, 1)] << 1; -@@ -224,7 +250,7 @@ static void vga_draw_line4d2(VGACommonState *s1, uint8_t *d, - PUT_PIXEL2(d, 6, palette[(v >> 4) & 0xf]); - PUT_PIXEL2(d, 7, palette[(v >> 0) & 0xf]); - d += 64; -- s += 4; -+ addr += 4; - } - } - -@@ -233,21 +259,21 @@ static void vga_draw_line4d2(VGACommonState *s1, uint8_t *d, - * - * XXX: add plane_mask support (never used in standard VGA modes) - */ --static void vga_draw_line8d2(VGACommonState *s1, uint8_t *d, -- const uint8_t *s, int width) -+static void vga_draw_line8d2(VGACommonState *vga, uint8_t *d, -+ uint32_t addr, int width) - { - uint32_t *palette; - int x; - -- palette = s1->last_palette; -+ palette = vga->last_palette; - width >>= 3; - for(x = 0; x < width; x++) { -- PUT_PIXEL2(d, 0, palette[s[0]]); -- PUT_PIXEL2(d, 1, palette[s[1]]); -- PUT_PIXEL2(d, 2, palette[s[2]]); -- PUT_PIXEL2(d, 3, palette[s[3]]); -+ PUT_PIXEL2(d, 0, palette[vga_read_byte(vga, addr + 0)]); -+ PUT_PIXEL2(d, 1, palette[vga_read_byte(vga, addr + 1)]); -+ PUT_PIXEL2(d, 2, palette[vga_read_byte(vga, addr + 2)]); -+ PUT_PIXEL2(d, 3, palette[vga_read_byte(vga, addr + 3)]); - d += 32; -- s += 4; -+ addr += 4; - } - } - -@@ -256,63 +282,63 @@ static void vga_draw_line8d2(VGACommonState *s1, uint8_t *d, - * - * XXX: add plane_mask support (never used in standard VGA modes) - */ --static void vga_draw_line8(VGACommonState *s1, uint8_t *d, -- const uint8_t *s, int width) -+static void vga_draw_line8(VGACommonState *vga, uint8_t *d, -+ uint32_t addr, int width) - { - uint32_t *palette; - int x; - -- palette = s1->last_palette; -+ palette = vga->last_palette; - width >>= 3; - for(x = 0; x < width; x++) { -- ((uint32_t *)d)[0] = palette[s[0]]; -- ((uint32_t *)d)[1] = palette[s[1]]; -- ((uint32_t *)d)[2] = palette[s[2]]; -- ((uint32_t *)d)[3] = palette[s[3]]; -- ((uint32_t *)d)[4] = palette[s[4]]; -- ((uint32_t *)d)[5] = palette[s[5]]; -- ((uint32_t *)d)[6] = palette[s[6]]; -- ((uint32_t *)d)[7] = palette[s[7]]; -+ ((uint32_t *)d)[0] = palette[vga_read_byte(vga, addr + 0)]; -+ ((uint32_t *)d)[1] = palette[vga_read_byte(vga, addr + 1)]; -+ ((uint32_t *)d)[2] = palette[vga_read_byte(vga, addr + 2)]; -+ ((uint32_t *)d)[3] = palette[vga_read_byte(vga, addr + 3)]; -+ ((uint32_t *)d)[4] = palette[vga_read_byte(vga, addr + 4)]; -+ ((uint32_t *)d)[5] = palette[vga_read_byte(vga, addr + 5)]; -+ ((uint32_t *)d)[6] = palette[vga_read_byte(vga, addr + 6)]; -+ ((uint32_t *)d)[7] = palette[vga_read_byte(vga, addr + 7)]; - d += 32; -- s += 8; -+ addr += 8; - } - } - - /* - * 15 bit color - */ --static void vga_draw_line15_le(VGACommonState *s1, uint8_t *d, -- const uint8_t *s, int width) -+static void vga_draw_line15_le(VGACommonState *vga, uint8_t *d, -+ uint32_t addr, int width) - { - int w; - uint32_t v, r, g, b; - - w = width; - do { -- v = lduw_le_p((void *)s); -+ v = vga_read_word_le(vga, addr); - r = (v >> 7) & 0xf8; - g = (v >> 2) & 0xf8; - b = (v << 3) & 0xf8; - ((uint32_t *)d)[0] = rgb_to_pixel32(r, g, b); -- s += 2; -+ addr += 2; - d += 4; - } while (--w != 0); - } - --static void vga_draw_line15_be(VGACommonState *s1, uint8_t *d, -- const uint8_t *s, int width) -+static void vga_draw_line15_be(VGACommonState *vga, uint8_t *d, -+ uint32_t addr, int width) - { - int w; - uint32_t v, r, g, b; - - w = width; - do { -- v = lduw_be_p((void *)s); -+ v = vga_read_word_be(vga, addr); - r = (v >> 7) & 0xf8; - g = (v >> 2) & 0xf8; - b = (v << 3) & 0xf8; - ((uint32_t *)d)[0] = rgb_to_pixel32(r, g, b); -- s += 2; -+ addr += 2; - d += 4; - } while (--w != 0); - } -@@ -320,38 +346,38 @@ static void vga_draw_line15_be(VGACommonState *s1, uint8_t *d, - /* - * 16 bit color - */ --static void vga_draw_line16_le(VGACommonState *s1, uint8_t *d, -- const uint8_t *s, int width) -+static void vga_draw_line16_le(VGACommonState *vga, uint8_t *d, -+ uint32_t addr, int width) - { - int w; - uint32_t v, r, g, b; - - w = width; - do { -- v = lduw_le_p((void *)s); -+ v = vga_read_word_le(vga, addr); - r = (v >> 8) & 0xf8; - g = (v >> 3) & 0xfc; - b = (v << 3) & 0xf8; - ((uint32_t *)d)[0] = rgb_to_pixel32(r, g, b); -- s += 2; -+ addr += 2; - d += 4; - } while (--w != 0); - } - --static void vga_draw_line16_be(VGACommonState *s1, uint8_t *d, -- const uint8_t *s, int width) -+static void vga_draw_line16_be(VGACommonState *vga, uint8_t *d, -+ uint32_t addr, int width) - { - int w; - uint32_t v, r, g, b; - - w = width; - do { -- v = lduw_be_p((void *)s); -+ v = vga_read_word_be(vga, addr); - r = (v >> 8) & 0xf8; - g = (v >> 3) & 0xfc; - b = (v << 3) & 0xf8; - ((uint32_t *)d)[0] = rgb_to_pixel32(r, g, b); -- s += 2; -+ addr += 2; - d += 4; - } while (--w != 0); - } -@@ -359,36 +385,36 @@ static void vga_draw_line16_be(VGACommonState *s1, uint8_t *d, - /* - * 24 bit color - */ --static void vga_draw_line24_le(VGACommonState *s1, uint8_t *d, -- const uint8_t *s, int width) -+static void vga_draw_line24_le(VGACommonState *vga, uint8_t *d, -+ uint32_t addr, int width) - { - int w; - uint32_t r, g, b; - - w = width; - do { -- b = s[0]; -- g = s[1]; -- r = s[2]; -+ b = vga_read_byte(vga, addr + 0); -+ g = vga_read_byte(vga, addr + 1); -+ r = vga_read_byte(vga, addr + 2); - ((uint32_t *)d)[0] = rgb_to_pixel32(r, g, b); -- s += 3; -+ addr += 3; - d += 4; - } while (--w != 0); - } - --static void vga_draw_line24_be(VGACommonState *s1, uint8_t *d, -- const uint8_t *s, int width) -+static void vga_draw_line24_be(VGACommonState *vga, uint8_t *d, -+ uint32_t addr, int width) - { - int w; - uint32_t r, g, b; - - w = width; - do { -- r = s[0]; -- g = s[1]; -- b = s[2]; -+ r = vga_read_byte(vga, addr + 0); -+ g = vga_read_byte(vga, addr + 1); -+ b = vga_read_byte(vga, addr + 2); - ((uint32_t *)d)[0] = rgb_to_pixel32(r, g, b); -- s += 3; -+ addr += 3; - d += 4; - } while (--w != 0); - } -@@ -396,44 +422,36 @@ static void vga_draw_line24_be(VGACommonState *s1, uint8_t *d, - /* - * 32 bit color - */ --static void vga_draw_line32_le(VGACommonState *s1, uint8_t *d, -- const uint8_t *s, int width) -+static void vga_draw_line32_le(VGACommonState *vga, uint8_t *d, -+ uint32_t addr, int width) - { --#ifndef HOST_WORDS_BIGENDIAN -- memcpy(d, s, width * 4); --#else - int w; - uint32_t r, g, b; - - w = width; - do { -- b = s[0]; -- g = s[1]; -- r = s[2]; -+ b = vga_read_byte(vga, addr + 0); -+ g = vga_read_byte(vga, addr + 1); -+ r = vga_read_byte(vga, addr + 2); - ((uint32_t *)d)[0] = rgb_to_pixel32(r, g, b); -- s += 4; -+ addr += 4; - d += 4; - } while (--w != 0); --#endif - } - --static void vga_draw_line32_be(VGACommonState *s1, uint8_t *d, -- const uint8_t *s, int width) -+static void vga_draw_line32_be(VGACommonState *vga, uint8_t *d, -+ uint32_t addr, int width) - { --#ifdef HOST_WORDS_BIGENDIAN -- memcpy(d, s, width * 4); --#else - int w; - uint32_t r, g, b; - - w = width; - do { -- r = s[1]; -- g = s[2]; -- b = s[3]; -+ r = vga_read_byte(vga, addr + 1); -+ g = vga_read_byte(vga, addr + 2); -+ b = vga_read_byte(vga, addr + 3); - ((uint32_t *)d)[0] = rgb_to_pixel32(r, g, b); -- s += 4; -+ addr += 4; - d += 4; - } while (--w != 0); --#endif - } -diff --git a/hw/display/vga.c b/hw/display/vga.c -index ad7a465..6fc8c87 100644 ---- a/hw/display/vga.c -+++ b/hw/display/vga.c -@@ -1005,7 +1005,7 @@ void vga_mem_writeb(VGACommonState *s, hwaddr addr, uint32_t val) - } - - typedef void vga_draw_line_func(VGACommonState *s1, uint8_t *d, -- const uint8_t *s, int width); -+ uint32_t srcaddr, int width); - - #include "vga-helpers.h" - -@@ -1666,7 +1666,7 @@ static void vga_draw_graphic(VGACommonState *s, int full_update) - if (y_start < 0) - y_start = y; - if (!(is_buffer_shared(surface))) { -- vga_draw_line(s, d, s->vram_ptr + addr, width); -+ vga_draw_line(s, d, addr, width); - if (s->cursor_draw_line) - s->cursor_draw_line(s, d, y); - } -@@ -2170,6 +2170,7 @@ void vga_common_init(VGACommonState *s, Object *obj, bool global_vmstate) - if (!s->vbe_size) { - s->vbe_size = s->vram_size; - } -+ s->vbe_size_mask = s->vbe_size - 1; - - s->is_vbe_vmstate = 1; - memory_region_init_ram(&s->vram, obj, "vga.vram", s->vram_size, -diff --git a/hw/display/vga_int.h b/hw/display/vga_int.h -index dd6c958..ad34a1f 100644 ---- a/hw/display/vga_int.h -+++ b/hw/display/vga_int.h -@@ -94,6 +94,7 @@ typedef struct VGACommonState { - uint32_t vram_size; - uint32_t vram_size_mb; /* property */ - uint32_t vbe_size; -+ uint32_t vbe_size_mask; - uint32_t latch; - bool has_chain4_alias; - MemoryRegion chain4_alias; --- -1.8.3.1 - diff --git a/qemu.git-fec5e8c92becad223df9d972770522f64aafdb72.patch b/qemu.git-fec5e8c92becad223df9d972770522f64aafdb72.patch index cabcf26..ad8407e 100644 --- a/qemu.git-fec5e8c92becad223df9d972770522f64aafdb72.patch +++ b/qemu.git-fec5e8c92becad223df9d972770522f64aafdb72.patch @@ -92,7 +92,7 @@ index 3991b88..b2516c8 100644 - if (page1 > page_max) - page_max = page1; if (!(is_buffer_shared(surface))) { - vga_draw_line(s, d, s->vram_ptr + addr, width); + vga_draw_line(s, d, addr, width); if (s->cursor_draw_line) @@ -1687,13 +1691,7 @@ static void vga_draw_graphic(VGACommonState *s, int full_update) dpy_gfx_update(s->con, 0, y_start, diff --git a/sources b/sources index ae4a035..4704816 100644 --- a/sources +++ b/sources @@ -4,4 +4,4 @@ SHA512 (newlib-1.16.0.tar.gz) = 40eb96bbc6736a16b6399e0cdb73e853d0d90b685c967e77 SHA512 (zlib-1.2.3.tar.gz) = 021b958fcd0d346c4ba761bcf0cc40f3522de6186cf5a0a6ea34a70504ce9622b1c2626fce40675bc8282cf5f5ade18473656abc38050f72f5d6480507a2106e SHA512 (polarssl-1.1.4-gpl.tgz) = 88da614e4d3f4409c4fd3bb3e44c7587ba051e3fed4e33d526069a67e8180212e1ea22da984656f50e290049f60ddca65383e5983c0f8884f648d71f698303ad SHA512 (pciutils-2.2.9.tar.bz2) = 2b3d98d027e46d8c08037366dde6f0781ca03c610ef2b380984639e4ef39899ed8d8b8e4cd9c9dc54df101279b95879bd66bfd4d04ad07fef41e847ea7ae32b5 -SHA512 (xen-4.9.0.tar.gz) = 97f8075c49ef9ec0adbe95106c0cff4f9379578fd568777697565476c3fd948335d72ddcacf8be65fd9db219c0a35dcdc007f355f7e5874dd950fd4c0a0f966f +SHA512 (xen-4.9.1.tar.gz) = 9d22f0aa5dcd01a1c105d17c14bce570cc597e884ddb9b4a46b80a72f647625b76ae5213cede423d0458c14e1906983595a9269bb6e6ff2e9e7e4dea840f4274 diff --git a/xen.gcc7.fix.patch b/xen.gcc7.fix.patch index d5ba87d..b18ba2b 100644 --- a/xen.gcc7.fix.patch +++ b/xen.gcc7.fix.patch @@ -10,13 +10,3 @@ endif ifneq ($(APP_OBJS)-$(lwip),-y) ---- xen-4.8.0/tools/libxc/xc_dom_arm.c.orig 2016-12-05 12:03:27.000000000 +0000 -+++ xen-4.8.0/tools/libxc/xc_dom_arm.c 2017-04-05 21:11:05.343337130 +0100 -@@ -223,6 +223,7 @@ - - domctl.domain = domid; - domctl.cmd = XEN_DOMCTL_set_address_size; -+ domctl.u.address_size.size = 0; - for ( i = 0; i < ARRAY_SIZE(types); i++ ) - if ( !strcmp(types[i].guest, guest_type) ) - domctl.u.address_size.size = types[i].size; diff --git a/xen.hypervisor.config b/xen.hypervisor.config index 6da827a..e4d2301 100644 --- a/xen.hypervisor.config +++ b/xen.hypervisor.config @@ -1,6 +1,6 @@ # # Automatically generated file; DO NOT EDIT. -# Xen/x86 4.8.0 Configuration +# Xen/x86 4.9.1 Configuration # CONFIG_X86_64=y CONFIG_X86=y @@ -10,6 +10,9 @@ CONFIG_ARCH_DEFCONFIG="arch/x86/configs/x86_64_defconfig" # Architecture Features # CONFIG_NR_CPUS=256 +CONFIG_PV=y +CONFIG_PV_LINEAR_PT=y +CONFIG_HVM=y CONFIG_SHADOW_PAGING=y # CONFIG_BIGMEM is not set # CONFIG_HVM_FEP is not set @@ -37,10 +40,12 @@ CONFIG_SCHED_CREDIT=y CONFIG_SCHED_CREDIT2=y CONFIG_SCHED_RTDS=y CONFIG_SCHED_ARINC653=y +CONFIG_SCHED_NULL=y CONFIG_SCHED_DEFAULT="credit" CONFIG_CRYPTO=y CONFIG_LIVEPATCH=y CONFIG_FAST_SYMBOL_LOOKUP=y +CONFIG_CMDLINE="" # # Device Drivers diff --git a/xen.spec b/xen.spec index daac442..cde6163 100644 --- a/xen.spec +++ b/xen.spec @@ -49,12 +49,12 @@ Summary: Xen is a virtual machine monitor Name: xen -Version: 4.9.0 -Release: 14%{?dist} +Version: 4.9.1 +Release: 1%{?dist} Group: Development/Libraries License: GPLv2+ and LGPLv2+ and BSD URL: http://xen.org/ -Source0: http://bits.xensource.com/oss-xen/release/%{version}/xen-%{version}.tar.gz +Source0: https://downloads.xenproject.org/release/xen/%{version}/xen-%{version}.tar.gz Source2: %{name}.logrotate # used by stubdoms Source10: lwip-1.3.0.tar.gz @@ -108,13 +108,7 @@ Patch57: qemu.git-d63fb193e71644a073b77ff5ac6f1216f2f6cf6e.patch Patch58: qemu.git-8409dc884a201bf74b30a9d232b6bbdd00cb7e2b.patch Patch59: qemu.git-215902d7b6fb50c6fc216fc74f770858278ed904.patch Patch60: qemu.trad.CVE-2017-7718.patch -Patch62: xsa227.patch -Patch63: xsa228.patch -Patch64: xsa230.patch -Patch65: xsa226.0001-gnttab-dont-use-possibly-unbounded-tail-calls.patch -Patch66: xsa226.0002-gnttab-fix-transitive-grant-handling.patch Patch67: droplibvirtconflict.patch -Patch68: xsa235-4.9.patch Patch69: qemu.git-3268a845f41253fb55852a8429c32b50f36f349a.patch Patch70: qemu.trad.CVE-2017-8309.patch Patch71: qemu.git-fa18f36a461984eae50ab957e47ec78dae3c14fc.patch @@ -124,31 +118,8 @@ Patch74: qemu.git-df8ad9f128c15aa0a0ebc7b24e9a22c9775b67af.patch Patch75: qemu.git-0c9390d978cbf61e8f16c9f580fa96b305c43568.patch Patch76: qemu.git-041e32b8d9d076980b4e35317c0339e57ab888f1.patch Patch77: qemu.git-04bf2526ce87f21b32c9acba1c5518708c243ad0.patch -Patch78: xsa231-4.9.patch -Patch79: xsa232.patch -Patch80: xsa233.patch -Patch81: xsa234-4.9.patch -Patch82: xsa245-0001-xen-page_alloc-Cover-memory-unreserved-after-boot-in.patch -Patch83: xsa245-0002-xen-arm-Correctly-report-the-memory-region-in-the-du.patch Patch84: qemu.git-fec5e8c92becad223df9d972770522f64aafdb72.patch Patch85: qemu.git-e65294157d4b69393b3f819c99f4f647452b48e3.patch -Patch86: qemu.git-3d90c6254863693a6b13d918d2b8682e08bbc681.patch -Patch87: xsa237-4.9-0001-x86-dont-allow-MSI-pIRQ-mapping-on-unowned-device.patch -Patch88: xsa237-4.9-0002-x86-enforce-proper-privilege-when-mapping-pIRQ-s.patch -Patch89: xsa237-4.9-0003-x86-MSI-disallow-redundant-enabling.patch -Patch90: xsa237-4.9-0004-x86-IRQ-conditionally-preserve-irq-pirq-mapping-on-error.patch -Patch91: xsa237-4.9-0005-x86-FLASK-fix-unmap-domain-IRQ-XSM-hook.patch -Patch92: xsa238.patch -Patch93: xsa239.patch -Patch94: xsa240-4.9-0001-x86-limit-linear-page-table-use-to-a-single-level.patch -Patch95: xsa240-4.9-0002-x86-mm-Disable-PV-linear-pagetables-by-default.patch -Patch96: xsa241-4.9.patch -Patch97: xsa242-4.9.patch -Patch98: xsa243.patch -Patch99: xsa244.patch -Patch100: xsa236-4.9.patch -Patch101: xsa240-4.9-0003-x86-dont-wrongly-trigger-linear-page-table-assertion.patch -Patch102: xsa243-2.patch BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root @@ -349,35 +320,7 @@ manage Xen virtual machines. %patch28 -p1 %patch52 -p1 %patch53 -p1 -%patch62 -p1 -%patch63 -p1 -%patch64 -p1 -%patch65 -p1 -%patch66 -p1 %patch67 -p1 -%patch68 -p1 -%patch78 -p1 -%patch79 -p1 -%patch80 -p1 -%patch81 -p1 -%patch82 -p1 -%patch83 -p1 -%patch87 -p1 -%patch88 -p1 -%patch89 -p1 -%patch90 -p1 -%patch91 -p1 -%patch92 -p1 -%patch93 -p1 -%patch94 -p1 -%patch95 -p1 -%patch96 -p1 -%patch97 -p1 -%patch98 -p1 -%patch99 -p1 -%patch100 -p1 -%patch101 -p1 -%patch102 -p1 # qemu-xen-traditional patches pushd tools/qemu-xen-traditional @@ -409,7 +352,6 @@ pushd tools/qemu-xen %patch77 -p1 %patch84 -p1 %patch85 -p1 -%patch86 -p1 popd # stubdom sources @@ -918,6 +860,14 @@ rm -rf %{buildroot} %endif %changelog +* Thu Nov 23 2017 Michael Young - 4.9.1-1 +- update to 4.9.1 (#1515818) + adjust xen.use.fedora.ipxe.patch + and qemu.git-fec5e8c92becad223df9d972770522f64aafdb72.patch + remove patches for issues now fixed upstream and parts of xen.gcc7.fix.patch + update xen.hypervisor.config +- update Source0 location + * Wed Nov 15 2017 Michael Young - 4.9.0-14 - fix an issue in patch for [XSA-240, CVE-2017-15595] that might be a security issue diff --git a/xen.use.fedora.ipxe.patch b/xen.use.fedora.ipxe.patch index 2f7c672..67eee5e 100644 --- a/xen.use.fedora.ipxe.patch +++ b/xen.use.fedora.ipxe.patch @@ -19,7 +19,7 @@ +ETHERBOOT_NICS ?= 10ec8139 8086100e - QEMU_TRADITIONAL_REVISION ?= xen-4.9.0 + QEMU_TRADITIONAL_REVISION ?= xen-4.9.1 --- xen-4.2.0/tools/firmware/Makefile.orig 2012-05-27 21:57:04.480812871 +0100 +++ xen-4.2.0/tools/firmware/Makefile 2012-06-02 19:03:52.254691484 +0100 @@ -10,7 +10,7 @@ diff --git a/xsa226.0001-gnttab-dont-use-possibly-unbounded-tail-calls.patch b/xsa226.0001-gnttab-dont-use-possibly-unbounded-tail-calls.patch deleted file mode 100644 index d60bbe2..0000000 --- a/xsa226.0001-gnttab-dont-use-possibly-unbounded-tail-calls.patch +++ /dev/null @@ -1,149 +0,0 @@ -From: Jan Beulich -Subject: gnttab: don't use possibly unbounded tail calls - -There is no guarantee that the compiler would actually translate them -to branches instead of calls, so only ones with a known recursion limit -are okay: -- __release_grant_for_copy() can call itself only once, as - __acquire_grant_for_copy() won't permit use of multi-level transitive - grants, -- __acquire_grant_for_copy() is fine to call itself with the last - argument false, as that prevents further recursion, -- __acquire_grant_for_copy() must not call itself to recover from an - observed change to the active entry's pin count - -This is part of CVE-2017-12135 / XSA-226. - -Signed-off-by: Jan Beulich - ---- a/xen/common/compat/grant_table.c -+++ b/xen/common/compat/grant_table.c -@@ -258,9 +258,9 @@ int compat_grant_table_op(unsigned int cmd, - rc = gnttab_copy(guest_handle_cast(nat.uop, gnttab_copy_t), n); - if ( rc > 0 ) - { -- ASSERT(rc < n); -- i -= n - rc; -- n = rc; -+ ASSERT(rc <= n); -+ i -= rc; -+ n -= rc; - } - if ( rc >= 0 ) - { ---- a/xen/common/grant_table.c -+++ b/xen/common/grant_table.c -@@ -2103,8 +2103,10 @@ __release_grant_for_copy( - - if ( td != rd ) - { -- /* Recursive calls, but they're tail calls, so it's -- okay. */ -+ /* -+ * Recursive calls, but they're bounded (acquire permits only a single -+ * level of transitivity), so it's okay. -+ */ - if ( released_write ) - __release_grant_for_copy(td, trans_gref, 0); - else if ( released_read ) -@@ -2255,10 +2257,11 @@ __acquire_grant_for_copy( - return rc; - } - -- /* We dropped the lock, so we have to check that nobody -- else tried to pin (or, for that matter, unpin) the -- reference in *this* domain. If they did, just give up -- and try again. */ -+ /* -+ * We dropped the lock, so we have to check that nobody else tried -+ * to pin (or, for that matter, unpin) the reference in *this* -+ * domain. If they did, just give up and tell the caller to retry. -+ */ - if ( act->pin != old_pin ) - { - __fixup_status_for_copy_pin(act, status); -@@ -2266,9 +2269,8 @@ __acquire_grant_for_copy( - active_entry_release(act); - grant_read_unlock(rgt); - put_page(*page); -- return __acquire_grant_for_copy(rd, gref, ldom, readonly, -- frame, page, page_off, length, -- allow_transitive); -+ *page = NULL; -+ return ERESTART; - } - - /* The actual remote remote grant may or may not be a -@@ -2574,7 +2576,7 @@ static int gnttab_copy_one(const struct - { - gnttab_copy_release_buf(src); - rc = gnttab_copy_claim_buf(op, &op->source, src, GNTCOPY_source_gref); -- if ( rc < 0 ) -+ if ( rc ) - goto out; - } - -@@ -2584,7 +2586,7 @@ static int gnttab_copy_one(const struct - { - gnttab_copy_release_buf(dest); - rc = gnttab_copy_claim_buf(op, &op->dest, dest, GNTCOPY_dest_gref); -- if ( rc < 0 ) -+ if ( rc ) - goto out; - } - -@@ -2593,6 +2595,14 @@ static int gnttab_copy_one(const struct - return rc; - } - -+/* -+ * gnttab_copy(), other than the various other helpers of -+ * do_grant_table_op(), returns (besides possible error indicators) -+ * "count - i" rather than "i" to ensure that even if no progress -+ * was made at all (perhaps due to gnttab_copy_one() returning a -+ * positive value) a non-zero value is being handed back (zero needs -+ * to be avoided, as that means "success, all done"). -+ */ - static long gnttab_copy( - XEN_GUEST_HANDLE_PARAM(gnttab_copy_t) uop, unsigned int count) - { -@@ -2606,7 +2616,7 @@ static long gnttab_copy( - { - if ( i && hypercall_preempt_check() ) - { -- rc = i; -+ rc = count - i; - break; - } - -@@ -2616,13 +2626,20 @@ static long gnttab_copy( - break; - } - -- op.status = gnttab_copy_one(&op, &dest, &src); -- if ( op.status != GNTST_okay ) -+ rc = gnttab_copy_one(&op, &dest, &src); -+ if ( rc > 0 ) -+ { -+ rc = count - i; -+ break; -+ } -+ if ( rc != GNTST_okay ) - { - gnttab_copy_release_buf(&src); - gnttab_copy_release_buf(&dest); - } - -+ op.status = rc; -+ rc = 0; - if ( unlikely(__copy_field_to_guest(uop, &op, status)) ) - { - rc = -EFAULT; -@@ -3160,6 +3177,7 @@ do_grant_table_op( - rc = gnttab_copy(copy, count); - if ( rc > 0 ) - { -+ rc = count - rc; - guest_handle_add_offset(copy, rc); - uop = guest_handle_cast(copy, void); - } diff --git a/xsa226.0002-gnttab-fix-transitive-grant-handling.patch b/xsa226.0002-gnttab-fix-transitive-grant-handling.patch deleted file mode 100644 index 2cf93bd..0000000 --- a/xsa226.0002-gnttab-fix-transitive-grant-handling.patch +++ /dev/null @@ -1,280 +0,0 @@ -From: Jan Beulich -Subject: gnttab: fix transitive grant handling - -Processing of transitive grants must not use the fast path, or else -reference counting breaks due to the skipped recursive call to -__acquire_grant_for_copy() (its __release_grant_for_copy() -counterpart occurs independent of original pin count). Furthermore -after re-acquiring temporarily dropped locks we need to verify no grant -properties changed if the original pin count was non-zero; checking -just the pin counts is sufficient only for well-behaved guests. As a -result, __release_grant_for_copy() needs to mirror that new behavior. - -Furthermore a __release_grant_for_copy() invocation was missing on the -retry path of __acquire_grant_for_copy(), and gnttab_set_version() also -needs to bail out upon encountering a transitive grant. - -This is part of CVE-2017-12135 / XSA-226. - -Reported-by: Andrew Cooper -Signed-off-by: Jan Beulich -Reviewed-by: Andrew Cooper - ---- a/xen/common/grant_table.c -+++ b/xen/common/grant_table.c -@@ -2050,13 +2050,8 @@ __release_grant_for_copy( - unsigned long r_frame; - uint16_t *status; - grant_ref_t trans_gref; -- int released_read; -- int released_write; - struct domain *td; - -- released_read = 0; -- released_write = 0; -- - grant_read_lock(rgt); - - act = active_entry_acquire(rgt, gref); -@@ -2086,17 +2081,11 @@ __release_grant_for_copy( - - act->pin -= GNTPIN_hstw_inc; - if ( !(act->pin & (GNTPIN_devw_mask|GNTPIN_hstw_mask)) ) -- { -- released_write = 1; - gnttab_clear_flag(_GTF_writing, status); -- } - } - - if ( !act->pin ) -- { - gnttab_clear_flag(_GTF_reading, status); -- released_read = 1; -- } - - active_entry_release(act); - grant_read_unlock(rgt); -@@ -2104,13 +2093,10 @@ __release_grant_for_copy( - if ( td != rd ) - { - /* -- * Recursive calls, but they're bounded (acquire permits only a single -+ * Recursive call, but it is bounded (acquire permits only a single - * level of transitivity), so it's okay. - */ -- if ( released_write ) -- __release_grant_for_copy(td, trans_gref, 0); -- else if ( released_read ) -- __release_grant_for_copy(td, trans_gref, 1); -+ __release_grant_for_copy(td, trans_gref, readonly); - - rcu_unlock_domain(td); - } -@@ -2184,8 +2170,108 @@ __acquire_grant_for_copy( - act->domid, ldom, act->pin); - - old_pin = act->pin; -- if ( !act->pin || -- (!readonly && !(act->pin & (GNTPIN_devw_mask|GNTPIN_hstw_mask))) ) -+ if ( sha2 && (shah->flags & GTF_type_mask) == GTF_transitive ) -+ { -+ if ( (!old_pin || (!readonly && -+ !(old_pin & (GNTPIN_devw_mask|GNTPIN_hstw_mask)))) && -+ (rc = _set_status_v2(ldom, readonly, 0, shah, act, -+ status)) != GNTST_okay ) -+ goto unlock_out; -+ -+ if ( !allow_transitive ) -+ PIN_FAIL(unlock_out_clear, GNTST_general_error, -+ "transitive grant when transitivity not allowed\n"); -+ -+ trans_domid = sha2->transitive.trans_domid; -+ trans_gref = sha2->transitive.gref; -+ barrier(); /* Stop the compiler from re-loading -+ trans_domid from shared memory */ -+ if ( trans_domid == rd->domain_id ) -+ PIN_FAIL(unlock_out_clear, GNTST_general_error, -+ "transitive grants cannot be self-referential\n"); -+ -+ /* -+ * We allow the trans_domid == ldom case, which corresponds to a -+ * grant being issued by one domain, sent to another one, and then -+ * transitively granted back to the original domain. Allowing it -+ * is easy, and means that you don't need to go out of your way to -+ * avoid it in the guest. -+ */ -+ -+ /* We need to leave the rrd locked during the grant copy. */ -+ td = rcu_lock_domain_by_id(trans_domid); -+ if ( td == NULL ) -+ PIN_FAIL(unlock_out_clear, GNTST_general_error, -+ "transitive grant referenced bad domain %d\n", -+ trans_domid); -+ -+ /* -+ * __acquire_grant_for_copy() could take the lock on the -+ * remote table (if rd == td), so we have to drop the lock -+ * here and reacquire. -+ */ -+ active_entry_release(act); -+ grant_read_unlock(rgt); -+ -+ rc = __acquire_grant_for_copy(td, trans_gref, rd->domain_id, -+ readonly, &grant_frame, page, -+ &trans_page_off, &trans_length, 0); -+ -+ grant_read_lock(rgt); -+ act = active_entry_acquire(rgt, gref); -+ -+ if ( rc != GNTST_okay ) -+ { -+ __fixup_status_for_copy_pin(act, status); -+ rcu_unlock_domain(td); -+ active_entry_release(act); -+ grant_read_unlock(rgt); -+ return rc; -+ } -+ -+ /* -+ * We dropped the lock, so we have to check that the grant didn't -+ * change, and that nobody else tried to pin/unpin it. If anything -+ * changed, just give up and tell the caller to retry. -+ */ -+ if ( rgt->gt_version != 2 || -+ act->pin != old_pin || -+ (old_pin && (act->domid != ldom || act->frame != grant_frame || -+ act->start != trans_page_off || -+ act->length != trans_length || -+ act->trans_domain != td || -+ act->trans_gref != trans_gref || -+ !act->is_sub_page)) ) -+ { -+ __release_grant_for_copy(td, trans_gref, readonly); -+ __fixup_status_for_copy_pin(act, status); -+ rcu_unlock_domain(td); -+ active_entry_release(act); -+ grant_read_unlock(rgt); -+ put_page(*page); -+ *page = NULL; -+ return ERESTART; -+ } -+ -+ if ( !old_pin ) -+ { -+ act->domid = ldom; -+ act->start = trans_page_off; -+ act->length = trans_length; -+ act->trans_domain = td; -+ act->trans_gref = trans_gref; -+ act->frame = grant_frame; -+ act->gfn = -1ul; -+ /* -+ * The actual remote remote grant may or may not be a sub-page, -+ * but we always treat it as one because that blocks mappings of -+ * transitive grants. -+ */ -+ act->is_sub_page = 1; -+ } -+ } -+ else if ( !old_pin || -+ (!readonly && !(old_pin & (GNTPIN_devw_mask|GNTPIN_hstw_mask))) ) - { - if ( (rc = _set_status(rgt->gt_version, ldom, - readonly, 0, shah, act, -@@ -2206,79 +2292,6 @@ __acquire_grant_for_copy( - trans_page_off = 0; - trans_length = PAGE_SIZE; - } -- else if ( (shah->flags & GTF_type_mask) == GTF_transitive ) -- { -- if ( !allow_transitive ) -- PIN_FAIL(unlock_out_clear, GNTST_general_error, -- "transitive grant when transitivity not allowed\n"); -- -- trans_domid = sha2->transitive.trans_domid; -- trans_gref = sha2->transitive.gref; -- barrier(); /* Stop the compiler from re-loading -- trans_domid from shared memory */ -- if ( trans_domid == rd->domain_id ) -- PIN_FAIL(unlock_out_clear, GNTST_general_error, -- "transitive grants cannot be self-referential\n"); -- -- /* We allow the trans_domid == ldom case, which -- corresponds to a grant being issued by one domain, sent -- to another one, and then transitively granted back to -- the original domain. Allowing it is easy, and means -- that you don't need to go out of your way to avoid it -- in the guest. */ -- -- /* We need to leave the rrd locked during the grant copy */ -- td = rcu_lock_domain_by_id(trans_domid); -- if ( td == NULL ) -- PIN_FAIL(unlock_out_clear, GNTST_general_error, -- "transitive grant referenced bad domain %d\n", -- trans_domid); -- -- /* -- * __acquire_grant_for_copy() could take the lock on the -- * remote table (if rd == td), so we have to drop the lock -- * here and reacquire -- */ -- active_entry_release(act); -- grant_read_unlock(rgt); -- -- rc = __acquire_grant_for_copy(td, trans_gref, rd->domain_id, -- readonly, &grant_frame, page, -- &trans_page_off, &trans_length, 0); -- -- grant_read_lock(rgt); -- act = active_entry_acquire(rgt, gref); -- -- if ( rc != GNTST_okay ) { -- __fixup_status_for_copy_pin(act, status); -- rcu_unlock_domain(td); -- active_entry_release(act); -- grant_read_unlock(rgt); -- return rc; -- } -- -- /* -- * We dropped the lock, so we have to check that nobody else tried -- * to pin (or, for that matter, unpin) the reference in *this* -- * domain. If they did, just give up and tell the caller to retry. -- */ -- if ( act->pin != old_pin ) -- { -- __fixup_status_for_copy_pin(act, status); -- rcu_unlock_domain(td); -- active_entry_release(act); -- grant_read_unlock(rgt); -- put_page(*page); -- *page = NULL; -- return ERESTART; -- } -- -- /* The actual remote remote grant may or may not be a -- sub-page, but we always treat it as one because that -- blocks mappings of transitive grants. */ -- is_sub_page = 1; -- act->gfn = -1ul; -- } - else if ( !(sha2->hdr.flags & GTF_sub_page) ) - { - rc = __get_paged_frame(sha2->full_page.frame, &grant_frame, page, readonly, rd); -@@ -2710,10 +2723,13 @@ gnttab_set_version(XEN_GUEST_HANDLE_PARA - case 2: - for ( i = 0; i < GNTTAB_NR_RESERVED_ENTRIES; i++ ) - { -- if ( ((shared_entry_v2(gt, i).hdr.flags & GTF_type_mask) == -- GTF_permit_access) && -- (shared_entry_v2(gt, i).full_page.frame >> 32) ) -+ switch ( shared_entry_v2(gt, i).hdr.flags & GTF_type_mask ) - { -+ case GTF_permit_access: -+ if ( !(shared_entry_v2(gt, i).full_page.frame >> 32) ) -+ break; -+ /* fall through */ -+ case GTF_transitive: - gdprintk(XENLOG_WARNING, - "tried to change grant table version to 1 with non-representable entries\n"); - res = -ERANGE; diff --git a/xsa227.patch b/xsa227.patch deleted file mode 100644 index 86aa41e..0000000 --- a/xsa227.patch +++ /dev/null @@ -1,52 +0,0 @@ -From fa7268b94f8a0a7792ee12d5b8e23a60e52a3a84 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Tue, 20 Jun 2017 19:18:54 +0100 -Subject: [PATCH] x86/grant: Disallow misaligned PTEs - -Pagetable entries must be aligned to function correctly. Disallow attempts -from the guest to have a grant PTE created at a misaligned address, which -would result in corruption of the L1 table with largely-guest-controlled -values. - -This is XSA-227 - -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich ---- - xen/arch/x86/mm.c | 13 +++++++++++++ - 1 file changed, 13 insertions(+) - -diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c -index 97b3b4b..00f517a 100644 ---- a/xen/arch/x86/mm.c -+++ b/xen/arch/x86/mm.c -@@ -3763,6 +3763,9 @@ static int create_grant_pte_mapping( - l1_pgentry_t ol1e; - struct domain *d = v->domain; - -+ if ( !IS_ALIGNED(pte_addr, sizeof(nl1e)) ) -+ return GNTST_general_error; -+ - adjust_guest_l1e(nl1e, d); - - gmfn = pte_addr >> PAGE_SHIFT; -@@ -3819,6 +3822,16 @@ static int destroy_grant_pte_mapping( - struct page_info *page; - l1_pgentry_t ol1e; - -+ /* -+ * addr comes from Xen's active_entry tracking so isn't guest controlled, -+ * but it had still better be PTE-aligned. -+ */ -+ if ( !IS_ALIGNED(addr, sizeof(ol1e)) ) -+ { -+ ASSERT_UNREACHABLE(); -+ return GNTST_general_error; -+ } -+ - gmfn = addr >> PAGE_SHIFT; - page = get_page_from_gfn(d, gmfn, NULL, P2M_ALLOC); - --- -2.1.4 - diff --git a/xsa228.patch b/xsa228.patch deleted file mode 100644 index 65add3a..0000000 --- a/xsa228.patch +++ /dev/null @@ -1,198 +0,0 @@ -From 9a52c78eb4ff7836bf7ac9ecd918b289cead1f3f Mon Sep 17 00:00:00 2001 -From: Jan Beulich -Date: Mon, 31 Jul 2017 15:17:56 +0100 -Subject: [PATCH] gnttab: split maptrack lock to make it fulfill its purpose - again - -The way the lock is currently being used in get_maptrack_handle(), it -protects only the maptrack limit: The function acts on current's list -only, so races on list accesses are impossible even without the lock. - -Otoh list access races are possible between __get_maptrack_handle() and -put_maptrack_handle(), due to the invocation of the former for other -than current from steal_maptrack_handle(). Introduce a per-vCPU lock -for list accesses to become race free again. This lock will be -uncontended except when it becomes necessary to take the steal path, -i.e. in the common case there should be no meaningful performance -impact. - -When in get_maptrack_handle adds a stolen entry to a fresh, empty, -freelist, we think that there is probably no concurrency. However, -this is not a fast path and adding the locking there makes the code -clearly correct. - -Also, while we are here: the stolen maptrack_entry's tail pointer was -not properly set. Set it. - -This is XSA-228. - -Reported-by: Ian Jackson -Signed-off-by: Jan Beulich -Signed-off-by: Ian Jackson ---- - docs/misc/grant-tables.txt | 7 ++++++- - xen/common/grant_table.c | 30 ++++++++++++++++++++++++------ - xen/include/xen/grant_table.h | 2 +- - xen/include/xen/sched.h | 1 + - 4 files changed, 32 insertions(+), 8 deletions(-) - -diff --git a/docs/misc/grant-tables.txt b/docs/misc/grant-tables.txt -index 417ce2d..64da5cf 100644 ---- a/docs/misc/grant-tables.txt -+++ b/docs/misc/grant-tables.txt -@@ -87,7 +87,8 @@ is complete. - inconsistent grant table state such as current - version, partially initialized active table pages, - etc. -- grant_table->maptrack_lock : spinlock used to protect the maptrack free list -+ grant_table->maptrack_lock : spinlock used to protect the maptrack limit -+ v->maptrack_freelist_lock : spinlock used to protect the maptrack free list - active_grant_entry->lock : spinlock used to serialize modifications to - active entries - -@@ -102,6 +103,10 @@ is complete. - The maptrack free list is protected by its own spinlock. The maptrack - lock may be locked while holding the grant table lock. - -+ The maptrack_freelist_lock is an innermost lock. It may be locked -+ while holding other locks, but no other locks may be acquired within -+ it. -+ - Active entries are obtained by calling active_entry_acquire(gt, ref). - This function returns a pointer to the active entry after locking its - spinlock. The caller must hold the grant table read lock before -diff --git a/xen/common/grant_table.c b/xen/common/grant_table.c -index ae34547..ee33bd8 100644 ---- a/xen/common/grant_table.c -+++ b/xen/common/grant_table.c -@@ -304,11 +304,16 @@ __get_maptrack_handle( - { - unsigned int head, next, prev_head; - -+ spin_lock(&v->maptrack_freelist_lock); -+ - do { - /* No maptrack pages allocated for this VCPU yet? */ - head = read_atomic(&v->maptrack_head); - if ( unlikely(head == MAPTRACK_TAIL) ) -+ { -+ spin_unlock(&v->maptrack_freelist_lock); - return -1; -+ } - - /* - * Always keep one entry in the free list to make it easier to -@@ -316,12 +321,17 @@ __get_maptrack_handle( - */ - next = read_atomic(&maptrack_entry(t, head).ref); - if ( unlikely(next == MAPTRACK_TAIL) ) -+ { -+ spin_unlock(&v->maptrack_freelist_lock); - return -1; -+ } - - prev_head = head; - head = cmpxchg(&v->maptrack_head, prev_head, next); - } while ( head != prev_head ); - -+ spin_unlock(&v->maptrack_freelist_lock); -+ - return head; - } - -@@ -380,6 +390,8 @@ put_maptrack_handle( - /* 2. Add entry to the tail of the list on the original VCPU. */ - v = currd->vcpu[maptrack_entry(t, handle).vcpu]; - -+ spin_lock(&v->maptrack_freelist_lock); -+ - cur_tail = read_atomic(&v->maptrack_tail); - do { - prev_tail = cur_tail; -@@ -388,6 +400,8 @@ put_maptrack_handle( - - /* 3. Update the old tail entry to point to the new entry. */ - write_atomic(&maptrack_entry(t, prev_tail).ref, handle); -+ -+ spin_unlock(&v->maptrack_freelist_lock); - } - - static inline int -@@ -411,10 +425,6 @@ get_maptrack_handle( - */ - if ( nr_maptrack_frames(lgt) >= max_maptrack_frames ) - { -- /* -- * Can drop the lock since no other VCPU can be adding a new -- * frame once they've run out. -- */ - spin_unlock(&lgt->maptrack_lock); - - /* -@@ -426,8 +436,12 @@ get_maptrack_handle( - handle = steal_maptrack_handle(lgt, curr); - if ( handle == -1 ) - return -1; -+ spin_lock(&curr->maptrack_freelist_lock); -+ maptrack_entry(lgt, handle).ref = MAPTRACK_TAIL; - curr->maptrack_tail = handle; -- write_atomic(&curr->maptrack_head, handle); -+ if ( curr->maptrack_head == MAPTRACK_TAIL ) -+ write_atomic(&curr->maptrack_head, handle); -+ spin_unlock(&curr->maptrack_freelist_lock); - } - return steal_maptrack_handle(lgt, curr); - } -@@ -460,12 +474,15 @@ get_maptrack_handle( - smp_wmb(); - lgt->maptrack_limit += MAPTRACK_PER_PAGE; - -+ spin_unlock(&lgt->maptrack_lock); -+ spin_lock(&curr->maptrack_freelist_lock); -+ - do { - new_mt[i - 1].ref = read_atomic(&curr->maptrack_head); - head = cmpxchg(&curr->maptrack_head, new_mt[i - 1].ref, handle + 1); - } while ( head != new_mt[i - 1].ref ); - -- spin_unlock(&lgt->maptrack_lock); -+ spin_unlock(&curr->maptrack_freelist_lock); - - return handle; - } -@@ -3475,6 +3492,7 @@ grant_table_destroy( - - void grant_table_init_vcpu(struct vcpu *v) - { -+ spin_lock_init(&v->maptrack_freelist_lock); - v->maptrack_head = MAPTRACK_TAIL; - v->maptrack_tail = MAPTRACK_TAIL; - } -diff --git a/xen/include/xen/grant_table.h b/xen/include/xen/grant_table.h -index 4e77899..100f2b3 100644 ---- a/xen/include/xen/grant_table.h -+++ b/xen/include/xen/grant_table.h -@@ -78,7 +78,7 @@ struct grant_table { - /* Mapping tracking table per vcpu. */ - struct grant_mapping **maptrack; - unsigned int maptrack_limit; -- /* Lock protecting the maptrack page list, head, and limit */ -+ /* Lock protecting the maptrack limit */ - spinlock_t maptrack_lock; - /* The defined versions are 1 and 2. Set to 0 if we don't know - what version to use yet. */ -diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h -index 6673b27..8690f29 100644 ---- a/xen/include/xen/sched.h -+++ b/xen/include/xen/sched.h -@@ -230,6 +230,7 @@ struct vcpu - int controller_pause_count; - - /* Grant table map tracking. */ -+ spinlock_t maptrack_freelist_lock; - unsigned int maptrack_head; - unsigned int maptrack_tail; - --- -2.1.4 - diff --git a/xsa230.patch b/xsa230.patch deleted file mode 100644 index c3b50c8..0000000 --- a/xsa230.patch +++ /dev/null @@ -1,38 +0,0 @@ -From: Jan Beulich -Subject: gnttab: correct pin status fixup for copy - -Regardless of copy operations only setting GNTPIN_hst*, GNTPIN_dev* -also need to be taken into account when deciding whether to clear -_GTF_{read,writ}ing. At least for consistency with code elsewhere the -read part better doesn't use any mask at all. - -This is XSA-230. - -Signed-off-by: Jan Beulich -Reviewed-by: Andrew Cooper -diff --git a/xen/common/grant_table.c b/xen/common/grant_table.c -index ae34547..9c9d33c 100644 ---- a/xen/common/grant_table.c -+++ b/xen/common/grant_table.c -@@ -2107,10 +2107,10 @@ __release_grant_for_copy( - static void __fixup_status_for_copy_pin(const struct active_grant_entry *act, - uint16_t *status) - { -- if ( !(act->pin & GNTPIN_hstw_mask) ) -+ if ( !(act->pin & (GNTPIN_hstw_mask | GNTPIN_devw_mask)) ) - gnttab_clear_flag(_GTF_writing, status); - -- if ( !(act->pin & GNTPIN_hstr_mask) ) -+ if ( !act->pin ) - gnttab_clear_flag(_GTF_reading, status); - } - -@@ -2318,7 +2318,7 @@ __acquire_grant_for_copy( - - unlock_out_clear: - if ( !(readonly) && -- !(act->pin & GNTPIN_hstw_mask) ) -+ !(act->pin & (GNTPIN_hstw_mask | GNTPIN_devw_mask)) ) - gnttab_clear_flag(_GTF_writing, status); - - if ( !act->pin ) diff --git a/xsa231-4.9.patch b/xsa231-4.9.patch deleted file mode 100644 index 251165e..0000000 --- a/xsa231-4.9.patch +++ /dev/null @@ -1,108 +0,0 @@ -From: George Dunlap -Subject: xen/mm: make sure node is less than MAX_NUMNODES - -The output of MEMF_get_node(memflags) can be as large as nodeid_t can -hold (currently 255). This is then used as an index to arrays of size -MAX_NUMNODE, which is 64 on x86 and 1 on ARM, can be passed in by an -untrusted guest (via memory_exchange and increase_reservation) and is -not currently bounds-checked. - -Check the value in page_alloc.c before using it, and also check the -value in the hypercall call sites and return -EINVAL if appropriate. -Don't permit domains other than the hardware or control domain to -allocate node-constrained memory. - -This is XSA-231. - -Reported-by: Matthew Daley -Signed-off-by: George Dunlap -Signed-off-by: Jan Beulich -Reviewed-by: Andrew Cooper - ---- a/xen/common/memory.c -+++ b/xen/common/memory.c -@@ -411,6 +411,31 @@ static void decrease_reservation(struct - a->nr_done = i; - } - -+static bool propagate_node(unsigned int xmf, unsigned int *memflags) -+{ -+ const struct domain *currd = current->domain; -+ -+ BUILD_BUG_ON(XENMEMF_get_node(0) != NUMA_NO_NODE); -+ BUILD_BUG_ON(MEMF_get_node(0) != NUMA_NO_NODE); -+ -+ if ( XENMEMF_get_node(xmf) == NUMA_NO_NODE ) -+ return true; -+ -+ if ( is_hardware_domain(currd) || is_control_domain(currd) ) -+ { -+ if ( XENMEMF_get_node(xmf) >= MAX_NUMNODES ) -+ return false; -+ -+ *memflags |= MEMF_node(XENMEMF_get_node(xmf)); -+ if ( xmf & XENMEMF_exact_node_request ) -+ *memflags |= MEMF_exact_node; -+ } -+ else if ( xmf & XENMEMF_exact_node_request ) -+ return false; -+ -+ return true; -+} -+ - static long memory_exchange(XEN_GUEST_HANDLE_PARAM(xen_memory_exchange_t) arg) - { - struct xen_memory_exchange exch; -@@ -483,6 +508,12 @@ static long memory_exchange(XEN_GUEST_HA - } - } - -+ if ( unlikely(!propagate_node(exch.out.mem_flags, &memflags)) ) -+ { -+ rc = -EINVAL; -+ goto fail_early; -+ } -+ - d = rcu_lock_domain_by_any_id(exch.in.domid); - if ( d == NULL ) - { -@@ -501,7 +532,6 @@ static long memory_exchange(XEN_GUEST_HA - d, - XENMEMF_get_address_bits(exch.out.mem_flags) ? : - (BITS_PER_LONG+PAGE_SHIFT))); -- memflags |= MEMF_node(XENMEMF_get_node(exch.out.mem_flags)); - - for ( i = (exch.nr_exchanged >> in_chunk_order); - i < (exch.in.nr_extents >> in_chunk_order); -@@ -864,12 +894,8 @@ static int construct_memop_from_reservat - } - read_unlock(&d->vnuma_rwlock); - } -- else -- { -- a->memflags |= MEMF_node(XENMEMF_get_node(r->mem_flags)); -- if ( r->mem_flags & XENMEMF_exact_node_request ) -- a->memflags |= MEMF_exact_node; -- } -+ else if ( unlikely(!propagate_node(r->mem_flags, &a->memflags)) ) -+ return -EINVAL; - - return 0; - } ---- a/xen/common/page_alloc.c -+++ b/xen/common/page_alloc.c -@@ -706,9 +706,13 @@ static struct page_info *alloc_heap_page - if ( node >= MAX_NUMNODES ) - node = cpu_to_node(smp_processor_id()); - } -+ else if ( unlikely(node >= MAX_NUMNODES) ) -+ { -+ ASSERT_UNREACHABLE(); -+ return NULL; -+ } - first_node = node; - -- ASSERT(node < MAX_NUMNODES); - ASSERT(zone_lo <= zone_hi); - ASSERT(zone_hi < NR_ZONES); - diff --git a/xsa232.patch b/xsa232.patch deleted file mode 100644 index 9e5f35c..0000000 --- a/xsa232.patch +++ /dev/null @@ -1,23 +0,0 @@ -From: Andrew Cooper -Subject: grant_table: fix GNTTABOP_cache_flush handling - -Don't fall over a NULL grant_table pointer when the owner of the domain -is a system domain (DOMID_{XEN,IO} etc). - -This is XSA-232. - -Reported-by: Matthew Daley -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich - ---- a/xen/common/grant_table.c -+++ b/xen/common/grant_table.c -@@ -3053,7 +3053,7 @@ static int cache_flush(gnttab_cache_flus - - page = mfn_to_page(mfn); - owner = page_get_owner_and_reference(page); -- if ( !owner ) -+ if ( !owner || !owner->grant_table ) - { - rcu_unlock_domain(d); - return -EPERM; diff --git a/xsa233.patch b/xsa233.patch deleted file mode 100644 index 6013c52..0000000 --- a/xsa233.patch +++ /dev/null @@ -1,52 +0,0 @@ -From: Juergen Gross -Subject: tools/xenstore: dont unlink connection object twice - -A connection object of a domain with associated stubdom has two -parents: the domain and the stubdom. When cleaning up the list of -active domains in domain_cleanup() make sure not to unlink the -connection twice from the same domain. This could happen when the -domain and its stubdom are being destroyed at the same time leading -to the domain loop being entered twice. - -Additionally don't use talloc_free() in this case as it will remove -a random parent link, leading eventually to a memory leak. Use -talloc_unlink() instead specifying the context from which the -connection object should be removed. - -This is XSA-233. - -Reported-by: Eric Chanudet -Signed-off-by: Juergen Gross -Reviewed-by: Ian Jackson - ---- a/tools/xenstore/xenstored_domain.c -+++ b/tools/xenstore/xenstored_domain.c -@@ -221,10 +221,11 @@ static int destroy_domain(void *_domain) - static void domain_cleanup(void) - { - xc_dominfo_t dominfo; -- struct domain *domain, *tmp; -+ struct domain *domain; - int notify = 0; - -- list_for_each_entry_safe(domain, tmp, &domains, list) { -+ again: -+ list_for_each_entry(domain, &domains, list) { - if (xc_domain_getinfo(*xc_handle, domain->domid, 1, - &dominfo) == 1 && - dominfo.domid == domain->domid) { -@@ -236,8 +237,12 @@ static void domain_cleanup(void) - if (!dominfo.dying) - continue; - } -- talloc_free(domain->conn); -- notify = 0; /* destroy_domain() fires the watch */ -+ if (domain->conn) { -+ talloc_unlink(talloc_autofree_context(), domain->conn); -+ domain->conn = NULL; -+ notify = 0; /* destroy_domain() fires the watch */ -+ goto again; -+ } - } - - if (notify) diff --git a/xsa234-4.9.patch b/xsa234-4.9.patch deleted file mode 100644 index 8dbf401..0000000 --- a/xsa234-4.9.patch +++ /dev/null @@ -1,192 +0,0 @@ -From: Jan Beulich -Subject: gnttab: also validate PTE permissions upon destroy/replace - -In order for PTE handling to match up with the reference counting done -by common code, presence and writability of grant mapping PTEs must -also be taken into account; validating just the frame number is not -enough. This is in particular relevant if a guest fiddles with grant -PTEs via non-grant hypercalls. - -Note that the flags being passed to replace_grant_host_mapping() -already happen to be those of the existing mapping, so no new function -parameter is needed. - -This is XSA-234. - -Reported-by: Andrew Cooper -Signed-off-by: Jan Beulich -Reviewed-by: Andrew Cooper - ---- a/xen/arch/x86/mm.c -+++ b/xen/arch/x86/mm.c -@@ -4058,7 +4058,8 @@ static int create_grant_pte_mapping( - } - - static int destroy_grant_pte_mapping( -- uint64_t addr, unsigned long frame, struct domain *d) -+ uint64_t addr, unsigned long frame, unsigned int grant_pte_flags, -+ struct domain *d) - { - int rc = GNTST_okay; - void *va; -@@ -4104,17 +4105,29 @@ static int destroy_grant_pte_mapping( - - ol1e = *(l1_pgentry_t *)va; - -- /* Check that the virtual address supplied is actually mapped to frame. */ -- if ( unlikely(l1e_get_pfn(ol1e) != frame) ) -+ /* -+ * Check that the PTE supplied actually maps frame (with appropriate -+ * permissions). -+ */ -+ if ( unlikely(l1e_get_pfn(ol1e) != frame) || -+ unlikely((l1e_get_flags(ol1e) ^ grant_pte_flags) & -+ (_PAGE_PRESENT | _PAGE_RW)) ) - { - page_unlock(page); -- gdprintk(XENLOG_WARNING, -- "PTE entry %"PRIpte" for address %"PRIx64" doesn't match frame %lx\n", -- l1e_get_intpte(ol1e), addr, frame); -+ gdprintk(XENLOG_ERR, -+ "PTE %"PRIpte" at %"PRIx64" doesn't match grant (%"PRIpte")\n", -+ l1e_get_intpte(ol1e), addr, -+ l1e_get_intpte(l1e_from_pfn(frame, grant_pte_flags))); - rc = GNTST_general_error; - goto failed; - } - -+ if ( unlikely((l1e_get_flags(ol1e) ^ grant_pte_flags) & -+ ~(_PAGE_AVAIL | PAGE_CACHE_ATTRS)) ) -+ gdprintk(XENLOG_WARNING, -+ "PTE flags %x at %"PRIx64" don't match grant (%x)\n", -+ l1e_get_flags(ol1e), addr, grant_pte_flags); -+ - /* Delete pagetable entry. */ - if ( unlikely(!UPDATE_ENTRY - (l1, -@@ -4123,7 +4136,8 @@ static int destroy_grant_pte_mapping( - 0)) ) - { - page_unlock(page); -- gdprintk(XENLOG_WARNING, "Cannot delete PTE entry at %p\n", va); -+ gdprintk(XENLOG_WARNING, "Cannot delete PTE entry at %"PRIx64"\n", -+ addr); - rc = GNTST_general_error; - goto failed; - } -@@ -4191,7 +4205,8 @@ static int create_grant_va_mapping( - } - - static int replace_grant_va_mapping( -- unsigned long addr, unsigned long frame, l1_pgentry_t nl1e, struct vcpu *v) -+ unsigned long addr, unsigned long frame, unsigned int grant_pte_flags, -+ l1_pgentry_t nl1e, struct vcpu *v) - { - l1_pgentry_t *pl1e, ol1e; - unsigned long gl1mfn; -@@ -4227,20 +4242,33 @@ static int replace_grant_va_mapping( - - ol1e = *pl1e; - -- /* Check that the virtual address supplied is actually mapped to frame. */ -- if ( unlikely(l1e_get_pfn(ol1e) != frame) ) -- { -- gdprintk(XENLOG_WARNING, -- "PTE entry %lx for address %lx doesn't match frame %lx\n", -- l1e_get_pfn(ol1e), addr, frame); -+ /* -+ * Check that the virtual address supplied is actually mapped to frame -+ * (with appropriate permissions). -+ */ -+ if ( unlikely(l1e_get_pfn(ol1e) != frame) || -+ unlikely((l1e_get_flags(ol1e) ^ grant_pte_flags) & -+ (_PAGE_PRESENT | _PAGE_RW)) ) -+ { -+ gdprintk(XENLOG_ERR, -+ "PTE %"PRIpte" for %lx doesn't match grant (%"PRIpte")\n", -+ l1e_get_intpte(ol1e), addr, -+ l1e_get_intpte(l1e_from_pfn(frame, grant_pte_flags))); - rc = GNTST_general_error; - goto unlock_and_out; - } - -+ if ( unlikely((l1e_get_flags(ol1e) ^ grant_pte_flags) & -+ ~(_PAGE_AVAIL | PAGE_CACHE_ATTRS)) ) -+ gdprintk(XENLOG_WARNING, -+ "PTE flags %x for %"PRIx64" don't match grant (%x)\n", -+ l1e_get_flags(ol1e), addr, grant_pte_flags); -+ - /* Delete pagetable entry. */ - if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, v, 0)) ) - { -- gdprintk(XENLOG_WARNING, "Cannot delete PTE entry at %p\n", pl1e); -+ gdprintk(XENLOG_WARNING, "Cannot delete PTE entry for %"PRIx64"\n", -+ addr); - rc = GNTST_general_error; - goto unlock_and_out; - } -@@ -4254,9 +4282,11 @@ static int replace_grant_va_mapping( - } - - static int destroy_grant_va_mapping( -- unsigned long addr, unsigned long frame, struct vcpu *v) -+ unsigned long addr, unsigned long frame, unsigned int grant_pte_flags, -+ struct vcpu *v) - { -- return replace_grant_va_mapping(addr, frame, l1e_empty(), v); -+ return replace_grant_va_mapping(addr, frame, grant_pte_flags, -+ l1e_empty(), v); - } - - static int create_grant_p2m_mapping(uint64_t addr, unsigned long frame, -@@ -4351,20 +4381,39 @@ int replace_grant_host_mapping( - unsigned long gl1mfn; - struct page_info *l1pg; - int rc; -+ unsigned int grant_pte_flags; - - if ( paging_mode_external(current->domain) ) - return replace_grant_p2m_mapping(addr, frame, new_addr, flags); - -+ grant_pte_flags = -+ _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_GNTTAB | _PAGE_NX; -+ -+ if ( flags & GNTMAP_application_map ) -+ grant_pte_flags |= _PAGE_USER; -+ if ( !(flags & GNTMAP_readonly) ) -+ grant_pte_flags |= _PAGE_RW; -+ /* -+ * On top of the explicit settings done by create_grant_host_mapping() -+ * also open-code relevant parts of adjust_guest_l1e(). Don't mirror -+ * available and cachability flags, though. -+ */ -+ if ( !is_pv_32bit_domain(curr->domain) ) -+ grant_pte_flags |= (grant_pte_flags & _PAGE_USER) -+ ? _PAGE_GLOBAL -+ : _PAGE_GUEST_KERNEL | _PAGE_USER; -+ - if ( flags & GNTMAP_contains_pte ) - { - if ( !new_addr ) -- return destroy_grant_pte_mapping(addr, frame, curr->domain); -+ return destroy_grant_pte_mapping(addr, frame, grant_pte_flags, -+ curr->domain); - - return GNTST_general_error; - } - - if ( !new_addr ) -- return destroy_grant_va_mapping(addr, frame, curr); -+ return destroy_grant_va_mapping(addr, frame, grant_pte_flags, curr); - - pl1e = guest_map_l1e(new_addr, &gl1mfn); - if ( !pl1e ) -@@ -4412,7 +4461,7 @@ int replace_grant_host_mapping( - put_page(l1pg); - guest_unmap_l1e(pl1e); - -- rc = replace_grant_va_mapping(addr, frame, ol1e, curr); -+ rc = replace_grant_va_mapping(addr, frame, grant_pte_flags, ol1e, curr); - if ( rc && !paging_mode_refcounts(curr->domain) ) - put_page_from_l1e(ol1e, curr->domain); - diff --git a/xsa235-4.9.patch b/xsa235-4.9.patch deleted file mode 100644 index 25dd650..0000000 --- a/xsa235-4.9.patch +++ /dev/null @@ -1,49 +0,0 @@ -From: Jan Beulich -Subject: arm/mm: release grant lock on xenmem_add_to_physmap_one() error paths - -Commit 55021ff9ab ("xen/arm: add_to_physmap_one: Avoid to map mfn 0 if -an error occurs") introduced error paths not releasing the grant table -lock. Replace them by a suitable check after the lock was dropped. - -This is XSA-235. - -Reported-by: Wei Liu -Signed-off-by: Jan Beulich -Reviewed-by: Julien Grall - ---- a/xen/arch/arm/mm.c -+++ b/xen/arch/arm/mm.c -@@ -1164,7 +1164,7 @@ int xenmem_add_to_physmap_one( - if ( idx < nr_status_frames(d->grant_table) ) - mfn = virt_to_mfn(d->grant_table->status[idx]); - else -- return -EINVAL; -+ mfn = mfn_x(INVALID_MFN); - } - else - { -@@ -1175,14 +1175,21 @@ int xenmem_add_to_physmap_one( - if ( idx < nr_grant_frames(d->grant_table) ) - mfn = virt_to_mfn(d->grant_table->shared_raw[idx]); - else -- return -EINVAL; -+ mfn = mfn_x(INVALID_MFN); - } - -- d->arch.grant_table_gfn[idx] = gfn; -+ if ( mfn != mfn_x(INVALID_MFN) ) -+ { -+ d->arch.grant_table_gfn[idx] = gfn; - -- t = p2m_ram_rw; -+ t = p2m_ram_rw; -+ } - - grant_write_unlock(d->grant_table); -+ -+ if ( mfn == mfn_x(INVALID_MFN) ) -+ return -EINVAL; -+ - break; - case XENMAPSPACE_shared_info: - if ( idx != 0 ) diff --git a/xsa236-4.9.patch b/xsa236-4.9.patch deleted file mode 100644 index 203025d..0000000 --- a/xsa236-4.9.patch +++ /dev/null @@ -1,66 +0,0 @@ -From: Jan Beulich -Subject: gnttab: fix pin count / page reference race - -Dropping page references before decrementing pin counts is a bad idea -if assumptions are being made that a non-zero pin count implies a valid -page. Fix the order of operations in gnttab_copy_release_buf(), but at -the same time also remove the assertion that was found to trigger: -map_grant_ref() also has the potential of causing a race here, and -changing the order of operations there would likely be quite a bit more -involved. - -This is XSA-236. - -Reported-by: Pawel Wieczorkiewicz -Signed-off-by: Jan Beulich -Reviewed-by: Andrew Cooper - ---- a/xen/common/grant_table.c -+++ b/xen/common/grant_table.c -@@ -2330,9 +2330,20 @@ __acquire_grant_for_copy( - td = page_get_owner_and_reference(*page); - /* - * act->pin being non-zero should guarantee the page to have a -- * non-zero refcount and hence a valid owner. -+ * non-zero refcount and hence a valid owner (matching the one on -+ * record), with one exception: If the owning domain is dying we -+ * had better not make implications from pin count (map_grant_ref() -+ * updates pin counts before obtaining page references, for -+ * example). - */ -- ASSERT(td); -+ if ( td != rd || rd->is_dying ) -+ { -+ if ( td ) -+ put_page(*page); -+ *page = NULL; -+ rc = GNTST_bad_domain; -+ goto unlock_out_clear; -+ } - } - - act->pin += readonly ? GNTPIN_hstr_inc : GNTPIN_hstw_inc; -@@ -2451,6 +2462,11 @@ static void gnttab_copy_release_buf(stru - unmap_domain_page(buf->virt); - buf->virt = NULL; - } -+ if ( buf->have_grant ) -+ { -+ __release_grant_for_copy(buf->domain, buf->ptr.u.ref, buf->read_only); -+ buf->have_grant = 0; -+ } - if ( buf->have_type ) - { - put_page_type(buf->page); -@@ -2461,11 +2477,6 @@ static void gnttab_copy_release_buf(stru - put_page(buf->page); - buf->page = NULL; - } -- if ( buf->have_grant ) -- { -- __release_grant_for_copy(buf->domain, buf->ptr.u.ref, buf->read_only); -- buf->have_grant = 0; -- } - } - - static int gnttab_copy_claim_buf(const struct gnttab_copy *op, diff --git a/xsa237-4.9-0001-x86-dont-allow-MSI-pIRQ-mapping-on-unowned-device.patch b/xsa237-4.9-0001-x86-dont-allow-MSI-pIRQ-mapping-on-unowned-device.patch deleted file mode 100644 index 7c9dff9..0000000 --- a/xsa237-4.9-0001-x86-dont-allow-MSI-pIRQ-mapping-on-unowned-device.patch +++ /dev/null @@ -1,27 +0,0 @@ -From: Jan Beulich -Subject: x86: don't allow MSI pIRQ mapping on unowned device - -MSI setup should be permitted only for existing devices owned by the -respective guest (the operation may still be carried out by the domain -controlling that guest). - -This is part of XSA-237. - -Reported-by: HW42 -Signed-off-by: Jan Beulich -Reviewed-by: Andrew Cooper - ---- a/xen/arch/x86/irq.c -+++ b/xen/arch/x86/irq.c -@@ -1963,7 +1963,10 @@ int map_domain_pirq( - if ( !cpu_has_apic ) - goto done; - -- pdev = pci_get_pdev(msi->seg, msi->bus, msi->devfn); -+ pdev = pci_get_pdev_by_domain(d, msi->seg, msi->bus, msi->devfn); -+ if ( !pdev ) -+ goto done; -+ - ret = pci_enable_msi(msi, &msi_desc); - if ( ret ) - { diff --git a/xsa237-4.9-0002-x86-enforce-proper-privilege-when-mapping-pIRQ-s.patch b/xsa237-4.9-0002-x86-enforce-proper-privilege-when-mapping-pIRQ-s.patch deleted file mode 100644 index 0add704..0000000 --- a/xsa237-4.9-0002-x86-enforce-proper-privilege-when-mapping-pIRQ-s.patch +++ /dev/null @@ -1,66 +0,0 @@ -From: Jan Beulich -Subject: x86: enforce proper privilege when (un)mapping pIRQ-s - -(Un)mapping of IRQs, just like other RESOURCE__ADD* / RESOURCE__REMOVE* -actions (in FLASK terms) should be XSM_DM_PRIV rather than XSM_TARGET. -This in turn requires bypassing the XSM check in physdev_unmap_pirq() -for the HVM emuirq case just like is being done in physdev_map_pirq(). -The primary goal security wise, however, is to no longer allow HVM -guests, by specifying their own domain ID instead of DOMID_SELF, to -enter code paths intended for PV guest and the control domains of HVM -guests only. - -This is part of XSA-237. - -Reported-by: HW42 -Signed-off-by: Jan Beulich -Reviewed-by: George Dunlap - ---- a/xen/arch/x86/physdev.c -+++ b/xen/arch/x86/physdev.c -@@ -111,7 +111,7 @@ int physdev_map_pirq(domid_t domid, int - if ( d == NULL ) - return -ESRCH; - -- ret = xsm_map_domain_pirq(XSM_TARGET, d); -+ ret = xsm_map_domain_pirq(XSM_DM_PRIV, d); - if ( ret ) - goto free_domain; - -@@ -256,13 +256,14 @@ int physdev_map_pirq(domid_t domid, int - int physdev_unmap_pirq(domid_t domid, int pirq) - { - struct domain *d; -- int ret; -+ int ret = 0; - - d = rcu_lock_domain_by_any_id(domid); - if ( d == NULL ) - return -ESRCH; - -- ret = xsm_unmap_domain_pirq(XSM_TARGET, d); -+ if ( domid != DOMID_SELF || !is_hvm_domain(d) || !has_pirq(d) ) -+ ret = xsm_unmap_domain_pirq(XSM_DM_PRIV, d); - if ( ret ) - goto free_domain; - ---- a/xen/include/xsm/dummy.h -+++ b/xen/include/xsm/dummy.h -@@ -453,7 +453,7 @@ static XSM_INLINE char *xsm_show_irq_sid - - static XSM_INLINE int xsm_map_domain_pirq(XSM_DEFAULT_ARG struct domain *d) - { -- XSM_ASSERT_ACTION(XSM_TARGET); -+ XSM_ASSERT_ACTION(XSM_DM_PRIV); - return xsm_default_action(action, current->domain, d); - } - -@@ -465,7 +465,7 @@ static XSM_INLINE int xsm_map_domain_irq - - static XSM_INLINE int xsm_unmap_domain_pirq(XSM_DEFAULT_ARG struct domain *d) - { -- XSM_ASSERT_ACTION(XSM_TARGET); -+ XSM_ASSERT_ACTION(XSM_DM_PRIV); - return xsm_default_action(action, current->domain, d); - } - diff --git a/xsa237-4.9-0003-x86-MSI-disallow-redundant-enabling.patch b/xsa237-4.9-0003-x86-MSI-disallow-redundant-enabling.patch deleted file mode 100644 index 5c69c48..0000000 --- a/xsa237-4.9-0003-x86-MSI-disallow-redundant-enabling.patch +++ /dev/null @@ -1,55 +0,0 @@ -From: Jan Beulich -Subject: x86/MSI: disallow redundant enabling - -At the moment, Xen attempts to allow redundant enabling of MSI by -having pci_enable_msi() return 0, and point to the existing MSI -descriptor, when the msi already exists. - -Unfortunately, if subsequent errors are encountered, the cleanup -paths assume pci_enable_msi() had done full initialization, and -hence undo everything that was assumed to be done by that -function without also undoing other setup that would normally -occur only after that function was called (in map_domain_pirq() -itself). - -Rather than try to make the redundant enabling case work properly, just -forbid it entirely by having pci_enable_msi() return -EEXIST when MSI -is already set up. - -This is part of XSA-237. - -Reported-by: HW42 -Signed-off-by: Jan Beulich -Reviewed-by: Andrew Cooper -Reviewed-by: George Dunlap - ---- a/xen/arch/x86/msi.c -+++ b/xen/arch/x86/msi.c -@@ -1050,11 +1050,10 @@ static int __pci_enable_msi(struct msi_i - old_desc = find_msi_entry(pdev, msi->irq, PCI_CAP_ID_MSI); - if ( old_desc ) - { -- printk(XENLOG_WARNING "irq %d already mapped to MSI on %04x:%02x:%02x.%u\n", -+ printk(XENLOG_ERR "irq %d already mapped to MSI on %04x:%02x:%02x.%u\n", - msi->irq, msi->seg, msi->bus, - PCI_SLOT(msi->devfn), PCI_FUNC(msi->devfn)); -- *desc = old_desc; -- return 0; -+ return -EEXIST; - } - - old_desc = find_msi_entry(pdev, -1, PCI_CAP_ID_MSIX); -@@ -1118,11 +1117,10 @@ static int __pci_enable_msix(struct msi_ - old_desc = find_msi_entry(pdev, msi->irq, PCI_CAP_ID_MSIX); - if ( old_desc ) - { -- printk(XENLOG_WARNING "irq %d already mapped to MSI-X on %04x:%02x:%02x.%u\n", -+ printk(XENLOG_ERR "irq %d already mapped to MSI-X on %04x:%02x:%02x.%u\n", - msi->irq, msi->seg, msi->bus, - PCI_SLOT(msi->devfn), PCI_FUNC(msi->devfn)); -- *desc = old_desc; -- return 0; -+ return -EEXIST; - } - - old_desc = find_msi_entry(pdev, -1, PCI_CAP_ID_MSI); diff --git a/xsa237-4.9-0004-x86-IRQ-conditionally-preserve-irq-pirq-mapping-on-error.patch b/xsa237-4.9-0004-x86-IRQ-conditionally-preserve-irq-pirq-mapping-on-error.patch deleted file mode 100644 index a16ec1b..0000000 --- a/xsa237-4.9-0004-x86-IRQ-conditionally-preserve-irq-pirq-mapping-on-error.patch +++ /dev/null @@ -1,124 +0,0 @@ -From: Jan Beulich -Subject: x86/IRQ: conditionally preserve irq <-> pirq mapping on map error paths - -Mappings that had been set up before should not be torn down when -handling unrelated errors. - -This is part of XSA-237. - -Reported-by: HW42 -Signed-off-by: Jan Beulich -Reviewed-by: George Dunlap - ---- a/xen/arch/x86/irq.c -+++ b/xen/arch/x86/irq.c -@@ -1251,7 +1251,8 @@ static int prepare_domain_irq_pirq(struc - return -ENOMEM; - } - *pinfo = info; -- return 0; -+ -+ return !!err; - } - - static void set_domain_irq_pirq(struct domain *d, int irq, struct pirq *pirq) -@@ -1294,7 +1295,10 @@ int init_domain_irq_mapping(struct domai - continue; - err = prepare_domain_irq_pirq(d, i, i, &info); - if ( err ) -+ { -+ ASSERT(err < 0); - break; -+ } - set_domain_irq_pirq(d, i, info); - } - -@@ -1902,6 +1906,7 @@ int map_domain_pirq( - struct pirq *info; - struct irq_desc *desc; - unsigned long flags; -+ DECLARE_BITMAP(prepared, MAX_MSI_IRQS) = {}; - - ASSERT(spin_is_locked(&d->event_lock)); - -@@ -1945,8 +1950,10 @@ int map_domain_pirq( - } - - ret = prepare_domain_irq_pirq(d, irq, pirq, &info); -- if ( ret ) -+ if ( ret < 0 ) - goto revoke; -+ if ( !ret ) -+ __set_bit(0, prepared); - - desc = irq_to_desc(irq); - -@@ -2018,8 +2025,10 @@ int map_domain_pirq( - irq = create_irq(NUMA_NO_NODE); - ret = irq >= 0 ? prepare_domain_irq_pirq(d, irq, pirq + nr, &info) - : irq; -- if ( ret ) -+ if ( ret < 0 ) - break; -+ if ( !ret ) -+ __set_bit(nr, prepared); - msi_desc[nr].irq = irq; - - if ( irq_permit_access(d, irq) != 0 ) -@@ -2052,15 +2061,15 @@ int map_domain_pirq( - desc->msi_desc = NULL; - spin_unlock_irqrestore(&desc->lock, flags); - } -- while ( nr-- ) -+ while ( nr ) - { - if ( irq >= 0 && irq_deny_access(d, irq) ) - printk(XENLOG_G_ERR - "dom%d: could not revoke access to IRQ%d (pirq %d)\n", - d->domain_id, irq, pirq); -- if ( info ) -+ if ( info && test_bit(nr, prepared) ) - cleanup_domain_irq_pirq(d, irq, info); -- info = pirq_info(d, pirq + nr); -+ info = pirq_info(d, pirq + --nr); - irq = info->arch.irq; - } - msi_desc->irq = -1; -@@ -2076,12 +2085,14 @@ int map_domain_pirq( - spin_lock_irqsave(&desc->lock, flags); - set_domain_irq_pirq(d, irq, info); - spin_unlock_irqrestore(&desc->lock, flags); -+ ret = 0; - } - - done: - if ( ret ) - { -- cleanup_domain_irq_pirq(d, irq, info); -+ if ( test_bit(0, prepared) ) -+ cleanup_domain_irq_pirq(d, irq, info); - revoke: - if ( irq_deny_access(d, irq) ) - printk(XENLOG_G_ERR ---- a/xen/arch/x86/physdev.c -+++ b/xen/arch/x86/physdev.c -@@ -186,7 +186,7 @@ int physdev_map_pirq(domid_t domid, int - } - else if ( type == MAP_PIRQ_TYPE_MULTI_MSI ) - { -- if ( msi->entry_nr <= 0 || msi->entry_nr > 32 ) -+ if ( msi->entry_nr <= 0 || msi->entry_nr > MAX_MSI_IRQS ) - ret = -EDOM; - else if ( msi->entry_nr != 1 && !iommu_intremap ) - ret = -EOPNOTSUPP; ---- a/xen/include/asm-x86/msi.h -+++ b/xen/include/asm-x86/msi.h -@@ -56,6 +56,8 @@ - /* MAX fixed pages reserved for mapping MSIX tables. */ - #define FIX_MSIX_MAX_PAGES 512 - -+#define MAX_MSI_IRQS 32 /* limited by MSI capability struct properties */ -+ - struct msi_info { - u16 seg; - u8 bus; diff --git a/xsa237-4.9-0005-x86-FLASK-fix-unmap-domain-IRQ-XSM-hook.patch b/xsa237-4.9-0005-x86-FLASK-fix-unmap-domain-IRQ-XSM-hook.patch deleted file mode 100644 index 155ba15..0000000 --- a/xsa237-4.9-0005-x86-FLASK-fix-unmap-domain-IRQ-XSM-hook.patch +++ /dev/null @@ -1,37 +0,0 @@ -From: Jan Beulich -Subject: x86/FLASK: fix unmap-domain-IRQ XSM hook - -The caller and the FLASK implementation of xsm_unmap_domain_irq() -disagreed about what the "data" argument points to in the MSI case: -Change both sides to pass/take a PCI device. - -This is part of XSA-237. - -Signed-off-by: Jan Beulich -Reviewed-by: Andrew Cooper - ---- a/xen/arch/x86/irq.c -+++ b/xen/arch/x86/irq.c -@@ -2143,7 +2143,8 @@ int unmap_domain_pirq(struct domain *d, - nr = msi_desc->msi.nvec; - } - -- ret = xsm_unmap_domain_irq(XSM_HOOK, d, irq, msi_desc); -+ ret = xsm_unmap_domain_irq(XSM_HOOK, d, irq, -+ msi_desc ? msi_desc->dev : NULL); - if ( ret ) - goto done; - ---- a/xen/xsm/flask/hooks.c -+++ b/xen/xsm/flask/hooks.c -@@ -918,8 +918,8 @@ static int flask_unmap_domain_msi (struc - u32 *sid, struct avc_audit_data *ad) - { - #ifdef CONFIG_HAS_PCI -- struct msi_info *msi = data; -- u32 machine_bdf = (msi->seg << 16) | (msi->bus << 8) | msi->devfn; -+ const struct pci_dev *pdev = data; -+ u32 machine_bdf = (pdev->seg << 16) | (pdev->bus << 8) | pdev->devfn; - - AVC_AUDIT_DATA_INIT(ad, DEV); - ad->device = machine_bdf; diff --git a/xsa238.patch b/xsa238.patch deleted file mode 100644 index 0d7d48f..0000000 --- a/xsa238.patch +++ /dev/null @@ -1,45 +0,0 @@ -From cdc2887076b19b39fab9faec495082586f3113df Mon Sep 17 00:00:00 2001 -From: XenProject Security Team -Date: Tue, 5 Sep 2017 13:41:37 +0200 -Subject: x86/ioreq server: correctly handle bogus - XEN_DMOP_{,un}map_io_range_to_ioreq_server arguments - -Misbehaving device model can pass incorrect XEN_DMOP_map/ -unmap_io_range_to_ioreq_server arguments, namely end < start when -specifying address range. When this happens we hit ASSERT(s <= e) in -rangeset_contains_range()/rangeset_overlaps_range() with debug builds. -Production builds will not trap right away but may misbehave later -while handling such bogus ranges. - -This is XSA-238. - -Signed-off-by: Vitaly Kuznetsov -Reviewed-by: Jan Beulich ---- - xen/arch/x86/hvm/ioreq.c | 6 ++++++ - 1 file changed, 6 insertions(+) - -diff --git a/xen/arch/x86/hvm/ioreq.c b/xen/arch/x86/hvm/ioreq.c -index b2a8b0e986..8c8bf1f0ec 100644 ---- a/xen/arch/x86/hvm/ioreq.c -+++ b/xen/arch/x86/hvm/ioreq.c -@@ -820,6 +820,9 @@ int hvm_map_io_range_to_ioreq_server(struct domain *d, ioservid_t id, - struct hvm_ioreq_server *s; - int rc; - -+ if ( start > end ) -+ return -EINVAL; -+ - spin_lock_recursive(&d->arch.hvm_domain.ioreq_server.lock); - - rc = -ENOENT; -@@ -872,6 +875,9 @@ int hvm_unmap_io_range_from_ioreq_server(struct domain *d, ioservid_t id, - struct hvm_ioreq_server *s; - int rc; - -+ if ( start > end ) -+ return -EINVAL; -+ - spin_lock_recursive(&d->arch.hvm_domain.ioreq_server.lock); - - rc = -ENOENT; diff --git a/xsa239.patch b/xsa239.patch deleted file mode 100644 index 5daecb5..0000000 --- a/xsa239.patch +++ /dev/null @@ -1,46 +0,0 @@ -From: Jan Beulich -Subject: x86/HVM: prefill partially used variable on emulation paths - -Certain handlers ignore the access size (vioapic_write() being the -example this was found with), perhaps leading to subsequent reads -seeing data that wasn't actually written by the guest. For -consistency and extra safety also do this on the read path of -hvm_process_io_intercept(), even if this doesn't directly affect what -guests get to see, as we've supposedly already dealt with read handlers -leaving data completely unitialized. - -This is XSA-239. - -Reported-by: Roger Pau Monné -Reviewed-by: Roger Pau Monné -Signed-off-by: Jan Beulich - ---- a/xen/arch/x86/hvm/emulate.c -+++ b/xen/arch/x86/hvm/emulate.c -@@ -129,7 +129,7 @@ static int hvmemul_do_io( - .count = *reps, - .dir = dir, - .df = df, -- .data = data, -+ .data = data_is_addr ? data : 0, - .data_is_ptr = data_is_addr, /* ioreq_t field name is misleading */ - .state = STATE_IOREQ_READY, - }; ---- a/xen/arch/x86/hvm/intercept.c -+++ b/xen/arch/x86/hvm/intercept.c -@@ -127,6 +127,7 @@ int hvm_process_io_intercept(const struc - addr = (p->type == IOREQ_TYPE_COPY) ? - p->addr + step * i : - p->addr; -+ data = 0; - rc = ops->read(handler, addr, p->size, &data); - if ( rc != X86EMUL_OKAY ) - break; -@@ -161,6 +162,7 @@ int hvm_process_io_intercept(const struc - { - if ( p->data_is_ptr ) - { -+ data = 0; - switch ( hvm_copy_from_guest_phys(&data, p->data + step * i, - p->size) ) - { diff --git a/xsa240-4.9-0001-x86-limit-linear-page-table-use-to-a-single-level.patch b/xsa240-4.9-0001-x86-limit-linear-page-table-use-to-a-single-level.patch deleted file mode 100644 index 515ad22..0000000 --- a/xsa240-4.9-0001-x86-limit-linear-page-table-use-to-a-single-level.patch +++ /dev/null @@ -1,494 +0,0 @@ -From 867988237d3e472fe2c99e81ae733e103422566c Mon Sep 17 00:00:00 2001 -From: Jan Beulich -Date: Thu, 28 Sep 2017 15:17:25 +0100 -Subject: [PATCH 1/2] x86: limit linear page table use to a single level - -That's the only way that they're meant to be used. Without such a -restriction arbitrarily long chains of same-level page tables can be -built, tearing down of which may then cause arbitrarily deep recursion, -causing a stack overflow. To facilitate this restriction, a counter is -being introduced to track both the number of same-level entries in a -page table as well as the number of uses of a page table in another -same-level one (counting into positive and negative direction -respectively, utilizing the fact that both counts can't be non-zero at -the same time). - -Note that the added accounting introduces a restriction on the number -of times a page can be used in other same-level page tables - more than -32k of such uses are no longer possible. - -Note also that some put_page_and_type[_preemptible]() calls are -replaced with open-coded equivalents. This seemed preferrable to -adding "parent_table" to the matrix of functions. - -Note further that cross-domain same-level page table references are no -longer permitted (they probably never should have been). - -This is XSA-240. - -Reported-by: Jann Horn -Signed-off-by: Jan Beulich -Signed-off-by: George Dunlap ---- - xen/arch/x86/domain.c | 1 + - xen/arch/x86/mm.c | 171 ++++++++++++++++++++++++++++++++++++++----- - xen/include/asm-x86/domain.h | 2 + - xen/include/asm-x86/mm.h | 25 +++++-- - 4 files changed, 175 insertions(+), 24 deletions(-) - -diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c -index d7e699228c..d7ed72c246 100644 ---- a/xen/arch/x86/domain.c -+++ b/xen/arch/x86/domain.c -@@ -1226,6 +1226,7 @@ int arch_set_info_guest( - rc = -ERESTART; - /* Fallthrough */ - case -ERESTART: -+ v->arch.old_guest_ptpg = NULL; - v->arch.old_guest_table = - pagetable_get_page(v->arch.guest_table); - v->arch.guest_table = pagetable_null(); -diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c -index 86f5eda52d..1e469bd354 100644 ---- a/xen/arch/x86/mm.c -+++ b/xen/arch/x86/mm.c -@@ -747,6 +747,61 @@ static void put_data_page( - put_page(page); - } - -+static bool inc_linear_entries(struct page_info *pg) -+{ -+ typeof(pg->linear_pt_count) nc = read_atomic(&pg->linear_pt_count), oc; -+ -+ do { -+ /* -+ * The check below checks for the "linear use" count being non-zero -+ * as well as overflow. Signed integer overflow is undefined behavior -+ * according to the C spec. However, as long as linear_pt_count is -+ * smaller in size than 'int', the arithmetic operation of the -+ * increment below won't overflow; rather the result will be truncated -+ * when stored. Ensure that this is always true. -+ */ -+ BUILD_BUG_ON(sizeof(nc) >= sizeof(int)); -+ oc = nc++; -+ if ( nc <= 0 ) -+ return false; -+ nc = cmpxchg(&pg->linear_pt_count, oc, nc); -+ } while ( oc != nc ); -+ -+ return true; -+} -+ -+static void dec_linear_entries(struct page_info *pg) -+{ -+ typeof(pg->linear_pt_count) oc; -+ -+ oc = arch_fetch_and_add(&pg->linear_pt_count, -1); -+ ASSERT(oc > 0); -+} -+ -+static bool inc_linear_uses(struct page_info *pg) -+{ -+ typeof(pg->linear_pt_count) nc = read_atomic(&pg->linear_pt_count), oc; -+ -+ do { -+ /* See the respective comment in inc_linear_entries(). */ -+ BUILD_BUG_ON(sizeof(nc) >= sizeof(int)); -+ oc = nc--; -+ if ( nc >= 0 ) -+ return false; -+ nc = cmpxchg(&pg->linear_pt_count, oc, nc); -+ } while ( oc != nc ); -+ -+ return true; -+} -+ -+static void dec_linear_uses(struct page_info *pg) -+{ -+ typeof(pg->linear_pt_count) oc; -+ -+ oc = arch_fetch_and_add(&pg->linear_pt_count, 1); -+ ASSERT(oc < 0); -+} -+ - /* - * We allow root tables to map each other (a.k.a. linear page tables). It - * needs some special care with reference counts and access permissions: -@@ -777,15 +832,35 @@ get_##level##_linear_pagetable( \ - \ - if ( (pfn = level##e_get_pfn(pde)) != pde_pfn ) \ - { \ -+ struct page_info *ptpg = mfn_to_page(pde_pfn); \ -+ \ -+ /* Make sure the page table belongs to the correct domain. */ \ -+ if ( unlikely(page_get_owner(ptpg) != d) ) \ -+ return 0; \ -+ \ - /* Make sure the mapped frame belongs to the correct domain. */ \ - if ( unlikely(!get_page_from_pagenr(pfn, d)) ) \ - return 0; \ - \ - /* \ -- * Ensure that the mapped frame is an already-validated page table. \ -+ * Ensure that the mapped frame is an already-validated page table \ -+ * and is not itself having linear entries, as well as that the \ -+ * containing page table is not iself in use as a linear page table \ -+ * elsewhere. \ - * If so, atomically increment the count (checking for overflow). \ - */ \ - page = mfn_to_page(pfn); \ -+ if ( !inc_linear_entries(ptpg) ) \ -+ { \ -+ put_page(page); \ -+ return 0; \ -+ } \ -+ if ( !inc_linear_uses(page) ) \ -+ { \ -+ dec_linear_entries(ptpg); \ -+ put_page(page); \ -+ return 0; \ -+ } \ - y = page->u.inuse.type_info; \ - do { \ - x = y; \ -@@ -793,6 +868,8 @@ get_##level##_linear_pagetable( \ - unlikely((x & (PGT_type_mask|PGT_validated)) != \ - (PGT_##level##_page_table|PGT_validated)) ) \ - { \ -+ dec_linear_uses(page); \ -+ dec_linear_entries(ptpg); \ - put_page(page); \ - return 0; \ - } \ -@@ -1226,6 +1303,9 @@ get_page_from_l4e( - l3e_remove_flags((pl3e), _PAGE_USER|_PAGE_RW|_PAGE_ACCESSED); \ - } while ( 0 ) - -+static int _put_page_type(struct page_info *page, bool preemptible, -+ struct page_info *ptpg); -+ - void put_page_from_l1e(l1_pgentry_t l1e, struct domain *l1e_owner) - { - unsigned long pfn = l1e_get_pfn(l1e); -@@ -1296,17 +1376,22 @@ static int put_page_from_l2e(l2_pgentry_t l2e, unsigned long pfn) - if ( l2e_get_flags(l2e) & _PAGE_PSE ) - put_superpage(l2e_get_pfn(l2e)); - else -- put_page_and_type(l2e_get_page(l2e)); -+ { -+ struct page_info *pg = l2e_get_page(l2e); -+ int rc = _put_page_type(pg, false, mfn_to_page(pfn)); -+ -+ ASSERT(!rc); -+ put_page(pg); -+ } - - return 0; - } - --static int __put_page_type(struct page_info *, int preemptible); -- - static int put_page_from_l3e(l3_pgentry_t l3e, unsigned long pfn, - int partial, bool_t defer) - { - struct page_info *pg; -+ int rc; - - if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) || (l3e_get_pfn(l3e) == pfn) ) - return 1; -@@ -1329,21 +1414,28 @@ static int put_page_from_l3e(l3_pgentry_t l3e, unsigned long pfn, - if ( unlikely(partial > 0) ) - { - ASSERT(!defer); -- return __put_page_type(pg, 1); -+ return _put_page_type(pg, true, mfn_to_page(pfn)); - } - - if ( defer ) - { -+ current->arch.old_guest_ptpg = mfn_to_page(pfn); - current->arch.old_guest_table = pg; - return 0; - } - -- return put_page_and_type_preemptible(pg); -+ rc = _put_page_type(pg, true, mfn_to_page(pfn)); -+ if ( likely(!rc) ) -+ put_page(pg); -+ -+ return rc; - } - - static int put_page_from_l4e(l4_pgentry_t l4e, unsigned long pfn, - int partial, bool_t defer) - { -+ int rc = 1; -+ - if ( (l4e_get_flags(l4e) & _PAGE_PRESENT) && - (l4e_get_pfn(l4e) != pfn) ) - { -@@ -1352,18 +1444,22 @@ static int put_page_from_l4e(l4_pgentry_t l4e, unsigned long pfn, - if ( unlikely(partial > 0) ) - { - ASSERT(!defer); -- return __put_page_type(pg, 1); -+ return _put_page_type(pg, true, mfn_to_page(pfn)); - } - - if ( defer ) - { -+ current->arch.old_guest_ptpg = mfn_to_page(pfn); - current->arch.old_guest_table = pg; - return 0; - } - -- return put_page_and_type_preemptible(pg); -+ rc = _put_page_type(pg, true, mfn_to_page(pfn)); -+ if ( likely(!rc) ) -+ put_page(pg); - } -- return 1; -+ -+ return rc; - } - - static int alloc_l1_table(struct page_info *page) -@@ -1561,6 +1657,7 @@ static int alloc_l3_table(struct page_info *page) - { - page->nr_validated_ptes = i; - page->partial_pte = 0; -+ current->arch.old_guest_ptpg = NULL; - current->arch.old_guest_table = page; - } - while ( i-- > 0 ) -@@ -1654,6 +1751,7 @@ static int alloc_l4_table(struct page_info *page) - { - if ( current->arch.old_guest_table ) - page->nr_validated_ptes++; -+ current->arch.old_guest_ptpg = NULL; - current->arch.old_guest_table = page; - } - } -@@ -2403,14 +2501,20 @@ int free_page_type(struct page_info *pag - } - - --static int __put_final_page_type( -- struct page_info *page, unsigned long type, int preemptible) -+static int _put_final_page_type(struct page_info *page, unsigned long type, -+ bool preemptible, struct page_info *ptpg) - { - int rc = free_page_type(page, type, preemptible); - - /* No need for atomic update of type_info here: noone else updates it. */ - if ( rc == 0 ) - { -+ if ( ptpg && PGT_type_equal(type, ptpg->u.inuse.type_info) ) -+ { -+ dec_linear_uses(page); -+ dec_linear_entries(ptpg); -+ } -+ ASSERT(!page->linear_pt_count || page_get_owner(page)->is_dying); - /* - * Record TLB information for flush later. We do not stamp page tables - * when running in shadow mode: -@@ -2446,8 +2550,8 @@ static int __put_final_page_type( - } - - --static int __put_page_type(struct page_info *page, -- int preemptible) -+static int _put_page_type(struct page_info *page, bool preemptible, -+ struct page_info *ptpg) - { - unsigned long nx, x, y = page->u.inuse.type_info; - int rc = 0; -@@ -2474,12 +2578,28 @@ static int __put_page_type(struct page_info *page, - x, nx)) != x) ) - continue; - /* We cleared the 'valid bit' so we do the clean up. */ -- rc = __put_final_page_type(page, x, preemptible); -+ rc = _put_final_page_type(page, x, preemptible, ptpg); -+ ptpg = NULL; - if ( x & PGT_partial ) - put_page(page); - break; - } - -+ if ( ptpg && PGT_type_equal(x, ptpg->u.inuse.type_info) ) -+ { -+ /* -+ * page_set_tlbflush_timestamp() accesses the same union -+ * linear_pt_count lives in. Unvalidated page table pages, -+ * however, should occur during domain destruction only -+ * anyway. Updating of linear_pt_count luckily is not -+ * necessary anymore for a dying domain. -+ */ -+ ASSERT(page_get_owner(page)->is_dying); -+ ASSERT(page->linear_pt_count < 0); -+ ASSERT(ptpg->linear_pt_count > 0); -+ ptpg = NULL; -+ } -+ - /* - * Record TLB information for flush later. We do not stamp page - * tables when running in shadow mode: -@@ -2499,6 +2619,13 @@ static int __put_page_type(struct page_info *page, - return -EINTR; - } - -+ if ( ptpg && PGT_type_equal(x, ptpg->u.inuse.type_info) ) -+ { -+ ASSERT(!rc); -+ dec_linear_uses(page); -+ dec_linear_entries(ptpg); -+ } -+ - return rc; - } - -@@ -2638,6 +2765,7 @@ static int __get_page_type(struct page_info *page, unsigned long type, - page->nr_validated_ptes = 0; - page->partial_pte = 0; - } -+ page->linear_pt_count = 0; - rc = alloc_page_type(page, type, preemptible); - } - -@@ -2652,7 +2780,7 @@ static int __get_page_type(struct page_info *page, unsigned long type, - - void put_page_type(struct page_info *page) - { -- int rc = __put_page_type(page, 0); -+ int rc = _put_page_type(page, false, NULL); - ASSERT(rc == 0); - (void)rc; - } -@@ -2668,7 +2796,7 @@ int get_page_type(struct page_info *page, unsigned long type) - - int put_page_type_preemptible(struct page_info *page) - { -- return __put_page_type(page, 1); -+ return _put_page_type(page, true, NULL); - } - - int get_page_type_preemptible(struct page_info *page, unsigned long type) -@@ -2878,11 +3006,14 @@ int put_old_guest_table(struct vcpu *v) - if ( !v->arch.old_guest_table ) - return 0; - -- switch ( rc = put_page_and_type_preemptible(v->arch.old_guest_table) ) -+ switch ( rc = _put_page_type(v->arch.old_guest_table, true, -+ v->arch.old_guest_ptpg) ) - { - case -EINTR: - case -ERESTART: - return -ERESTART; -+ case 0: -+ put_page(v->arch.old_guest_table); - } - - v->arch.old_guest_table = NULL; -@@ -3042,6 +3173,7 @@ int new_guest_cr3(unsigned long mfn) - rc = -ERESTART; - /* fallthrough */ - case -ERESTART: -+ curr->arch.old_guest_ptpg = NULL; - curr->arch.old_guest_table = page; - break; - default: -@@ -3310,7 +3442,10 @@ long do_mmuext_op( - if ( type == PGT_l1_page_table ) - put_page_and_type(page); - else -+ { -+ curr->arch.old_guest_ptpg = NULL; - curr->arch.old_guest_table = page; -+ } - } - } - -@@ -3346,6 +3481,7 @@ long do_mmuext_op( - { - case -EINTR: - case -ERESTART: -+ curr->arch.old_guest_ptpg = NULL; - curr->arch.old_guest_table = page; - rc = 0; - break; -@@ -3425,6 +3561,7 @@ long do_mmuext_op( - rc = -ERESTART; - /* fallthrough */ - case -ERESTART: -+ curr->arch.old_guest_ptpg = NULL; - curr->arch.old_guest_table = page; - break; - default: -diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h -index 924caac834..5a512918cc 100644 ---- a/xen/include/asm-x86/domain.h -+++ b/xen/include/asm-x86/domain.h -@@ -527,6 +527,8 @@ struct arch_vcpu - pagetable_t guest_table_user; /* (MFN) x86/64 user-space pagetable */ - pagetable_t guest_table; /* (MFN) guest notion of cr3 */ - struct page_info *old_guest_table; /* partially destructed pagetable */ -+ struct page_info *old_guest_ptpg; /* containing page table of the */ -+ /* former, if any */ - /* guest_table holds a ref to the page, and also a type-count unless - * shadow refcounts are in use */ - pagetable_t shadow_table[4]; /* (MFN) shadow(s) of guest */ -diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h -index 119d7dec6b..445da50d47 100644 ---- a/xen/include/asm-x86/mm.h -+++ b/xen/include/asm-x86/mm.h -@@ -124,11 +124,11 @@ struct page_info - u32 tlbflush_timestamp; - - /* -- * When PGT_partial is true then this field is valid and indicates -- * that PTEs in the range [0, @nr_validated_ptes) have been validated. -- * An extra page reference must be acquired (or not dropped) whenever -- * PGT_partial gets set, and it must be dropped when the flag gets -- * cleared. This is so that a get() leaving a page in partially -+ * When PGT_partial is true then the first two fields are valid and -+ * indicate that PTEs in the range [0, @nr_validated_ptes) have been -+ * validated. An extra page reference must be acquired (or not dropped) -+ * whenever PGT_partial gets set, and it must be dropped when the flag -+ * gets cleared. This is so that a get() leaving a page in partially - * validated state (where the caller would drop the reference acquired - * due to the getting of the type [apparently] failing [-ERESTART]) - * would not accidentally result in a page left with zero general -@@ -152,10 +152,18 @@ struct page_info - * put_page_from_lNe() (due to the apparent failure), and hence it - * must be dropped when the put operation is resumed (and completes), - * but it must not be acquired if picking up the page for validation. -+ * -+ * The 3rd field, @linear_pt_count, indicates -+ * - by a positive value, how many same-level page table entries a page -+ * table has, -+ * - by a negative value, in how many same-level page tables a page is -+ * in use. - */ - struct { -- u16 nr_validated_ptes; -- s8 partial_pte; -+ u16 nr_validated_ptes:PAGETABLE_ORDER + 1; -+ u16 :16 - PAGETABLE_ORDER - 1 - 2; -+ s16 partial_pte:2; -+ s16 linear_pt_count; - }; - - /* -@@ -206,6 +214,9 @@ struct page_info - #define PGT_count_width PG_shift(9) - #define PGT_count_mask ((1UL< -Date: Fri, 22 Sep 2017 11:46:55 +0100 -Subject: [PATCH 2/2] x86/mm: Disable PV linear pagetables by default - -Allowing pagetables to point to other pagetables of the same level -(often called 'linear pagetables') has been included in Xen since its -inception. But it is not used by the most common PV guests (Linux, -NetBSD, minios), and has been the source of a number of subtle -reference-counting bugs. - -Add a command-line option to control whether PV linear pagetables are -allowed (disabled by default). - -Reported-by: Jann Horn -Signed-off-by: George Dunlap -Reviewed-by: Andrew Cooper ---- -Changes since v2: -- s/_/-/; in command-line option -- Added __read_mostly ---- - docs/misc/xen-command-line.markdown | 15 +++++++++++++++ - xen/arch/x86/mm.c | 10 ++++++++++ - 2 files changed, 25 insertions(+) - -diff --git a/docs/misc/xen-command-line.markdown b/docs/misc/xen-command-line.markdown -index 44d99852aa..45ef873abb 100644 ---- a/docs/misc/xen-command-line.markdown -+++ b/docs/misc/xen-command-line.markdown -@@ -1374,6 +1374,21 @@ The following resources are available: - CDP, one COS will corespond two CBMs other than one with CAT, due to the - sum of CBMs is fixed, that means actual `cos_max` in use will automatically - reduce to half when CDP is enabled. -+ -+### pv-linear-pt -+> `= ` -+ -+> Default: `false` -+ -+Allow PV guests to have pagetable entries pointing to other pagetables -+of the same level (i.e., allowing L2 PTEs to point to other L2 pages). -+This technique is often called "linear pagetables", and is sometimes -+used to allow operating systems a simple way to consistently map the -+current process's pagetables into its own virtual address space. -+ -+None of the most common PV operating systems (Linux, NetBSD, MiniOS) -+use this technique, but there may be custom operating systems which -+do. - - ### reboot - > `= t[riple] | k[bd] | a[cpi] | p[ci] | P[ower] | e[fi] | n[o] [, [w]arm | [c]old]` -diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c -index 1e469bd354..32952a46b9 100644 ---- a/xen/arch/x86/mm.c -+++ b/xen/arch/x86/mm.c -@@ -814,6 +814,9 @@ static void dec_linear_uses(struct page_info *pg) - * frame if it is mapped by a different root table. This is sufficient and - * also necessary to allow validation of a root table mapping itself. - */ -+static bool __read_mostly pv_linear_pt_enable = false; -+boolean_param("pv-linear-pt", pv_linear_pt_enable); -+ - #define define_get_linear_pagetable(level) \ - static int \ - get_##level##_linear_pagetable( \ -@@ -823,6 +826,13 @@ get_##level##_linear_pagetable( \ - struct page_info *page; \ - unsigned long pfn; \ - \ -+ if ( !pv_linear_pt_enable ) \ -+ { \ -+ gdprintk(XENLOG_WARNING, \ -+ "Attempt to create linear p.t. (feature disabled)\n"); \ -+ return 0; \ -+ } \ -+ \ - if ( (level##e_get_flags(pde) & _PAGE_RW) ) \ - { \ - gdprintk(XENLOG_WARNING, \ --- -2.14.1 - diff --git a/xsa240-4.9-0003-x86-dont-wrongly-trigger-linear-page-table-assertion.patch b/xsa240-4.9-0003-x86-dont-wrongly-trigger-linear-page-table-assertion.patch deleted file mode 100644 index fb1e0f0..0000000 --- a/xsa240-4.9-0003-x86-dont-wrongly-trigger-linear-page-table-assertion.patch +++ /dev/null @@ -1,63 +0,0 @@ -From: Jan Beulich -Subject: x86: don't wrongly trigger linear page table assertion - -_put_page_type() may do multiple iterations until its cmpxchg() -succeeds. It invokes set_tlbflush_timestamp() on the first -iteration, however. Code inside the function takes care of this, but -- the assertion in _put_final_page_type() would trigger on the second - iteration if time stamps in a debug build are permitted to be - sufficiently much wider than the default 6 bits (see WRAP_MASK in - flushtlb.c), -- it returning -EINTR (for a continuation to be scheduled) would leave - the page inconsistent state (until the re-invocation completes). -Make the set_tlbflush_timestamp() invocation conditional, bypassing it -(for now) only in the case we really can't tolerate the stamp to be -stored. - -This is part of XSA-240. - -Signed-off-by: Jan Beulich -Reviewed-by: George Dunlap - ---- a/xen/arch/x86/mm.c -+++ b/xen/arch/x86/mm.c -@@ -2647,30 +2647,22 @@ static int _put_page_type(struct page_in - break; - } - -- if ( ptpg && PGT_type_equal(x, ptpg->u.inuse.type_info) ) -- { -- /* -- * set_tlbflush_timestamp() accesses the same union -- * linear_pt_count lives in. Unvalidated page table pages, -- * however, should occur during domain destruction only -- * anyway. Updating of linear_pt_count luckily is not -- * necessary anymore for a dying domain. -- */ -- ASSERT(page_get_owner(page)->is_dying); -- ASSERT(page->linear_pt_count < 0); -- ASSERT(ptpg->linear_pt_count > 0); -- ptpg = NULL; -- } -- - /* - * Record TLB information for flush later. We do not stamp page - * tables when running in shadow mode: - * 1. Pointless, since it's the shadow pt's which must be tracked. - * 2. Shadow mode reuses this field for shadowed page tables to - * store flags info -- we don't want to conflict with that. -+ * Also page_set_tlbflush_timestamp() accesses the same union -+ * linear_pt_count lives in. Pages (including page table ones), -+ * however, don't need their flush time stamp set except when -+ * the last reference is being dropped. For page table pages -+ * this happens in _put_final_page_type(). - */ -- if ( !(shadow_mode_enabled(page_get_owner(page)) && -- (page->count_info & PGC_page_table)) ) -+ if ( ptpg && PGT_type_equal(x, ptpg->u.inuse.type_info) ) -+ BUG_ON(!IS_ENABLED(CONFIG_PV_LINEAR_PT)); -+ else if ( !(shadow_mode_enabled(page_get_owner(page)) && -+ (page->count_info & PGC_page_table)) ) - page_set_tlbflush_timestamp(page); - } - else if ( unlikely((nx & (PGT_locked | PGT_count_mask)) == diff --git a/xsa241-4.9.patch b/xsa241-4.9.patch deleted file mode 100644 index 514e4c7..0000000 --- a/xsa241-4.9.patch +++ /dev/null @@ -1,120 +0,0 @@ -From: Jan Beulich -Subject: x86: don't store possibly stale TLB flush time stamp - -While the timing window is extremely narrow, it is theoretically -possible for an update to the TLB flush clock and a subsequent flush -IPI to happen between the read and write parts of the update of the -per-page stamp. Exclude this possibility by disabling interrupts -across the update, preventing the IPI to be serviced in the middle. - -This is XSA-241. - -Reported-by: Jann Horn -Suggested-by: George Dunlap -Signed-off-by: Jan Beulich -Reviewed-by: George Dunlap - ---- a/xen/arch/arm/smp.c -+++ b/xen/arch/arm/smp.c -@@ -1,3 +1,4 @@ -+#include - #include - #include - #include ---- a/xen/arch/x86/mm.c -+++ b/xen/arch/x86/mm.c -@@ -2524,7 +2524,7 @@ static int _put_final_page_type(struct p - */ - if ( !(shadow_mode_enabled(page_get_owner(page)) && - (page->count_info & PGC_page_table)) ) -- page->tlbflush_timestamp = tlbflush_current_time(); -+ page_set_tlbflush_timestamp(page); - wmb(); - page->u.inuse.type_info--; - } -@@ -2534,7 +2534,7 @@ static int _put_final_page_type(struct p - (PGT_count_mask|PGT_validated|PGT_partial)) == 1); - if ( !(shadow_mode_enabled(page_get_owner(page)) && - (page->count_info & PGC_page_table)) ) -- page->tlbflush_timestamp = tlbflush_current_time(); -+ page_set_tlbflush_timestamp(page); - wmb(); - page->u.inuse.type_info |= PGT_validated; - } -@@ -2588,7 +2588,7 @@ static int _put_page_type(struct page_in - if ( ptpg && PGT_type_equal(x, ptpg->u.inuse.type_info) ) - { - /* -- * page_set_tlbflush_timestamp() accesses the same union -+ * set_tlbflush_timestamp() accesses the same union - * linear_pt_count lives in. Unvalidated page table pages, - * however, should occur during domain destruction only - * anyway. Updating of linear_pt_count luckily is not -@@ -2609,7 +2609,7 @@ static int _put_page_type(struct page_in - */ - if ( !(shadow_mode_enabled(page_get_owner(page)) && - (page->count_info & PGC_page_table)) ) -- page->tlbflush_timestamp = tlbflush_current_time(); -+ page_set_tlbflush_timestamp(page); - } - - if ( likely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) == x) ) ---- a/xen/arch/x86/mm/shadow/common.c -+++ b/xen/arch/x86/mm/shadow/common.c -@@ -1464,7 +1464,7 @@ void shadow_free(struct domain *d, mfn_t - * TLBs when we reuse the page. Because the destructors leave the - * contents of the pages in place, we can delay TLB flushes until - * just before the allocator hands the page out again. */ -- sp->tlbflush_timestamp = tlbflush_current_time(); -+ page_set_tlbflush_timestamp(sp); - perfc_decr(shadow_alloc_count); - page_list_add_tail(sp, &d->arch.paging.shadow.freelist); - sp = next; ---- a/xen/common/page_alloc.c -+++ b/xen/common/page_alloc.c -@@ -960,7 +960,7 @@ static void free_heap_pages( - /* If a page has no owner it will need no safety TLB flush. */ - pg[i].u.free.need_tlbflush = (page_get_owner(&pg[i]) != NULL); - if ( pg[i].u.free.need_tlbflush ) -- pg[i].tlbflush_timestamp = tlbflush_current_time(); -+ page_set_tlbflush_timestamp(&pg[i]); - - /* This page is not a guest frame any more. */ - page_set_owner(&pg[i], NULL); /* set_gpfn_from_mfn snoops pg owner */ ---- a/xen/include/asm-arm/flushtlb.h -+++ b/xen/include/asm-arm/flushtlb.h -@@ -12,6 +12,11 @@ static inline void tlbflush_filter(cpuma - - #define tlbflush_current_time() (0) - -+static inline void page_set_tlbflush_timestamp(struct page_info *page) -+{ -+ page->tlbflush_timestamp = tlbflush_current_time(); -+} -+ - #if defined(CONFIG_ARM_32) - # include - #elif defined(CONFIG_ARM_64) ---- a/xen/include/asm-x86/flushtlb.h -+++ b/xen/include/asm-x86/flushtlb.h -@@ -23,6 +23,20 @@ DECLARE_PER_CPU(u32, tlbflush_time); - - #define tlbflush_current_time() tlbflush_clock - -+static inline void page_set_tlbflush_timestamp(struct page_info *page) -+{ -+ /* -+ * Prevent storing a stale time stamp, which could happen if an update -+ * to tlbflush_clock plus a subsequent flush IPI happen between the -+ * reading of tlbflush_clock and the writing of the struct page_info -+ * field. -+ */ -+ ASSERT(local_irq_is_enabled()); -+ local_irq_disable(); -+ page->tlbflush_timestamp = tlbflush_current_time(); -+ local_irq_enable(); -+} -+ - /* - * @cpu_stamp is the timestamp at last TLB flush for the CPU we are testing. - * @lastuse_stamp is a timestamp taken when the PFN we are testing was last diff --git a/xsa242-4.9.patch b/xsa242-4.9.patch deleted file mode 100644 index 8adfa61..0000000 --- a/xsa242-4.9.patch +++ /dev/null @@ -1,43 +0,0 @@ -From: Jan Beulich -Subject: x86: don't allow page_unlock() to drop the last type reference - -Only _put_page_type() does the necessary cleanup, and hence not all -domain pages can be released during guest cleanup (leaving around -zombie domains) if we get this wrong. - -This is XSA-242. - -Signed-off-by: Jan Beulich - ---- a/xen/arch/x86/mm.c -+++ b/xen/arch/x86/mm.c -@@ -1923,7 +1923,11 @@ void page_unlock(struct page_info *page) - - do { - x = y; -+ ASSERT((x & PGT_count_mask) && (x & PGT_locked)); -+ - nx = x - (1 | PGT_locked); -+ /* We must not drop the last reference here. */ -+ ASSERT(nx & PGT_count_mask); - } while ( (y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x ); - } - -@@ -2611,6 +2615,17 @@ static int _put_page_type(struct page_in - (page->count_info & PGC_page_table)) ) - page_set_tlbflush_timestamp(page); - } -+ else if ( unlikely((nx & (PGT_locked | PGT_count_mask)) == -+ (PGT_locked | 1)) ) -+ { -+ /* -+ * We must not drop the second to last reference when the page is -+ * locked, as page_unlock() doesn't do any cleanup of the type. -+ */ -+ cpu_relax(); -+ y = page->u.inuse.type_info; -+ continue; -+ } - - if ( likely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) == x) ) - break; diff --git a/xsa243-2.patch b/xsa243-2.patch deleted file mode 100644 index 1aca5d3..0000000 --- a/xsa243-2.patch +++ /dev/null @@ -1,54 +0,0 @@ -From: Andrew Cooper -Subject: x86/shadow: correct SH_LINEAR mapping detection in sh_guess_wrmap() - -The fix for XSA-243 / CVE-2017-15592 (c/s bf2b4eadcf379) introduced a change -in behaviour for sh_guest_wrmap(), where it had to cope with no shadow linear -mapping being present. - -As the name suggests, guest_vtable is a mapping of the guests pagetable, not -Xen's pagetable, meaning that it isn't the pagetable we need to check for the -shadow linear slot in. - -The practical upshot is that a shadow HVM vcpu which switches into 4-level -paging mode, with an L4 pagetable that contains a mapping which aliases Xen's -SH_LINEAR_PT_VIRT_START will fool the safety check for whether a SHADOW_LINEAR -mapping is present. As the check passes (when it should have failed), Xen -subsequently falls over the missing mapping with a pagefault such as: - - (XEN) Pagetable walk from ffff8140a0503880: - (XEN) L4[0x102] = 000000046c218063 ffffffffffffffff - (XEN) L3[0x102] = 000000046c218063 ffffffffffffffff - (XEN) L2[0x102] = 000000046c218063 ffffffffffffffff - (XEN) L1[0x103] = 0000000000000000 ffffffffffffffff - -This is part of XSA-243. - -Signed-off-by: Andrew Cooper -Reviewed-by: Tim Deegan - ---- a/xen/arch/x86/mm/shadow/multi.c -+++ b/xen/arch/x86/mm/shadow/multi.c -@@ -4350,11 +4350,18 @@ static int sh_guess_wrmap(struct vcpu *v - - /* Carefully look in the shadow linear map for the l1e we expect */ - #if SHADOW_PAGING_LEVELS >= 4 -- /* Is a shadow linear map is installed in the first place? */ -- sl4p = v->arch.paging.shadow.guest_vtable; -- sl4p += shadow_l4_table_offset(SH_LINEAR_PT_VIRT_START); -- if ( !(shadow_l4e_get_flags(*sl4p) & _PAGE_PRESENT) ) -- return 0; -+ /* -+ * Non-external guests (i.e. PV) have a SHADOW_LINEAR mapping from the -+ * moment their shadows are created. External guests (i.e. HVM) may not, -+ * but always have a regular linear mapping, which we can use to observe -+ * whether a SHADOW_LINEAR mapping is present. -+ */ -+ if ( paging_mode_external(d) ) -+ { -+ sl4p = __linear_l4_table + l4_linear_offset(SH_LINEAR_PT_VIRT_START); -+ if ( !(shadow_l4e_get_flags(*sl4p) & _PAGE_PRESENT) ) -+ return 0; -+ } - sl4p = sh_linear_l4_table(v) + shadow_l4_linear_offset(vaddr); - if ( !(shadow_l4e_get_flags(*sl4p) & _PAGE_PRESENT) ) - return 0; diff --git a/xsa243.patch b/xsa243.patch deleted file mode 100644 index aaff277..0000000 --- a/xsa243.patch +++ /dev/null @@ -1,93 +0,0 @@ -From: Andrew Cooper -Subject: x86/shadow: Don't create self-linear shadow mappings for 4-level translated guests - -When initially creating a monitor table for 4-level translated guests, don't -install a shadow-linear mapping. This mapping is actually self-linear, and -trips up the writeable heuristic logic into following Xen's mappings, not the -guests' shadows it was expecting to follow. - -A consequence of this is that sh_guess_wrmap() needs to cope with there being -no shadow-linear mapping present, which in practice occurs once each time a -vcpu switches to 4-level paging from a different paging mode. - -An appropriate shadow-linear slot will be inserted into the monitor table -either while constructing lower level monitor tables, or by sh_update_cr3(). - -While fixing this, clarify the safety of the other mappings. Despite -appearing unsafe, it is correct to create a guest-linear mapping for -translated domains; this is self-linear and doesn't point into the translated -domain. Drop a dead clause for translate != external guests. - -This is XSA-243. - -Signed-off-by: Andrew Cooper -Acked-by: Tim Deegan - -diff --git a/xen/arch/x86/mm/shadow/multi.c b/xen/arch/x86/mm/shadow/multi.c -index 8d4f244..a18d286 100644 ---- a/xen/arch/x86/mm/shadow/multi.c -+++ b/xen/arch/x86/mm/shadow/multi.c -@@ -1485,26 +1485,38 @@ void sh_install_xen_entries_in_l4(struct domain *d, mfn_t gl4mfn, mfn_t sl4mfn) - sl4e[shadow_l4_table_offset(RO_MPT_VIRT_START)] = shadow_l4e_empty(); - } - -- /* Shadow linear mapping for 4-level shadows. N.B. for 3-level -- * shadows on 64-bit xen, this linear mapping is later replaced by the -- * monitor pagetable structure, which is built in make_monitor_table -- * and maintained by sh_update_linear_entries. */ -- sl4e[shadow_l4_table_offset(SH_LINEAR_PT_VIRT_START)] = -- shadow_l4e_from_mfn(sl4mfn, __PAGE_HYPERVISOR_RW); -- -- /* Self linear mapping. */ -- if ( shadow_mode_translate(d) && !shadow_mode_external(d) ) -+ /* -+ * Linear mapping slots: -+ * -+ * Calling this function with gl4mfn == sl4mfn is used to construct a -+ * monitor table for translated domains. In this case, gl4mfn forms the -+ * self-linear mapping (i.e. not pointing into the translated domain), and -+ * the shadow-linear slot is skipped. The shadow-linear slot is either -+ * filled when constructing lower level monitor tables, or via -+ * sh_update_cr3() for 4-level guests. -+ * -+ * Calling this function with gl4mfn != sl4mfn is used for non-translated -+ * guests, where the shadow-linear slot is actually self-linear, and the -+ * guest-linear slot points into the guests view of its pagetables. -+ */ -+ if ( shadow_mode_translate(d) ) - { -- // linear tables may not be used with translated PV guests -- sl4e[shadow_l4_table_offset(LINEAR_PT_VIRT_START)] = -+ ASSERT(mfn_eq(gl4mfn, sl4mfn)); -+ -+ sl4e[shadow_l4_table_offset(SH_LINEAR_PT_VIRT_START)] = - shadow_l4e_empty(); - } - else - { -- sl4e[shadow_l4_table_offset(LINEAR_PT_VIRT_START)] = -- shadow_l4e_from_mfn(gl4mfn, __PAGE_HYPERVISOR_RW); -+ ASSERT(!mfn_eq(gl4mfn, sl4mfn)); -+ -+ sl4e[shadow_l4_table_offset(SH_LINEAR_PT_VIRT_START)] = -+ shadow_l4e_from_mfn(sl4mfn, __PAGE_HYPERVISOR_RW); - } - -+ sl4e[shadow_l4_table_offset(LINEAR_PT_VIRT_START)] = -+ shadow_l4e_from_mfn(gl4mfn, __PAGE_HYPERVISOR_RW); -+ - unmap_domain_page(sl4e); - } - #endif -@@ -4405,6 +4417,11 @@ static int sh_guess_wrmap(struct vcpu *v, unsigned long vaddr, mfn_t gmfn) - - /* Carefully look in the shadow linear map for the l1e we expect */ - #if SHADOW_PAGING_LEVELS >= 4 -+ /* Is a shadow linear map is installed in the first place? */ -+ sl4p = v->arch.paging.shadow.guest_vtable; -+ sl4p += shadow_l4_table_offset(SH_LINEAR_PT_VIRT_START); -+ if ( !(shadow_l4e_get_flags(*sl4p) & _PAGE_PRESENT) ) -+ return 0; - sl4p = sh_linear_l4_table(v) + shadow_l4_linear_offset(vaddr); - if ( !(shadow_l4e_get_flags(*sl4p) & _PAGE_PRESENT) ) - return 0; diff --git a/xsa244.patch b/xsa244.patch deleted file mode 100644 index c35a80b..0000000 --- a/xsa244.patch +++ /dev/null @@ -1,59 +0,0 @@ -From: Andrew Cooper -Subject: [PATCH] x86/cpu: Fix IST handling during PCPU bringup - -Clear IST references in newly allocated IDTs. Nothing good will come of -having them set before the TSS is suitably constructed (although the chances -of the CPU surviving such an IST interrupt/exception is extremely slim). - -Uniformly set the IST references after the TSS is in place. This fixes an -issue on AMD hardware, where onlining a PCPU while PCPU0 is in HVM context -will cause IST_NONE to be copied into the new IDT, making that PCPU vulnerable -to privilege escalation from PV guests until it subsequently schedules an HVM -guest. - -This is XSA-244 - -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich ---- - xen/arch/x86/cpu/common.c | 5 +++++ - xen/arch/x86/smpboot.c | 3 +++ - 2 files changed, 8 insertions(+) - -diff --git a/xen/arch/x86/cpu/common.c b/xen/arch/x86/cpu/common.c -index 78f5667..6cf3628 100644 ---- a/xen/arch/x86/cpu/common.c -+++ b/xen/arch/x86/cpu/common.c -@@ -640,6 +640,7 @@ void __init early_cpu_init(void) - * - Sets up TSS with stack pointers, including ISTs - * - Inserts TSS selector into regular and compat GDTs - * - Loads GDT, IDT, TR then null LDT -+ * - Sets up IST references in the IDT - */ - void load_system_tables(void) - { -@@ -702,6 +703,10 @@ void load_system_tables(void) - asm volatile ("ltr %w0" : : "rm" (TSS_ENTRY << 3) ); - asm volatile ("lldt %w0" : : "rm" (0) ); - -+ set_ist(&idt_tables[cpu][TRAP_double_fault], IST_DF); -+ set_ist(&idt_tables[cpu][TRAP_nmi], IST_NMI); -+ set_ist(&idt_tables[cpu][TRAP_machine_check], IST_MCE); -+ - /* - * Bottom-of-stack must be 16-byte aligned! - * -diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c -index 3ca716c..1609b62 100644 ---- a/xen/arch/x86/smpboot.c -+++ b/xen/arch/x86/smpboot.c -@@ -724,6 +724,9 @@ static int cpu_smpboot_alloc(unsigned int cpu) - if ( idt_tables[cpu] == NULL ) - goto oom; - memcpy(idt_tables[cpu], idt_table, IDT_ENTRIES * sizeof(idt_entry_t)); -+ set_ist(&idt_tables[cpu][TRAP_double_fault], IST_NONE); -+ set_ist(&idt_tables[cpu][TRAP_nmi], IST_NONE); -+ set_ist(&idt_tables[cpu][TRAP_machine_check], IST_NONE); - - for ( stub_page = 0, i = cpu & ~(STUBS_PER_PAGE - 1); - i < nr_cpu_ids && i <= (cpu | (STUBS_PER_PAGE - 1)); ++i ) diff --git a/xsa245-0001-xen-page_alloc-Cover-memory-unreserved-after-boot-in.patch b/xsa245-0001-xen-page_alloc-Cover-memory-unreserved-after-boot-in.patch deleted file mode 100644 index 2047686..0000000 --- a/xsa245-0001-xen-page_alloc-Cover-memory-unreserved-after-boot-in.patch +++ /dev/null @@ -1,48 +0,0 @@ -From a48d47febc1340f27d6c716545692641a09b414c Mon Sep 17 00:00:00 2001 -From: Julien Grall -Date: Thu, 21 Sep 2017 14:13:08 +0100 -Subject: [PATCH 1/2] xen/page_alloc: Cover memory unreserved after boot in - first_valid_mfn - -On Arm, some regions (e.g Initramfs, Dom0 Kernel...) are marked as -reserved until the hardware domain is built and they are copied into its -memory. Therefore, they will not be added in the boot allocator via -init_boot_pages. - -Instead, init_xenheap_pages will be called once the region are not used -anymore. - -Update first_valid_mfn in both init_heap_pages and init_boot_pages -(already exist) to cover all the cases. - -Signed-off-by: Julien Grall -[Adjust comment, added locking around first_valid_mfn update] -Signed-off-by: Boris Ostrovsky ---- - xen/common/page_alloc.c | 10 ++++++++++ - 1 file changed, 10 insertions(+) - -diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c -index 0b9f6cc6df..fbe5a8af39 100644 ---- a/xen/common/page_alloc.c -+++ b/xen/common/page_alloc.c -@@ -1700,6 +1700,16 @@ static void init_heap_pages( - { - unsigned long i; - -+ /* -+ * Some pages may not go through the boot allocator (e.g reserved -+ * memory at boot but released just after --- kernel, initramfs, -+ * etc.). -+ * Update first_valid_mfn to ensure those regions are covered. -+ */ -+ spin_lock(&heap_lock); -+ first_valid_mfn = min_t(unsigned long, page_to_mfn(pg), first_valid_mfn); -+ spin_unlock(&heap_lock); -+ - for ( i = 0; i < nr_pages; i++ ) - { - unsigned int nid = phys_to_nid(page_to_maddr(pg+i)); --- -2.11.0 - diff --git a/xsa245-0002-xen-arm-Correctly-report-the-memory-region-in-the-du.patch b/xsa245-0002-xen-arm-Correctly-report-the-memory-region-in-the-du.patch deleted file mode 100644 index cd4d270..0000000 --- a/xsa245-0002-xen-arm-Correctly-report-the-memory-region-in-the-du.patch +++ /dev/null @@ -1,73 +0,0 @@ -From cbfcf039d0e0b6f4c4cb3de612f7bf788a0c47cd Mon Sep 17 00:00:00 2001 -From: Julien Grall -Date: Mon, 18 Sep 2017 14:24:08 +0100 -Subject: [PATCH 2/2] xen/arm: Correctly report the memory region in the dummy - NUMA helpers - -NUMA is currently not supported on Arm. Because common code is -NUMA-aware, dummy helpers are instead provided to expose a single node. - -Those helpers are for instance used to know the region to scrub. - -However the memory region is not reported correctly. Indeed, the -frametable may not be at the beginning of the memory and there might be -multiple memory banks. This will lead to not scrub some part of the -memory. - -The memory information can be found using: - * first_valid_mfn as the start of the memory - * max_page - first_valid_mfn as the spanned pages - -Note that first_valid_mfn is now been exported. The prototype has been -added in asm-arm/numa.h and not in a common header because I would -expect the variable to become static once NUMA is fully supported on -Arm. - -Signed-off-by: Julien Grall ---- - xen/common/page_alloc.c | 6 +++++- - xen/include/asm-arm/numa.h | 10 ++++++++-- - 2 files changed, 13 insertions(+), 3 deletions(-) - -diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c -index fbe5a8af39..472c6fe329 100644 ---- a/xen/common/page_alloc.c -+++ b/xen/common/page_alloc.c -@@ -192,7 +192,11 @@ PAGE_LIST_HEAD(page_broken_list); - * BOOT-TIME ALLOCATOR - */ - --static unsigned long __initdata first_valid_mfn = ~0UL; -+/* -+ * first_valid_mfn is exported because it is use in ARM specific NUMA -+ * helpers. See comment in asm-arm/numa.h. -+ */ -+unsigned long first_valid_mfn = ~0UL; - - static struct bootmem_region { - unsigned long s, e; /* MFNs @s through @e-1 inclusive are free */ -diff --git a/xen/include/asm-arm/numa.h b/xen/include/asm-arm/numa.h -index a2c1a3476d..3e7384da9e 100644 ---- a/xen/include/asm-arm/numa.h -+++ b/xen/include/asm-arm/numa.h -@@ -12,9 +12,15 @@ static inline __attribute__((pure)) nodeid_t phys_to_nid(paddr_t addr) - return 0; - } - -+/* -+ * TODO: make first_valid_mfn static when NUMA is supported on Arm, this -+ * is required because the dummy helpers is using it. -+ */ -+extern unsigned long first_valid_mfn; -+ - /* XXX: implement NUMA support */ --#define node_spanned_pages(nid) (total_pages) --#define node_start_pfn(nid) (pdx_to_pfn(frametable_base_pdx)) -+#define node_spanned_pages(nid) (max_page - first_valid_mfn) -+#define node_start_pfn(nid) (first_valid_mfn) - #define __node_distance(a, b) (20) - - static inline unsigned int arch_get_dma_bitsize(void) --- -2.11.0 -