From df7d53df1cd49569a2729aa50a3298400814f58c Mon Sep 17 00:00:00 2001 From: Michael Young Date: Mar 31 2015 18:24:08 +0000 Subject: Long latency MMIO mapping operations are not preemptible [XSA-125, CVE-2015-2752] (#1207741) Unmediated PCI command register access in qemu [XSA-126, CVE-2015-2756] (#1307738) Certain domctl operations may be abused to lock up the host [XSA-127, CVE-2015-2751] (#1207739) --- diff --git a/xen.spec b/xen.spec index 6d2a5f3..c7354a0 100644 --- a/xen.spec +++ b/xen.spec @@ -51,7 +51,7 @@ Summary: Xen is a virtual machine monitor Name: xen Version: 4.5.0 -Release: 6%{?dist} +Release: 7%{?dist} Group: Development/Libraries License: GPLv2+ and LGPLv2+ and BSD URL: http://xen.org/ @@ -93,6 +93,10 @@ Patch24: xsa122.patch Patch25: xsa123.patch Patch26: xsa119-unstable.patch Patch27: xsa98-update.patch +Patch28: xsa125.patch +Patch29: xsa126-qemuu.patch +Patch30: xsa126-qemut.patch +Patch31: xsa127-4.x.patch BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root BuildRequires: transfig libidn-devel zlib-devel texi2html SDL-devel curl-devel @@ -285,6 +289,10 @@ manage Xen virtual machines. %patch25 -p1 %patch26 -p1 %patch27 -p1 +%patch28 -p1 +%patch29 -p1 +%patch30 -p1 +%patch31 -p1 # stubdom sources cp -v %{SOURCE10} %{SOURCE11} %{SOURCE12} %{SOURCE13} %{SOURCE14} %{SOURCE15} stubdom @@ -789,6 +797,14 @@ rm -rf %{buildroot} %endif %changelog +* Tue Mar 31 2015 Michael Young - 4.5.0-7 +- Long latency MMIO mapping operations are not preemptible [XSA-125, + CVE-2015-2752] (#1207741) +- Unmediated PCI command register access in qemu [XSA-126, + CVE-2015-2756] (#1307738) +- Certain domctl operations may be abused to lock up the host [XSA-127, + CVE-2015-2751] (#1207739) + * Fri Mar 13 2015 Michael Young - 4.5.0-6 - Additional patch for XSA-98 on arm64 diff --git a/xsa125.patch b/xsa125.patch new file mode 100644 index 0000000..ad5dbb3 --- /dev/null +++ b/xsa125.patch @@ -0,0 +1,154 @@ +From 98670acc98cad5aee0e0714694a64d3b96675c36 Mon Sep 17 00:00:00 2001 +From: Konrad Rzeszutek Wilk +Date: Wed, 19 Nov 2014 12:57:11 -0500 +Subject: [PATCH] Limit XEN_DOMCTL_memory_mapping hypercall to only process up + to 64 GFNs (or less) + +Said hypercall for large BARs can take quite a while. As such +we can require that the hypercall MUST break up the request +in smaller values. + +Another approach is to add preemption to it - whether we do the +preemption using hypercall_create_continuation or returning +EAGAIN to userspace (and have it re-invocate the call) - either +way the issue we cannot easily solve is that in 'map_mmio_regions' +if we encounter an error we MUST call 'unmap_mmio_regions' for the +whole BAR region. + +Since the preemption would re-use input fields such as nr_mfns, +first_gfn, first_mfn - we would lose the original values - +and only undo what was done in the current round (i.e. ignoring +anything that was done prior to earlier preemptions). + +Unless we re-used the return value as 'EAGAIN|nr_mfns_done<<10' but +that puts a limit (since the return value is a long) on the amount +of nr_mfns that can provided. + +This patch sidesteps this problem by: + - Setting an hard limit of nr_mfns having to be 64 or less. + - Toolstack adjusts correspondingly to the nr_mfn limit. + - If the there is an error when adding the toolstack will call the + remove operation to remove the whole region. + +The need to break this hypercall down is for large BARs can take +more than the guest (initial domain usually) time-slice. This has +the negative result in that the guest is locked out for a long +duration and is unable to act on any pending events. + +We also augment the code to return zero if nr_mfns instead +of trying to the hypercall. + +Suggested-by: Jan Beulich +Acked-by: Jan Beulich +Signed-off-by: Konrad Rzeszutek Wilk +Acked-by: Ian Campbell +--- +[v50: Simplify loop] +[v51: If max_batch_sz 1 (or less) we would return zero. Fix that] +[v52: Handle nr_mfns being zero] +[v53: Fix up return value] +--- + tools/libxc/xc_domain.c | 46 +++++++++++++++++++++++++++++++++++++++++---- + xen/common/domctl.c | 5 +++++ + xen/include/public/domctl.h | 1 + + 3 files changed, 48 insertions(+), 4 deletions(-) + +diff --git a/tools/libxc/xc_domain.c b/tools/libxc/xc_domain.c +index 845d1d7..bba7672 100644 +--- a/tools/libxc/xc_domain.c ++++ b/tools/libxc/xc_domain.c +@@ -1988,6 +1988,8 @@ int xc_domain_memory_mapping( + { + DECLARE_DOMCTL; + xc_dominfo_t info; ++ int ret = 0, err; ++ unsigned long done = 0, nr, max_batch_sz; + + if ( xc_domain_getinfo(xch, domid, 1, &info) != 1 || + info.domid != domid ) +@@ -1998,14 +2000,50 @@ int xc_domain_memory_mapping( + if ( !xc_core_arch_auto_translated_physmap(&info) ) + return 0; + ++ if ( !nr_mfns ) ++ return 0; ++ + domctl.cmd = XEN_DOMCTL_memory_mapping; + domctl.domain = domid; +- domctl.u.memory_mapping.first_gfn = first_gfn; +- domctl.u.memory_mapping.first_mfn = first_mfn; +- domctl.u.memory_mapping.nr_mfns = nr_mfns; + domctl.u.memory_mapping.add_mapping = add_mapping; ++ max_batch_sz = nr_mfns; ++ do ++ { ++ nr = min(nr_mfns - done, max_batch_sz); ++ domctl.u.memory_mapping.nr_mfns = nr; ++ domctl.u.memory_mapping.first_gfn = first_gfn + done; ++ domctl.u.memory_mapping.first_mfn = first_mfn + done; ++ err = do_domctl(xch, &domctl); ++ if ( err && errno == E2BIG ) ++ { ++ if ( max_batch_sz <= 1 ) ++ break; ++ max_batch_sz >>= 1; ++ continue; ++ } ++ /* Save the first error... */ ++ if ( !ret ) ++ ret = err; ++ /* .. and ignore the rest of them when removing. */ ++ if ( err && add_mapping != DPCI_REMOVE_MAPPING ) ++ break; + +- return do_domctl(xch, &domctl); ++ done += nr; ++ } while ( done < nr_mfns ); ++ ++ /* ++ * Undo what we have done unless unmapping, by unmapping the entire region. ++ * Errors here are ignored. ++ */ ++ if ( ret && add_mapping != DPCI_REMOVE_MAPPING ) ++ xc_domain_memory_mapping(xch, domid, first_gfn, first_mfn, nr_mfns, ++ DPCI_REMOVE_MAPPING); ++ ++ /* We might get E2BIG so many times that we never advance. */ ++ if ( !done && !ret ) ++ ret = -1; ++ ++ return ret; + } + + int xc_domain_ioport_mapping( +diff --git a/xen/common/domctl.c b/xen/common/domctl.c +index d396cc4..c2e60a7 100644 +--- a/xen/common/domctl.c ++++ b/xen/common/domctl.c +@@ -1027,6 +1027,11 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl) + (gfn + nr_mfns - 1) < gfn ) /* wrap? */ + break; + ++ ret = -E2BIG; ++ /* Must break hypercall up as this could take a while. */ ++ if ( nr_mfns > 64 ) ++ break; ++ + ret = -EPERM; + if ( !iomem_access_permitted(current->domain, mfn, mfn_end) || + !iomem_access_permitted(d, mfn, mfn_end) ) +diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h +index ca0e51e..0c9f474 100644 +--- a/xen/include/public/domctl.h ++++ b/xen/include/public/domctl.h +@@ -543,6 +543,7 @@ DEFINE_XEN_GUEST_HANDLE(xen_domctl_bind_pt_irq_t); + + + /* Bind machine I/O address range -> HVM address range. */ ++/* If this returns -E2BIG lower nr_mfns value. */ + /* XEN_DOMCTL_memory_mapping */ + #define DPCI_ADD_MAPPING 1 + #define DPCI_REMOVE_MAPPING 0 +-- +2.1.0 + diff --git a/xsa126-qemut.patch b/xsa126-qemut.patch new file mode 100644 index 0000000..796ff9e --- /dev/null +++ b/xsa126-qemut.patch @@ -0,0 +1,151 @@ +xen: limit guest control of PCI command register + +Otherwise the guest can abuse that control to cause e.g. PCIe +Unsupported Request responses (by disabling memory and/or I/O decoding +and subsequently causing [CPU side] accesses to the respective address +ranges), which (depending on system configuration) may be fatal to the +host. + +This is CVE-2015-2756 / XSA-126. + +Signed-off-by: Jan Beulich +Reviewed-by: Stefano Stabellini +Acked-by: Ian Campbell + +--- a/tools/qemu-xen-traditional/hw/pass-through.c ++++ b/tools/qemu-xen-traditional/hw/pass-through.c +@@ -172,9 +172,6 @@ static int pt_word_reg_read(struct pt_de + static int pt_long_reg_read(struct pt_dev *ptdev, + struct pt_reg_tbl *cfg_entry, + uint32_t *value, uint32_t valid_mask); +-static int pt_cmd_reg_read(struct pt_dev *ptdev, +- struct pt_reg_tbl *cfg_entry, +- uint16_t *value, uint16_t valid_mask); + static int pt_bar_reg_read(struct pt_dev *ptdev, + struct pt_reg_tbl *cfg_entry, + uint32_t *value, uint32_t valid_mask); +@@ -286,9 +283,9 @@ static struct pt_reg_info_tbl pt_emu_reg + .size = 2, + .init_val = 0x0000, + .ro_mask = 0xF880, +- .emu_mask = 0x0740, ++ .emu_mask = 0x0743, + .init = pt_common_reg_init, +- .u.w.read = pt_cmd_reg_read, ++ .u.w.read = pt_word_reg_read, + .u.w.write = pt_cmd_reg_write, + .u.w.restore = pt_cmd_reg_restore, + }, +@@ -1905,7 +1902,7 @@ static int pt_dev_is_virtfn(struct pci_d + return rc; + } + +-static int pt_register_regions(struct pt_dev *assigned_device) ++static int pt_register_regions(struct pt_dev *assigned_device, uint16_t *cmd) + { + int i = 0; + uint32_t bar_data = 0; +@@ -1925,17 +1922,26 @@ static int pt_register_regions(struct pt + + /* Register current region */ + if ( pci_dev->base_addr[i] & PCI_ADDRESS_SPACE_IO ) ++ { + pci_register_io_region((PCIDevice *)assigned_device, i, + (uint32_t)pci_dev->size[i], PCI_ADDRESS_SPACE_IO, + pt_ioport_map); ++ *cmd |= PCI_COMMAND_IO; ++ } + else if ( pci_dev->base_addr[i] & PCI_ADDRESS_SPACE_MEM_PREFETCH ) ++ { + pci_register_io_region((PCIDevice *)assigned_device, i, + (uint32_t)pci_dev->size[i], PCI_ADDRESS_SPACE_MEM_PREFETCH, + pt_iomem_map); ++ *cmd |= PCI_COMMAND_MEMORY; ++ } + else ++ { + pci_register_io_region((PCIDevice *)assigned_device, i, + (uint32_t)pci_dev->size[i], PCI_ADDRESS_SPACE_MEM, + pt_iomem_map); ++ *cmd |= PCI_COMMAND_MEMORY; ++ } + + PT_LOG("IO region registered (size=0x%08x base_addr=0x%08x)\n", + (uint32_t)(pci_dev->size[i]), +@@ -3263,27 +3269,6 @@ static int pt_long_reg_read(struct pt_de + return 0; + } + +-/* read Command register */ +-static int pt_cmd_reg_read(struct pt_dev *ptdev, +- struct pt_reg_tbl *cfg_entry, +- uint16_t *value, uint16_t valid_mask) +-{ +- struct pt_reg_info_tbl *reg = cfg_entry->reg; +- uint16_t valid_emu_mask = 0; +- uint16_t emu_mask = reg->emu_mask; +- +- if ( ptdev->is_virtfn ) +- emu_mask |= PCI_COMMAND_MEMORY; +- if ( pt_is_iomul(ptdev) ) +- emu_mask |= PCI_COMMAND_IO; +- +- /* emulate word register */ +- valid_emu_mask = emu_mask & valid_mask; +- *value = PT_MERGE_VALUE(*value, cfg_entry->data, ~valid_emu_mask); +- +- return 0; +-} +- + /* read BAR */ + static int pt_bar_reg_read(struct pt_dev *ptdev, + struct pt_reg_tbl *cfg_entry, +@@ -3418,19 +3403,13 @@ static int pt_cmd_reg_write(struct pt_de + uint16_t writable_mask = 0; + uint16_t throughable_mask = 0; + uint16_t wr_value = *value; +- uint16_t emu_mask = reg->emu_mask; +- +- if ( ptdev->is_virtfn ) +- emu_mask |= PCI_COMMAND_MEMORY; +- if ( pt_is_iomul(ptdev) ) +- emu_mask |= PCI_COMMAND_IO; + + /* modify emulate register */ + writable_mask = ~reg->ro_mask & valid_mask; + cfg_entry->data = PT_MERGE_VALUE(*value, cfg_entry->data, writable_mask); + + /* create value for writing to I/O device register */ +- throughable_mask = ~emu_mask & valid_mask; ++ throughable_mask = ~reg->emu_mask & valid_mask; + + if (*value & PCI_COMMAND_DISABLE_INTx) + { +@@ -4211,6 +4190,7 @@ static struct pt_dev * register_real_dev + struct pt_dev *assigned_device = NULL; + struct pci_dev *pci_dev; + uint8_t e_device, e_intx; ++ uint16_t cmd = 0; + char *key, *val; + int msi_translate, power_mgmt; + +@@ -4300,7 +4280,7 @@ static struct pt_dev * register_real_dev + assigned_device->dev.config[i] = pci_read_byte(pci_dev, i); + + /* Handle real device's MMIO/PIO BARs */ +- pt_register_regions(assigned_device); ++ pt_register_regions(assigned_device, &cmd); + + /* Setup VGA bios for passthroughed gfx */ + if ( setup_vga_pt(assigned_device) < 0 ) +@@ -4378,6 +4358,10 @@ static struct pt_dev * register_real_dev + } + + out: ++ if (cmd) ++ pci_write_word(pci_dev, PCI_COMMAND, ++ *(uint16_t *)(&assigned_device->dev.config[PCI_COMMAND]) | cmd); ++ + PT_LOG("Real physical device %02x:%02x.%x registered successfuly!\n" + "IRQ type = %s\n", r_bus, r_dev, r_func, + assigned_device->msi_trans_en? "MSI-INTx":"INTx"); diff --git a/xsa126-qemuu.patch b/xsa126-qemuu.patch new file mode 100644 index 0000000..84fd4ae --- /dev/null +++ b/xsa126-qemuu.patch @@ -0,0 +1,128 @@ +xen: limit guest control of PCI command register + +Otherwise the guest can abuse that control to cause e.g. PCIe +Unsupported Request responses (by disabling memory and/or I/O decoding +and subsequently causing [CPU side] accesses to the respective address +ranges), which (depending on system configuration) may be fatal to the +host. + +This is CVE-2015-2756 / XSA-126. + +Signed-off-by: Jan Beulich +Reviewed-by: Stefano Stabellini +Acked-by: Ian Campbell + +--- a/tools/qemu-xen/hw/xen/xen_pt.c ++++ b/tools/qemu-xen/hw/xen/xen_pt.c +@@ -388,7 +388,7 @@ static const MemoryRegionOps ops = { + .write = xen_pt_bar_write, + }; + +-static int xen_pt_register_regions(XenPCIPassthroughState *s) ++static int xen_pt_register_regions(XenPCIPassthroughState *s, uint16_t *cmd) + { + int i = 0; + XenHostPCIDevice *d = &s->real_device; +@@ -406,6 +406,7 @@ static int xen_pt_register_regions(XenPC + + if (r->type & XEN_HOST_PCI_REGION_TYPE_IO) { + type = PCI_BASE_ADDRESS_SPACE_IO; ++ *cmd |= PCI_COMMAND_IO; + } else { + type = PCI_BASE_ADDRESS_SPACE_MEMORY; + if (r->type & XEN_HOST_PCI_REGION_TYPE_PREFETCH) { +@@ -414,6 +415,7 @@ static int xen_pt_register_regions(XenPC + if (r->type & XEN_HOST_PCI_REGION_TYPE_MEM_64) { + type |= PCI_BASE_ADDRESS_MEM_TYPE_64; + } ++ *cmd |= PCI_COMMAND_MEMORY; + } + + memory_region_init_io(&s->bar[i], OBJECT(s), &ops, &s->dev, +@@ -638,6 +640,7 @@ static int xen_pt_initfn(PCIDevice *d) + XenPCIPassthroughState *s = DO_UPCAST(XenPCIPassthroughState, dev, d); + int rc = 0; + uint8_t machine_irq = 0; ++ uint16_t cmd = 0; + int pirq = XEN_PT_UNASSIGNED_PIRQ; + + /* register real device */ +@@ -672,7 +675,7 @@ static int xen_pt_initfn(PCIDevice *d) + s->io_listener = xen_pt_io_listener; + + /* Handle real device's MMIO/PIO BARs */ +- xen_pt_register_regions(s); ++ xen_pt_register_regions(s, &cmd); + + /* reinitialize each config register to be emulated */ + if (xen_pt_config_init(s)) { +@@ -736,6 +739,11 @@ static int xen_pt_initfn(PCIDevice *d) + } + + out: ++ if (cmd) { ++ xen_host_pci_set_word(&s->real_device, PCI_COMMAND, ++ pci_get_word(d->config + PCI_COMMAND) | cmd); ++ } ++ + memory_listener_register(&s->memory_listener, &address_space_memory); + memory_listener_register(&s->io_listener, &address_space_io); + XEN_PT_LOG(d, +--- a/tools/qemu-xen/hw/xen/xen_pt_config_init.c ++++ b/tools/qemu-xen/hw/xen/xen_pt_config_init.c +@@ -286,23 +286,6 @@ static int xen_pt_irqpin_reg_init(XenPCI + } + + /* Command register */ +-static int xen_pt_cmd_reg_read(XenPCIPassthroughState *s, XenPTReg *cfg_entry, +- uint16_t *value, uint16_t valid_mask) +-{ +- XenPTRegInfo *reg = cfg_entry->reg; +- uint16_t valid_emu_mask = 0; +- uint16_t emu_mask = reg->emu_mask; +- +- if (s->is_virtfn) { +- emu_mask |= PCI_COMMAND_MEMORY; +- } +- +- /* emulate word register */ +- valid_emu_mask = emu_mask & valid_mask; +- *value = XEN_PT_MERGE_VALUE(*value, cfg_entry->data, ~valid_emu_mask); +- +- return 0; +-} + static int xen_pt_cmd_reg_write(XenPCIPassthroughState *s, XenPTReg *cfg_entry, + uint16_t *val, uint16_t dev_value, + uint16_t valid_mask) +@@ -310,18 +293,13 @@ static int xen_pt_cmd_reg_write(XenPCIPa + XenPTRegInfo *reg = cfg_entry->reg; + uint16_t writable_mask = 0; + uint16_t throughable_mask = 0; +- uint16_t emu_mask = reg->emu_mask; +- +- if (s->is_virtfn) { +- emu_mask |= PCI_COMMAND_MEMORY; +- } + + /* modify emulate register */ + writable_mask = ~reg->ro_mask & valid_mask; + cfg_entry->data = XEN_PT_MERGE_VALUE(*val, cfg_entry->data, writable_mask); + + /* create value for writing to I/O device register */ +- throughable_mask = ~emu_mask & valid_mask; ++ throughable_mask = ~reg->emu_mask & valid_mask; + + if (*val & PCI_COMMAND_INTX_DISABLE) { + throughable_mask |= PCI_COMMAND_INTX_DISABLE; +@@ -605,9 +583,9 @@ static XenPTRegInfo xen_pt_emu_reg_heade + .size = 2, + .init_val = 0x0000, + .ro_mask = 0xF880, +- .emu_mask = 0x0740, ++ .emu_mask = 0x0743, + .init = xen_pt_common_reg_init, +- .u.w.read = xen_pt_cmd_reg_read, ++ .u.w.read = xen_pt_word_reg_read, + .u.w.write = xen_pt_cmd_reg_write, + }, + /* Capabilities Pointer reg */ diff --git a/xsa127-4.x.patch b/xsa127-4.x.patch new file mode 100644 index 0000000..463b1dd --- /dev/null +++ b/xsa127-4.x.patch @@ -0,0 +1,50 @@ +domctl: don't allow a toolstack domain to call domain_pause() on itself + +These DOMCTL subops were accidentally declared safe for disaggregation +in the wake of XSA-77. + +This is XSA-127. + +Signed-off-by: Andrew Cooper +Reviewed-by: Jan Beulich +Acked-by: Ian Campbell + +--- a/xen/arch/x86/domctl.c ++++ b/xen/arch/x86/domctl.c +@@ -888,6 +888,10 @@ long arch_do_domctl( + { + xen_guest_tsc_info_t info; + ++ ret = -EINVAL; ++ if ( d == current->domain ) /* no domain_pause() */ ++ break; ++ + domain_pause(d); + tsc_get_info(d, &info.tsc_mode, + &info.elapsed_nsec, +@@ -903,6 +907,10 @@ long arch_do_domctl( + + case XEN_DOMCTL_settscinfo: + { ++ ret = -EINVAL; ++ if ( d == current->domain ) /* no domain_pause() */ ++ break; ++ + domain_pause(d); + tsc_set_info(d, domctl->u.tsc_info.info.tsc_mode, + domctl->u.tsc_info.info.elapsed_nsec, +--- a/xen/common/domctl.c ++++ b/xen/common/domctl.c +@@ -522,8 +522,10 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xe + + case XEN_DOMCTL_resumedomain: + { +- domain_resume(d); +- ret = 0; ++ if ( d == current->domain ) /* no domain_pause() */ ++ ret = -EINVAL; ++ else ++ domain_resume(d); + } + break; + diff --git a/xsa127.patch b/xsa127.patch new file mode 100644 index 0000000..391d0f6 --- /dev/null +++ b/xsa127.patch @@ -0,0 +1,49 @@ +domctl: don't allow a toolstack domain to call domain_pause() on itself + +These DOMCTL subops were accidentally declared safe for disaggregation +in the wake of XSA-77. + +This is XSA-127. + +Signed-off-by: Andrew Cooper +Reviewed-by: Jan Beulich +Acked-by: Ian Campbell + +--- a/xen/arch/x86/domctl.c ++++ b/xen/arch/x86/domctl.c +@@ -884,6 +884,10 @@ long arch_do_domctl( + { + xen_guest_tsc_info_t info; + ++ ret = -EINVAL; ++ if ( d == current->domain ) /* no domain_pause() */ ++ break; ++ + domain_pause(d); + tsc_get_info(d, &info.tsc_mode, + &info.elapsed_nsec, +@@ -899,6 +903,10 @@ long arch_do_domctl( + + case XEN_DOMCTL_settscinfo: + { ++ ret = -EINVAL; ++ if ( d == current->domain ) /* no domain_pause() */ ++ break; ++ + domain_pause(d); + tsc_set_info(d, domctl->u.tsc_info.info.tsc_mode, + domctl->u.tsc_info.info.elapsed_nsec, +--- a/xen/common/domctl.c ++++ b/xen/common/domctl.c +@@ -531,7 +531,10 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xe + break; + + case XEN_DOMCTL_resumedomain: +- domain_resume(d); ++ if ( d == current->domain ) /* no domain_pause() */ ++ ret = -EINVAL; ++ else ++ domain_resume(d); + break; + + case XEN_DOMCTL_createdomain: