From 5d36bf8069d7f75ecb35c94afc7bb88ccf6d226f Mon Sep 17 00:00:00 2001
From: Michael Young
Date: Aug 12 2014 21:14:20 +0000
Subject: Long latency virtual-mmu operations are not preemptible

---

diff --git a/xen.spec b/xen.spec
index b6aa2f5..0cf9137 100644
--- a/xen.spec
+++ b/xen.spec
@@ -53,7 +53,7 @@ Summary: Xen is a virtual machine monitor
 Name: xen
 Version: 4.4.0
-Release: 9%{?dist}
+Release: 10%{?dist}
 Group: Development/Libraries
 License: GPLv2+ and LGPLv2+ and BSD
 URL: http://xen.org/
@@ -106,6 +106,7 @@ Patch24: xsa92.patch
 Patch25: xsa96.patch
 Patch26: xsa100.patch
 Patch27: xen.git-6b4d71d028f445cba7426a144751fddc8bfdd67b.patch
+Patch28: xsa97-hap-4.4.patch
 
 Patch99: localgcc490fix.patch
 Patch100: xen-configure-xend.patch
@@ -297,6 +298,7 @@ manage Xen virtual machines.
 %patch25 -p1
 %patch26 -p1
 %patch27 -p1
+%patch28 -p1
 
 %patch99 -p1
 %patch100 -p1
@@ -894,6 +896,10 @@ rm -rf %{buildroot}
 %endif
 
 %changelog
+* Tue Aug 12 2014 Michael Young - 4.4.0-10
+- Long latency virtual-mmu operations are not preemptible
+  [XSA-97, CVE-2014-5146]
+
 * Thu Aug 07 2014 Richard W.M. Jones - 4.4.0-9
 - ocaml-4.02.0-0.8.git10e45753.fc22 rebuild.
 
diff --git a/xsa97-hap-4.4.patch b/xsa97-hap-4.4.patch
new file mode 100644
index 0000000..d005efc
--- /dev/null
+++ b/xsa97-hap-4.4.patch
@@ -0,0 +1,485 @@
+x86/paging: make log-dirty operations preemptible
+
+Both the freeing and the inspection of the bitmap get done in (nested)
+loops which - besides having a rather high iteration count in general,
+albeit that would be covered by XSA-77 - have the number of non-trivial
+iterations they need to perform (indirectly) controllable by both the
+guest they are for and any domain controlling the guest (including the
+one running qemu for it).
+
+This is XSA-97.
+
+Signed-off-by: Jan Beulich
+Reviewed-by: Tim Deegan
+
+--- a/xen/arch/x86/domain.c
++++ b/xen/arch/x86/domain.c
+@@ -1915,7 +1915,9 @@ int domain_relinquish_resources(struct d
+     pci_release_devices(d);
+ 
+     /* Tear down paging-assistance stuff. */
+-    paging_teardown(d);
++    ret = paging_teardown(d);
++    if ( ret )
++        return ret;
+ 
+     /* Drop the in-use references to page-table bases. */
+     for_each_vcpu ( d, v )
+--- a/xen/arch/x86/domctl.c
++++ b/xen/arch/x86/domctl.c
+@@ -61,6 +61,9 @@ long arch_do_domctl(
+         ret = paging_domctl(d,
+                             &domctl->u.shadow_op,
+                             guest_handle_cast(u_domctl, void));
++        if ( ret == -EAGAIN )
++            return hypercall_create_continuation(__HYPERVISOR_domctl,
++                                                 "h", u_domctl);
+         copyback = 1;
+     }
+     break;
+--- a/xen/arch/x86/mm/hap/hap.c
++++ b/xen/arch/x86/mm/hap/hap.c
+@@ -572,8 +572,7 @@ int hap_domctl(struct domain *d, xen_dom
+         paging_unlock(d);
+         if ( preempted )
+             /* Not finished. Set up to re-run the call. */
+-            rc = hypercall_create_continuation(__HYPERVISOR_domctl, "h",
+-                                               u_domctl);
++            rc = -EAGAIN;
+         else
+             /* Finished. Return the new allocation */
+             sc->mb = hap_get_allocation(d);
+--- a/xen/arch/x86/mm/paging.c
++++ b/xen/arch/x86/mm/paging.c
+@@ -26,6 +26,7 @@
+ #include
+ #include
+ #include
++#include
+ #include
+ #include
+ #include
+@@ -116,26 +117,46 @@ static void paging_free_log_dirty_page(s
+     d->arch.paging.free_page(d, mfn_to_page(mfn));
+ }
+ 
+-void paging_free_log_dirty_bitmap(struct domain *d)
++static int paging_free_log_dirty_bitmap(struct domain *d, int rc)
+ {
+     mfn_t *l4, *l3, *l2;
+     int i4, i3, i2;
+ 
++    paging_lock(d);
++
+     if ( !mfn_valid(d->arch.paging.log_dirty.top) )
+-        return;
++    {
++        paging_unlock(d);
++        return 0;
++    }
+ 
+-    paging_lock(d);
++    if ( !d->arch.paging.preempt.vcpu )
++    {
++        memset(&d->arch.paging.preempt.log_dirty, 0,
++               sizeof(d->arch.paging.preempt.log_dirty));
++        ASSERT(rc <= 0);
++        d->arch.paging.preempt.log_dirty.done = -rc;
++    }
++    else if ( d->arch.paging.preempt.vcpu != current ||
++              d->arch.paging.preempt.op != XEN_DOMCTL_SHADOW_OP_OFF )
++    {
++        paging_unlock(d);
++        return -EBUSY;
++    }
+ 
+     l4 = map_domain_page(mfn_x(d->arch.paging.log_dirty.top));
++    i4 = d->arch.paging.preempt.log_dirty.i4;
++    i3 = d->arch.paging.preempt.log_dirty.i3;
++    rc = 0;
+ 
+-    for ( i4 = 0; i4 < LOGDIRTY_NODE_ENTRIES; i4++ )
++    for ( ; i4 < LOGDIRTY_NODE_ENTRIES; i4++, i3 = 0 )
+     {
+         if ( !mfn_valid(l4[i4]) )
+             continue;
+ 
+         l3 = map_domain_page(mfn_x(l4[i4]));
+ 
+-        for ( i3 = 0; i3 < LOGDIRTY_NODE_ENTRIES; i3++ )
++        for ( ; i3 < LOGDIRTY_NODE_ENTRIES; i3++ )
+         {
+             if ( !mfn_valid(l3[i3]) )
+                 continue;
+@@ -148,20 +169,54 @@ void paging_free_log_dirty_bitmap(struct
+ 
+             unmap_domain_page(l2);
+             paging_free_log_dirty_page(d, l3[i3]);
++            l3[i3] = _mfn(INVALID_MFN);
++
++            if ( i3 < LOGDIRTY_NODE_ENTRIES - 1 && hypercall_preempt_check() )
++            {
++                d->arch.paging.preempt.log_dirty.i3 = i3 + 1;
++                d->arch.paging.preempt.log_dirty.i4 = i4;
++                rc = -EAGAIN;
++                break;
++            }
+         }
+ 
+         unmap_domain_page(l3);
++        if ( rc )
++            break;
+         paging_free_log_dirty_page(d, l4[i4]);
++        l4[i4] = _mfn(INVALID_MFN);
++
++        if ( i4 < LOGDIRTY_NODE_ENTRIES - 1 && hypercall_preempt_check() )
++        {
++            d->arch.paging.preempt.log_dirty.i3 = 0;
++            d->arch.paging.preempt.log_dirty.i4 = i4 + 1;
++            rc = -EAGAIN;
++            break;
++        }
+     }
+ 
+     unmap_domain_page(l4);
+-    paging_free_log_dirty_page(d, d->arch.paging.log_dirty.top);
+-    d->arch.paging.log_dirty.top = _mfn(INVALID_MFN);
+ 
+-    ASSERT(d->arch.paging.log_dirty.allocs == 0);
+-    d->arch.paging.log_dirty.failed_allocs = 0;
++    if ( !rc )
++    {
++        paging_free_log_dirty_page(d, d->arch.paging.log_dirty.top);
++        d->arch.paging.log_dirty.top = _mfn(INVALID_MFN);
++
++        ASSERT(d->arch.paging.log_dirty.allocs == 0);
++        d->arch.paging.log_dirty.failed_allocs = 0;
++
++        rc = -d->arch.paging.preempt.log_dirty.done;
++        d->arch.paging.preempt.vcpu = NULL;
++    }
++    else
++    {
++        d->arch.paging.preempt.vcpu = current;
++        d->arch.paging.preempt.op = XEN_DOMCTL_SHADOW_OP_OFF;
++    }
+ 
+     paging_unlock(d);
++
++    return rc;
+ }
+ 
+ int paging_log_dirty_enable(struct domain *d, bool_t log_global)
+@@ -178,15 +233,25 @@ int paging_log_dirty_enable(struct domai
+     return ret;
+ }
+ 
+-int paging_log_dirty_disable(struct domain *d)
++static int paging_log_dirty_disable(struct domain *d, bool_t resuming)
+ {
+-    int ret;
++    int ret = 1;
++
++    if ( !resuming )
++    {
++        domain_pause(d);
++        /* Safe because the domain is paused. */
++        ret = d->arch.paging.log_dirty.disable_log_dirty(d);
++        ASSERT(ret <= 0);
++    }
+ 
+-    domain_pause(d);
+-    /* Safe because the domain is paused. */
+-    ret = d->arch.paging.log_dirty.disable_log_dirty(d);
+     if ( !paging_mode_log_dirty(d) )
+-        paging_free_log_dirty_bitmap(d);
++    {
++        ret = paging_free_log_dirty_bitmap(d, ret);
++        if ( ret == -EAGAIN )
++            return ret;
++    }
++
+     domain_unpause(d);
+ 
+     return ret;
+@@ -326,7 +391,9 @@ int paging_mfn_is_dirty(struct domain *d
+ 
+ /* Read a domain's log-dirty bitmap and stats. If the operation is a CLEAN,
+  * clear the bitmap and stats as well. */
+-int paging_log_dirty_op(struct domain *d, struct xen_domctl_shadow_op *sc)
++static int paging_log_dirty_op(struct domain *d,
++                               struct xen_domctl_shadow_op *sc,
++                               bool_t resuming)
+ {
+     int rv = 0, clean = 0, peek = 1;
+     unsigned long pages = 0;
+@@ -334,9 +401,22 @@ int paging_log_dirty_op(struct domain *d
+     unsigned long *l1 = NULL;
+     int i4, i3, i2;
+ 
+-    domain_pause(d);
++    if ( !resuming )
++        domain_pause(d);
+     paging_lock(d);
+ 
++    if ( !d->arch.paging.preempt.vcpu )
++        memset(&d->arch.paging.preempt.log_dirty, 0,
++               sizeof(d->arch.paging.preempt.log_dirty));
++    else if ( d->arch.paging.preempt.vcpu != current ||
++              d->arch.paging.preempt.op != sc->op )
++    {
++        paging_unlock(d);
++        ASSERT(!resuming);
++        domain_unpause(d);
++        return -EBUSY;
++    }
++
+     clean = (sc->op == XEN_DOMCTL_SHADOW_OP_CLEAN);
+ 
+     PAGING_DEBUG(LOGDIRTY, "log-dirty %s: dom %u faults=%u dirty=%u\n",
+@@ -365,17 +445,15 @@ int paging_log_dirty_op(struct domain *d
+         goto out;
+     }
+ 
+-    pages = 0;
+     l4 = paging_map_log_dirty_bitmap(d);
++    i4 = d->arch.paging.preempt.log_dirty.i4;
++    i3 = d->arch.paging.preempt.log_dirty.i3;
++    pages = d->arch.paging.preempt.log_dirty.done;
+ 
+-    for ( i4 = 0;
+-          (pages < sc->pages) && (i4 < LOGDIRTY_NODE_ENTRIES);
+-          i4++ )
++    for ( ; (pages < sc->pages) && (i4 < LOGDIRTY_NODE_ENTRIES); i4++, i3 = 0 )
+     {
+         l3 = (l4 && mfn_valid(l4[i4])) ? map_domain_page(mfn_x(l4[i4])) : NULL;
+-        for ( i3 = 0;
+-              (pages < sc->pages) && (i3 < LOGDIRTY_NODE_ENTRIES);
+-              i3++ )
++        for ( ; (pages < sc->pages) && (i3 < LOGDIRTY_NODE_ENTRIES); i3++ )
+         {
+             l2 = ((l3 && mfn_valid(l3[i3])) ?
+                   map_domain_page(mfn_x(l3[i3])) : NULL);
+@@ -410,18 +488,51 @@ int paging_log_dirty_op(struct domain *d
+             }
+             if ( l2 )
+                 unmap_domain_page(l2);
++
++            if ( i3 < LOGDIRTY_NODE_ENTRIES - 1 && hypercall_preempt_check() )
++            {
++                d->arch.paging.preempt.log_dirty.i4 = i4;
++                d->arch.paging.preempt.log_dirty.i3 = i3 + 1;
++                rv = -EAGAIN;
++                break;
++            }
+         }
+         if ( l3 )
+             unmap_domain_page(l3);
++
++        if ( !rv && i4 < LOGDIRTY_NODE_ENTRIES - 1 &&
++             hypercall_preempt_check() )
++        {
++            d->arch.paging.preempt.log_dirty.i4 = i4 + 1;
++            d->arch.paging.preempt.log_dirty.i3 = 0;
++            rv = -EAGAIN;
++        }
++        if ( rv )
++            break;
+     }
+     if ( l4 )
+         unmap_domain_page(l4);
+ 
+-    if ( pages < sc->pages )
+-        sc->pages = pages;
++    if ( !rv )
++        d->arch.paging.preempt.vcpu = NULL;
++    else
++    {
++        d->arch.paging.preempt.vcpu = current;
++        d->arch.paging.preempt.op = sc->op;
++        d->arch.paging.preempt.log_dirty.done = pages;
++    }
+ 
+     paging_unlock(d);
+ 
++    if ( rv )
++    {
++        /* Never leave the domain paused for other errors. */
++        ASSERT(rv == -EAGAIN);
++        return rv;
++    }
++
++    if ( pages < sc->pages )
++        sc->pages = pages;
+     if ( clean )
+     {
+         /* We need to further call clean_dirty_bitmap() functions of specific
+@@ -432,6 +543,7 @@ int paging_log_dirty_op(struct domain *d
+     return rv;
+ 
+  out:
++    d->arch.paging.preempt.vcpu = NULL;
+     paging_unlock(d);
+     domain_unpause(d);
+ 
+@@ -499,12 +611,6 @@ void paging_log_dirty_init(struct domain
+     d->arch.paging.log_dirty.clean_dirty_bitmap = clean_dirty_bitmap;
+ }
+ 
+-/* This function fress log dirty bitmap resources. */
+-static void paging_log_dirty_teardown(struct domain*d)
+-{
+-    paging_free_log_dirty_bitmap(d);
+-}
+-
+ /************************************************/
+ /*           CODE FOR PAGING SUPPORT            */
+ /************************************************/
+@@ -548,6 +654,7 @@ void paging_vcpu_init(struct vcpu *v)
+ int paging_domctl(struct domain *d, xen_domctl_shadow_op_t *sc,
+                   XEN_GUEST_HANDLE_PARAM(void) u_domctl)
+ {
++    bool_t resuming = 0;
+     int rc;
+ 
+     if ( unlikely(d == current->domain) )
+@@ -570,6 +677,20 @@ int paging_domctl(struct domain *d, xen_
+         return -EINVAL;
+     }
+ 
++    if ( d->arch.paging.preempt.vcpu )
++    {
++        if ( d->arch.paging.preempt.vcpu != current ||
++             d->arch.paging.preempt.op != sc->op )
++        {
++            printk(XENLOG_G_DEBUG
++                   "d%d:v%d: Paging op %#x on Dom%u with unfinished prior op %#x\n",
++                   current->domain->domain_id, current->vcpu_id,
++                   sc->op, d->domain_id, d->arch.paging.preempt.op);
++            return -EBUSY;
++        }
++        resuming = 1;
++    }
++
+     rc = xsm_shadow_control(XSM_HOOK, d, sc->op);
+     if ( rc )
+         return rc;
+@@ -595,13 +716,13 @@ int paging_domctl(struct domain *d, xen_
+ 
+     case XEN_DOMCTL_SHADOW_OP_OFF:
+         if ( paging_mode_log_dirty(d) )
+-            if ( (rc = paging_log_dirty_disable(d)) != 0 )
++            if ( (rc = paging_log_dirty_disable(d, resuming)) != 0 )
+                 return rc;
+         break;
+ 
+     case XEN_DOMCTL_SHADOW_OP_CLEAN:
+     case XEN_DOMCTL_SHADOW_OP_PEEK:
+-        return paging_log_dirty_op(d, sc);
++        return paging_log_dirty_op(d, sc, resuming);
+     }
+ 
+     /* Here, dispatch domctl to the appropriate paging code */
+@@ -612,18 +733,24 @@ int paging_domctl(struct domain *d, xen_
+ }
+ 
+ /* Call when destroying a domain */
+-void paging_teardown(struct domain *d)
++int paging_teardown(struct domain *d)
+ {
++    int rc;
++
+     if ( hap_enabled(d) )
+         hap_teardown(d);
+     else
+         shadow_teardown(d);
+ 
+     /* clean up log dirty resources. */
+-    paging_log_dirty_teardown(d);
++    rc = paging_free_log_dirty_bitmap(d, 0);
++    if ( rc == -EAGAIN )
++        return rc;
+ 
+     /* Move populate-on-demand cache back to domain_list for destruction */
+     p2m_pod_empty_cache(d);
++
++    return rc;
+ }
+ 
+ /* Call once all of the references to the domain have gone away */
+--- a/xen/arch/x86/mm/shadow/common.c
++++ b/xen/arch/x86/mm/shadow/common.c
+@@ -3706,8 +3706,7 @@ int shadow_domctl(struct domain *d,
+         paging_unlock(d);
+         if ( preempted )
+             /* Not finished. Set up to re-run the call. */
+-            rc = hypercall_create_continuation(
+-                __HYPERVISOR_domctl, "h", u_domctl);
++            rc = -EAGAIN;
+         else
+             /* Finished. Return the new allocation */
+             sc->mb = shadow_get_allocation(d);
+--- a/xen/common/domain.c
++++ b/xen/common/domain.c
+@@ -536,7 +536,6 @@ int domain_kill(struct domain *d)
+         rc = domain_relinquish_resources(d);
+         if ( rc != 0 )
+         {
+-            BUG_ON(rc != -EAGAIN);
+             break;
+         }
+         for_each_vcpu ( d, v )
+--- a/xen/include/asm-x86/domain.h
++++ b/xen/include/asm-x86/domain.h
+@@ -186,6 +186,20 @@ struct paging_domain {
+     struct hap_domain hap;
+     /* log dirty support */
+     struct log_dirty_domain log_dirty;
++
++    /* preemption handling */
++    struct {
++        struct vcpu *vcpu;
++        unsigned int op;
++        union {
++            struct {
++                unsigned long done:PADDR_BITS - PAGE_SHIFT;
++                unsigned long i4:PAGETABLE_ORDER;
++                unsigned long i3:PAGETABLE_ORDER;
++            } log_dirty;
++        };
++    } preempt;
++
+     /* alloc/free pages from the pool for paging-assistance structures
+      * (used by p2m and log-dirty code for their tries) */
+     struct page_info * (*alloc_page)(struct domain *d);
+--- a/xen/include/asm-x86/paging.h
++++ b/xen/include/asm-x86/paging.h
+@@ -133,9 +133,6 @@ struct paging_mode {
+ /*****************************************************************************
+  * Log dirty code */
+ 
+-/* free log dirty bitmap resource */
+-void paging_free_log_dirty_bitmap(struct domain *d);
+-
+ /* get the dirty bitmap for a specific range of pfns */
+ void paging_log_dirty_range(struct domain *d,
+                             unsigned long begin_pfn,
+@@ -145,9 +142,6 @@ void paging_log_dirty_range(struct domai
+ /* enable log dirty */
+ int paging_log_dirty_enable(struct domain *d, bool_t log_global);
+ 
+-/* disable log dirty */
+-int paging_log_dirty_disable(struct domain *d);
+-
+ /* log dirty initialization */
+ void paging_log_dirty_init(struct domain *d,
+                            int (*enable_log_dirty)(struct domain *d,
+@@ -207,7 +201,7 @@ int paging_domctl(struct domain *d, xen_
+                   XEN_GUEST_HANDLE_PARAM(void) u_domctl);
+ 
+ /* Call when destroying a domain */
+-void paging_teardown(struct domain *d);
++int paging_teardown(struct domain *d);
+ 
+ /* Call once all of the references to the domain have gone away */
+ void paging_final_teardown(struct domain *d);
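
The pattern the patch introduces (record how far the nested log-dirty walk has progressed, bail out with -EAGAIN when hypercall_preempt_check() fires, and let the domctl path re-issue the operation as a hypercall continuation) can be illustrated with a small self-contained sketch. The following is a hypothetical userspace analogue rather than Xen code: walk(), struct progress and preempt_check() are invented stand-ins for the paging code and its preemption hook.

/*
 * Minimal sketch of the preemptible-operation pattern, assuming a
 * userspace setting.  A long nested walk saves its position and returns
 * -EAGAIN when a (simulated) preemption check fires; the caller simply
 * re-invokes it, playing the role of the hypercall continuation.
 */
#include <errno.h>
#include <stdio.h>

#define ENTRIES 4

struct progress {
    int busy;      /* is a partially completed walk pending? */
    int i4, i3;    /* resume point in the nested walk */
};

/* Stand-in for hypercall_preempt_check(): pretend we must yield after
 * every third leaf visit. */
static int preempt_check(void)
{
    static int visits;
    return (++visits % 3) == 0;
}

static void process_leaf(int i4, int i3)
{
    printf("processing [%d][%d]\n", i4, i3);
}

/* One (possibly partial) pass.  Returns 0 when the whole structure has
 * been visited, or -EAGAIN with progress saved in *p. */
static int walk(struct progress *p)
{
    int i4 = p->busy ? p->i4 : 0;
    int i3 = p->busy ? p->i3 : 0;

    for ( ; i4 < ENTRIES; i4++, i3 = 0 )
        for ( ; i3 < ENTRIES; i3++ )
        {
            process_leaf(i4, i3);
            if ( preempt_check() )
            {
                p->busy = 1;
                p->i4 = i4;
                p->i3 = i3 + 1;   /* resume at the next leaf */
                return -EAGAIN;
            }
        }

    p->busy = 0;
    return 0;
}

int main(void)
{
    struct progress p = { 0 };
    int rc;

    /* Re-issue the operation until it stops reporting -EAGAIN, much as
     * the domctl path re-enters via hypercall_create_continuation(). */
    while ( (rc = walk(&p)) == -EAGAIN )
        printf("preempted, continuing...\n");

    return rc;
}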