Blob Blame History Raw
From: Xen Project Security Team <security@xenproject.org>
Subject: x86/vpt: add support for IO-APIC routed interrupts

And modify the HPET code to make use of it. Currently HPET interrupts
are always treated as ISA and thus injected through the vPIC. This is
wrong because HPET interrupts when not in legacy mode should be
injected from the IO-APIC.

To make things worse, the supported interrupt routing values are set
to [20..23], which clearly falls outside of the ISA range, thus
leading to an ASSERT in debug builds or memory corruption in non-debug
builds because the interrupt injection code will write out of the
bounds of the arch.hvm_domain.vpic array.

Since the HPET interrupt source can change between ISA and IO-APIC
always destroy the timer before changing the mode, or else Xen risks
changing it while the timer is active.

Note that vpt interrupt injection is racy in the sense that the
vIO-APIC RTE entry can be written by the guest in between the call to
pt_irq_masked and hvm_ioapic_assert, or the call to pt_update_irq and
pt_intr_post. Those are not deemed to be security issues, but rather
quirks of the current implementation. In the worse case the guest
might lose interrupts or get multiple interrupt vectors injected for
the same timer source.

This is part of XSA-261.

Address actual and potential compiler warnings. Fix formatting.

Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/arch/x86/hvm/hpet.c
+++ b/xen/arch/x86/hvm/hpet.c
@@ -264,13 +264,20 @@ static void hpet_set_timer(HPETState *h,
         diff = (timer_is_32bit(h, tn) && (-diff > HPET_TINY_TIME_SPAN))
             ? (uint32_t)diff : 0;
 
+    destroy_periodic_time(&h->pt[tn]);
     if ( (tn <= 1) && (h->hpet.config & HPET_CFG_LEGACY) )
+    {
         /* if LegacyReplacementRoute bit is set, HPET specification requires
            timer0 be routed to IRQ0 in NON-APIC or IRQ2 in the I/O APIC,
            timer1 be routed to IRQ8 in NON-APIC or IRQ8 in the I/O APIC. */
         irq = (tn == 0) ? 0 : 8;
+        h->pt[tn].source = PTSRC_isa;
+    }
     else
+    {
         irq = timer_int_route(h, tn);
+        h->pt[tn].source = PTSRC_ioapic;
+    }
 
     /*
      * diff is the time from now when the timer should fire, for a periodic
--- a/xen/arch/x86/hvm/irq.c
+++ b/xen/arch/x86/hvm/irq.c
@@ -41,6 +41,29 @@ static void assert_gsi(struct domain *d,
     vioapic_irq_positive_edge(d, ioapic_gsi);
 }
 
+int hvm_ioapic_assert(struct domain *d, unsigned int gsi, bool level)
+{
+    struct hvm_irq *hvm_irq = hvm_domain_irq(d);
+    const struct hvm_vioapic *vioapic;
+    unsigned int pin;
+    int vector;
+
+    if ( gsi >= hvm_irq->nr_gsis )
+    {
+        ASSERT_UNREACHABLE();
+        return -1;
+    }
+
+    spin_lock(&d->arch.hvm_domain.irq_lock);
+    if ( !level || hvm_irq->gsi_assert_count[gsi]++ == 0 )
+        assert_gsi(d, gsi);
+    vioapic = gsi_vioapic(d, gsi, &pin);
+    vector = vioapic ? vioapic->redirtbl[pin].fields.vector : -1;
+    spin_unlock(&d->arch.hvm_domain.irq_lock);
+
+    return vector;
+}
+
 static void assert_irq(struct domain *d, unsigned ioapic_gsi, unsigned pic_irq)
 {
     assert_gsi(d, ioapic_gsi);
--- a/xen/arch/x86/hvm/vpt.c
+++ b/xen/arch/x86/hvm/vpt.c
@@ -107,31 +107,50 @@ static int pt_irq_vector(struct periodic
 static int pt_irq_masked(struct periodic_time *pt)
 {
     struct vcpu *v = pt->vcpu;
-    unsigned int gsi, isa_irq, pin;
-    struct hvm_vioapic *vioapic;
-    uint8_t pic_imr;
+    unsigned int gsi = pt->irq;
 
-    if ( pt->source == PTSRC_lapic )
+    switch ( pt->source )
+    {
+    case PTSRC_lapic:
     {
         struct vlapic *vlapic = vcpu_vlapic(v);
+
         return (!vlapic_enabled(vlapic) ||
                 (vlapic_get_reg(vlapic, APIC_LVTT) & APIC_LVT_MASKED));
     }
 
-    isa_irq = pt->irq;
-    gsi = hvm_isa_irq_to_gsi(isa_irq);
-    pic_imr = v->domain->arch.hvm_domain.vpic[isa_irq >> 3].imr;
-    vioapic = gsi_vioapic(v->domain, gsi, &pin);
-    if ( !vioapic )
-    {
-        dprintk(XENLOG_WARNING, "d%u: invalid GSI (%u) for platform timer\n",
-                v->domain->domain_id, gsi);
-        domain_crash(v->domain);
-        return -1;
+    case PTSRC_isa:
+    {
+        uint8_t pic_imr = v->domain->arch.hvm_domain.vpic[pt->irq >> 3].imr;
+
+        /* Check if the interrupt is unmasked in the PIC. */
+        if ( !(pic_imr & (1 << (pt->irq & 7))) && vlapic_accept_pic_intr(v) )
+            return 0;
+
+        gsi = hvm_isa_irq_to_gsi(pt->irq);
+    }
+
+    /* Fallthrough to check if the interrupt is masked on the IO APIC. */
+    case PTSRC_ioapic:
+    {
+        unsigned int pin;
+        const struct hvm_vioapic *vioapic = gsi_vioapic(v->domain, gsi, &pin);
+
+        if ( !vioapic )
+        {
+            dprintk(XENLOG_WARNING,
+                    "d%d: invalid GSI (%u) for platform timer\n",
+                    v->domain->domain_id, gsi);
+            domain_crash(v->domain);
+            return -1;
+        }
+
+        return vioapic->redirtbl[pin].fields.mask;
+    }
     }
 
-    return (((pic_imr & (1 << (isa_irq & 7))) || !vlapic_accept_pic_intr(v)) &&
-            vioapic->redirtbl[pin].fields.mask);
+    ASSERT_UNREACHABLE();
+    return 1;
 }
 
 static void pt_lock(struct periodic_time *pt)
@@ -252,7 +271,7 @@ int pt_update_irq(struct vcpu *v)
     struct list_head *head = &v->arch.hvm_vcpu.tm_list;
     struct periodic_time *pt, *temp, *earliest_pt;
     uint64_t max_lag;
-    int irq, is_lapic;
+    int irq, pt_vector = -1;
 
     spin_lock(&v->arch.hvm_vcpu.tm_lock);
 
@@ -288,29 +307,42 @@ int pt_update_irq(struct vcpu *v)
 
     earliest_pt->irq_issued = 1;
     irq = earliest_pt->irq;
-    is_lapic = (earliest_pt->source == PTSRC_lapic);
 
     spin_unlock(&v->arch.hvm_vcpu.tm_lock);
 
-    if ( is_lapic )
-        vlapic_set_irq(vcpu_vlapic(v), irq, 0);
-    else
+    switch ( earliest_pt->source )
     {
+    case PTSRC_lapic:
+        /*
+         * If periodic timer interrupt is handled by lapic, its vector in
+         * IRR is returned and used to set eoi_exit_bitmap for virtual
+         * interrupt delivery case. Otherwise return -1 to do nothing.
+         */
+        vlapic_set_irq(vcpu_vlapic(v), irq, 0);
+        pt_vector = irq;
+        break;
+
+    case PTSRC_isa:
         hvm_isa_irq_deassert(v->domain, irq);
         hvm_isa_irq_assert(v->domain, irq);
+
+        if ( platform_legacy_irq(irq) && vlapic_accept_pic_intr(v) &&
+             v->domain->arch.hvm_domain.vpic[irq >> 3].int_output )
+            return -1;
+
+        pt_vector = pt_irq_vector(earliest_pt, hvm_intsrc_lapic);
+        break;
+
+    case PTSRC_ioapic:
+        /*
+         * NB: At the moment IO-APIC routed interrupts generated by vpt devices
+         * (HPET) are edge-triggered.
+         */
+        pt_vector = hvm_ioapic_assert(v->domain, irq, false);
+        break;
     }
 
-    /*
-     * If periodic timer interrut is handled by lapic, its vector in
-     * IRR is returned and used to set eoi_exit_bitmap for virtual
-     * interrupt delivery case. Otherwise return -1 to do nothing.  
-     */ 
-    if ( !is_lapic &&
-         platform_legacy_irq(irq) && vlapic_accept_pic_intr(v) &&
-         (&v->domain->arch.hvm_domain)->vpic[irq >> 3].int_output )
-        return -1;
-    else 
-        return pt_irq_vector(earliest_pt, hvm_intsrc_lapic);
+    return pt_vector;
 }
 
 static struct periodic_time *is_pt_irq(
@@ -405,7 +437,14 @@ void create_periodic_time(
     struct vcpu *v, struct periodic_time *pt, uint64_t delta,
     uint64_t period, uint8_t irq, time_cb *cb, void *data)
 {
-    ASSERT(pt->source != 0);
+    if ( !pt->source ||
+         (pt->irq >= NR_ISAIRQS && pt->source == PTSRC_isa) ||
+         (pt->irq >= hvm_domain_irq(v->domain)->nr_gsis &&
+          pt->source == PTSRC_ioapic) )
+    {
+        ASSERT_UNREACHABLE();
+        return;
+    }
 
     destroy_periodic_time(pt);
 
@@ -485,7 +524,7 @@ static void pt_adjust_vcpu(struct period
 {
     int on_list;
 
-    ASSERT(pt->source == PTSRC_isa);
+    ASSERT(pt->source == PTSRC_isa || pt->source == PTSRC_ioapic);
 
     if ( pt->vcpu == NULL )
         return;
--- a/xen/include/asm-x86/hvm/irq.h
+++ b/xen/include/asm-x86/hvm/irq.h
@@ -106,4 +106,7 @@ struct hvm_intack hvm_vcpu_has_pending_i
 struct hvm_intack hvm_vcpu_ack_pending_irq(struct vcpu *v,
                                            struct hvm_intack intack);
 
+/* Assert an IO APIC pin. */
+int hvm_ioapic_assert(struct domain *d, unsigned int gsi, bool level);
+
 #endif /* __ASM_X86_HVM_IRQ_H__ */
--- a/xen/include/asm-x86/hvm/vpt.h
+++ b/xen/include/asm-x86/hvm/vpt.h
@@ -44,6 +44,7 @@ struct periodic_time {
     bool_t warned_timeout_too_short;
 #define PTSRC_isa    1 /* ISA time source */
 #define PTSRC_lapic  2 /* LAPIC time source */
+#define PTSRC_ioapic 3 /* IOAPIC time source */
     u8 source;                  /* PTSRC_ */
     u8 irq;
     struct vcpu *vcpu;          /* vcpu timer interrupt delivers to */