From 05650b8adfcc83ec984afc1caa90ee168ea61b12 Mon Sep 17 00:00:00 2001 From: Michael Young Date: Dec 20 2020 18:59:11 +0000 Subject: update to xen 4.14.1 --- diff --git a/.gitignore b/.gitignore index f112132..155f9cb 100644 --- a/.gitignore +++ b/.gitignore @@ -6,4 +6,4 @@ lwip-1.3.0.tar.gz pciutils-2.2.9.tar.bz2 zlib-1.2.3.tar.gz polarssl-1.1.4-gpl.tgz -/xen-4.14.0.tar.gz +/xen-4.14.1.tar.gz diff --git a/sources b/sources index 660c881..e3f8b84 100644 --- a/sources +++ b/sources @@ -4,4 +4,4 @@ SHA512 (newlib-1.16.0.tar.gz) = 40eb96bbc6736a16b6399e0cdb73e853d0d90b685c967e77 SHA512 (zlib-1.2.3.tar.gz) = 021b958fcd0d346c4ba761bcf0cc40f3522de6186cf5a0a6ea34a70504ce9622b1c2626fce40675bc8282cf5f5ade18473656abc38050f72f5d6480507a2106e SHA512 (polarssl-1.1.4-gpl.tgz) = 88da614e4d3f4409c4fd3bb3e44c7587ba051e3fed4e33d526069a67e8180212e1ea22da984656f50e290049f60ddca65383e5983c0f8884f648d71f698303ad SHA512 (pciutils-2.2.9.tar.bz2) = 2b3d98d027e46d8c08037366dde6f0781ca03c610ef2b380984639e4ef39899ed8d8b8e4cd9c9dc54df101279b95879bd66bfd4d04ad07fef41e847ea7ae32b5 -SHA512 (xen-4.14.0.tar.gz) = ebce47a2f754955d8517123d69f62006634d97620fbbe3784869a0667466e586a249f57ffaf7846d5bcb45d69377cde43354c82c233fbb5407e55653b9a33ac0 +SHA512 (xen-4.14.1.tar.gz) = c75cbec82793435f5a7026626ffdb2e9a2166b42d2be4b2f1194240e0312458124f0ebd53eeb02ce7330c22afe402a28a96b32f8af66e41e9416fe94535724c9 diff --git a/xen.canonicalize.patch b/xen.canonicalize.patch index 43ccf02..500cc72 100644 --- a/xen.canonicalize.patch +++ b/xen.canonicalize.patch @@ -38,17 +38,6 @@ return get_node(conn, ctx, *canonical_name, perm); } ---- xen-4.9.0-rc1.2/tools/xenstore/xenstored_core.h.orig 2017-04-12 16:18:57.000000000 +0100 -+++ xen-4.9.0-rc1.2/tools/xenstore/xenstored_core.h 2017-04-13 21:20:29.146368478 +0100 -@@ -148,7 +148,7 @@ - void send_ack(struct connection *conn, enum xsd_sockmsg_type type); - - /* Canonicalize this path if possible. */ --char *canonicalize(struct connection *conn, const void *ctx, const char *node); -+char *xenstore_canonicalize(struct connection *conn, const void *ctx, const char *node); - - /* Write a node to the tdb data base. */ - int write_node_raw(struct connection *conn, TDB_DATA *key, struct node *node); --- xen-4.8.0/tools/console/testsuite/console-dom0.c.orig 2016-12-05 12:03:27.000000000 +0000 +++ xen-4.8.0/tools/console/testsuite/console-dom0.c 2017-02-26 21:52:24.554678631 +0000 @@ -18,7 +18,7 @@ @@ -107,3 +96,14 @@ seed = strtoul(line, 0, 0); printf("Seed Okay.\n"); fflush(stdout); +--- xen-4.14.1/tools/xenstore/xenstored_core.h.orig 2020-12-17 16:47:25.000000000 +0000 ++++ xen-4.14.1/tools/xenstore/xenstored_core.h 2020-12-17 20:13:10.806887309 +0000 +@@ -153,7 +153,7 @@ + void send_ack(struct connection *conn, enum xsd_sockmsg_type type); + + /* Canonicalize this path if possible. */ +-char *canonicalize(struct connection *conn, const void *ctx, const char *node); ++char *xenstore_canonicalize(struct connection *conn, const void *ctx, const char *node); + + /* Get access permissions. 
*/ + enum xs_perm_type perm_for_conn(struct connection *conn, diff --git a/xen.gcc10.fixes.patch b/xen.gcc10.fixes.patch index 0bd895f..657989b 100644 --- a/xen.gcc10.fixes.patch +++ b/xen.gcc10.fixes.patch @@ -1,26 +1,3 @@ ---- xen-4.13.0/tools/libxl/libxlu_pci.c.orig 2019-12-17 14:23:09.000000000 +0000 -+++ xen-4.13.0/tools/libxl/libxlu_pci.c 2020-01-21 21:56:26.812212916 +0000 -@@ -48,7 +48,7 @@ - int xlu_pci_parse_bdf(XLU_Config *cfg, libxl_device_pci *pcidev, const char *str) - { - unsigned state = STATE_DOMAIN; -- unsigned dom, bus, dev, func, vslot = 0; -+ unsigned dom = 0, bus = 0, dev = 0, func = 0, vslot = 0; - char *buf2, *tok, *ptr, *end, *optkey = NULL; - - if ( NULL == (buf2 = ptr = strdup(str)) ) ---- xen-4.13.0/tools/libxl/libxl_utils.c.orig 2019-12-17 14:23:09.000000000 +0000 -+++ xen-4.13.0/tools/libxl/libxl_utils.c 2020-01-21 22:34:52.096300774 +0000 -@@ -1259,7 +1259,7 @@ - } - memset(un, 0, sizeof(struct sockaddr_un)); - un->sun_family = AF_UNIX; -- strncpy(un->sun_path, path, sizeof(un->sun_path)); -+ strncpy(un->sun_path, path, sizeof(un->sun_path)-1); - return 0; - } - - --- xen-4.13.0/tools/xenpmd/Makefile.orig 2019-12-17 14:23:09.000000000 +0000 +++ xen-4.13.0/tools/xenpmd/Makefile 2020-01-22 22:13:16.564873608 +0000 @@ -3,6 +3,7 @@ diff --git a/xen.git-1ad177370df2db9129c97c7305962fc5ad298728.patch b/xen.git-1ad177370df2db9129c97c7305962fc5ad298728.patch deleted file mode 100644 index a713b25..0000000 --- a/xen.git-1ad177370df2db9129c97c7305962fc5ad298728.patch +++ /dev/null @@ -1,592 +0,0 @@ -From 1ad177370df2db9129c97c7305962fc5ad298728 Mon Sep 17 00:00:00 2001 -From: Juergen Gross -Date: Tue, 1 Dec 2020 15:31:01 +0100 -Subject: [PATCH] xen/evtchn: rework per event channel lock - -Currently the lock for a single event channel needs to be taken with -interrupts off, which causes deadlocks in some cases. - -Rework the per event channel lock to be non-blocking for the case of -sending an event and removing the need for disabling interrupts for -taking the lock. - -The lock is needed for avoiding races between event channel state -changes (creation, closing, binding) against normal operations (set -pending, [un]masking, priority changes). - -Use a rwlock, but with some restrictions: - -- Changing the state of an event channel (creation, closing, binding) - needs to use write_lock(), with ASSERT()ing that the lock is taken as - writer only when the state of the event channel is either before or - after the locked region appropriate (either free or unbound). - -- Sending an event needs to use read_trylock() mostly, in case of not - obtaining the lock the operation is omitted. This is needed as - sending an event can happen with interrupts off (at least in some - cases). - -- Dumping the event channel state for debug purposes is using - read_trylock(), too, in order to avoid blocking in case the lock is - taken as writer for a long time. - -- All other cases can use read_lock(). - -Fixes: e045199c7c9c54 ("evtchn: address races with evtchn_reset()") -Signed-off-by: Juergen Gross -Reviewed-by: Jan Beulich -Acked-by: Julien Grall - -xen/events: fix build - -Commit 5f2df45ead7c1195 ("xen/evtchn: rework per event channel lock") -introduced a build failure for NDEBUG builds. 
- -Fixes: 5f2df45ead7c1195 ("xen/evtchn: rework per event channel lock") -Signed-off-by: Juergen Gross -Signed-off-by: Jan Beulich -master commit: 5f2df45ead7c1195142f68b7923047a1e9479d54 -master date: 2020-11-10 14:36:15 +0100 -master commit: 53bacb86f496fdb11560d9e3b361bca7de60d268 -master date: 2020-11-11 08:56:21 +0100 ---- - xen/arch/x86/irq.c | 6 +- - xen/arch/x86/pv/shim.c | 9 +-- - xen/common/event_channel.c | 141 ++++++++++++++++++++++--------------- - xen/include/xen/event.h | 27 +++++-- - xen/include/xen/sched.h | 5 +- - 5 files changed, 116 insertions(+), 72 deletions(-) - -diff --git a/xen/arch/x86/irq.c b/xen/arch/x86/irq.c -index 93c4fb9a79..8d1f9a9fc6 100644 ---- a/xen/arch/x86/irq.c -+++ b/xen/arch/x86/irq.c -@@ -2495,14 +2495,12 @@ static void dump_irqs(unsigned char key) - pirq = domain_irq_to_pirq(d, irq); - info = pirq_info(d, pirq); - evtchn = evtchn_from_port(d, info->evtchn); -- local_irq_disable(); -- if ( spin_trylock(&evtchn->lock) ) -+ if ( evtchn_read_trylock(evtchn) ) - { - pending = evtchn_is_pending(d, evtchn); - masked = evtchn_is_masked(d, evtchn); -- spin_unlock(&evtchn->lock); -+ evtchn_read_unlock(evtchn); - } -- local_irq_enable(); - printk("d%d:%3d(%c%c%c)%c", - d->domain_id, pirq, "-P?"[pending], - "-M?"[masked], info->masked ? 'M' : '-', -diff --git a/xen/arch/x86/pv/shim.c b/xen/arch/x86/pv/shim.c -index 9aef7a860a..b4e83e0778 100644 ---- a/xen/arch/x86/pv/shim.c -+++ b/xen/arch/x86/pv/shim.c -@@ -660,11 +660,12 @@ void pv_shim_inject_evtchn(unsigned int port) - if ( port_is_valid(guest, port) ) - { - struct evtchn *chn = evtchn_from_port(guest, port); -- unsigned long flags; - -- spin_lock_irqsave(&chn->lock, flags); -- evtchn_port_set_pending(guest, chn->notify_vcpu_id, chn); -- spin_unlock_irqrestore(&chn->lock, flags); -+ if ( evtchn_read_trylock(chn) ) -+ { -+ evtchn_port_set_pending(guest, chn->notify_vcpu_id, chn); -+ evtchn_read_unlock(chn); -+ } - } - } - -diff --git a/xen/common/event_channel.c b/xen/common/event_channel.c -index 12f666cb79..181e5abaa6 100644 ---- a/xen/common/event_channel.c -+++ b/xen/common/event_channel.c -@@ -50,6 +50,40 @@ - - #define consumer_is_xen(e) (!!(e)->xen_consumer) - -+/* -+ * Lock an event channel exclusively. This is allowed only when the channel is -+ * free or unbound either when taking or when releasing the lock, as any -+ * concurrent operation on the event channel using evtchn_read_trylock() will -+ * just assume the event channel is free or unbound at the moment when the -+ * evtchn_read_trylock() returns false. -+ */ -+static inline void evtchn_write_lock(struct evtchn *evtchn) -+{ -+ write_lock(&evtchn->lock); -+ -+#ifndef NDEBUG -+ evtchn->old_state = evtchn->state; -+#endif -+} -+ -+static inline unsigned int old_state(const struct evtchn *evtchn) -+{ -+#ifndef NDEBUG -+ return evtchn->old_state; -+#else -+ return ECS_RESERVED; /* Just to allow things to build. */ -+#endif -+} -+ -+static inline void evtchn_write_unlock(struct evtchn *evtchn) -+{ -+ /* Enforce lock discipline. */ -+ ASSERT(old_state(evtchn) == ECS_FREE || old_state(evtchn) == ECS_UNBOUND || -+ evtchn->state == ECS_FREE || evtchn->state == ECS_UNBOUND); -+ -+ write_unlock(&evtchn->lock); -+} -+ - /* - * The function alloc_unbound_xen_event_channel() allows an arbitrary - * notifier function to be specified. 
However, very few unique functions -@@ -131,7 +165,7 @@ static struct evtchn *alloc_evtchn_bucket(struct domain *d, unsigned int port) - return NULL; - } - chn[i].port = port + i; -- spin_lock_init(&chn[i].lock); -+ rwlock_init(&chn[i].lock); - } - return chn; - } -@@ -249,7 +283,6 @@ static long evtchn_alloc_unbound(evtchn_alloc_unbound_t *alloc) - int port; - domid_t dom = alloc->dom; - long rc; -- unsigned long flags; - - d = rcu_lock_domain_by_any_id(dom); - if ( d == NULL ) -@@ -265,14 +298,14 @@ static long evtchn_alloc_unbound(evtchn_alloc_unbound_t *alloc) - if ( rc ) - goto out; - -- spin_lock_irqsave(&chn->lock, flags); -+ evtchn_write_lock(chn); - - chn->state = ECS_UNBOUND; - if ( (chn->u.unbound.remote_domid = alloc->remote_dom) == DOMID_SELF ) - chn->u.unbound.remote_domid = current->domain->domain_id; - evtchn_port_init(d, chn); - -- spin_unlock_irqrestore(&chn->lock, flags); -+ evtchn_write_unlock(chn); - - alloc->port = port; - -@@ -285,32 +318,26 @@ static long evtchn_alloc_unbound(evtchn_alloc_unbound_t *alloc) - } - - --static unsigned long double_evtchn_lock(struct evtchn *lchn, -- struct evtchn *rchn) -+static void double_evtchn_lock(struct evtchn *lchn, struct evtchn *rchn) - { -- unsigned long flags; -- - if ( lchn <= rchn ) - { -- spin_lock_irqsave(&lchn->lock, flags); -+ evtchn_write_lock(lchn); - if ( lchn != rchn ) -- spin_lock(&rchn->lock); -+ evtchn_write_lock(rchn); - } - else - { -- spin_lock_irqsave(&rchn->lock, flags); -- spin_lock(&lchn->lock); -+ evtchn_write_lock(rchn); -+ evtchn_write_lock(lchn); - } -- -- return flags; - } - --static void double_evtchn_unlock(struct evtchn *lchn, struct evtchn *rchn, -- unsigned long flags) -+static void double_evtchn_unlock(struct evtchn *lchn, struct evtchn *rchn) - { - if ( lchn != rchn ) -- spin_unlock(&lchn->lock); -- spin_unlock_irqrestore(&rchn->lock, flags); -+ evtchn_write_unlock(lchn); -+ evtchn_write_unlock(rchn); - } - - static long evtchn_bind_interdomain(evtchn_bind_interdomain_t *bind) -@@ -320,7 +347,6 @@ static long evtchn_bind_interdomain(evtchn_bind_interdomain_t *bind) - int lport, rport = bind->remote_port; - domid_t rdom = bind->remote_dom; - long rc; -- unsigned long flags; - - if ( rdom == DOMID_SELF ) - rdom = current->domain->domain_id; -@@ -356,7 +382,7 @@ static long evtchn_bind_interdomain(evtchn_bind_interdomain_t *bind) - if ( rc ) - goto out; - -- flags = double_evtchn_lock(lchn, rchn); -+ double_evtchn_lock(lchn, rchn); - - lchn->u.interdomain.remote_dom = rd; - lchn->u.interdomain.remote_port = rport; -@@ -373,7 +399,7 @@ static long evtchn_bind_interdomain(evtchn_bind_interdomain_t *bind) - */ - evtchn_port_set_pending(ld, lchn->notify_vcpu_id, lchn); - -- double_evtchn_unlock(lchn, rchn, flags); -+ double_evtchn_unlock(lchn, rchn); - - bind->local_port = lport; - -@@ -396,7 +422,6 @@ int evtchn_bind_virq(evtchn_bind_virq_t *bind, evtchn_port_t port) - struct domain *d = current->domain; - int virq = bind->virq, vcpu = bind->vcpu; - int rc = 0; -- unsigned long flags; - - if ( (virq < 0) || (virq >= ARRAY_SIZE(v->virq_to_evtchn)) ) - return -EINVAL; -@@ -434,14 +459,14 @@ int evtchn_bind_virq(evtchn_bind_virq_t *bind, evtchn_port_t port) - - chn = evtchn_from_port(d, port); - -- spin_lock_irqsave(&chn->lock, flags); -+ evtchn_write_lock(chn); - - chn->state = ECS_VIRQ; - chn->notify_vcpu_id = vcpu; - chn->u.virq = virq; - evtchn_port_init(d, chn); - -- spin_unlock_irqrestore(&chn->lock, flags); -+ evtchn_write_unlock(chn); - - v->virq_to_evtchn[virq] = bind->port = port; - -@@ -458,7 
+483,6 @@ static long evtchn_bind_ipi(evtchn_bind_ipi_t *bind) - struct domain *d = current->domain; - int port, vcpu = bind->vcpu; - long rc = 0; -- unsigned long flags; - - if ( domain_vcpu(d, vcpu) == NULL ) - return -ENOENT; -@@ -470,13 +494,13 @@ static long evtchn_bind_ipi(evtchn_bind_ipi_t *bind) - - chn = evtchn_from_port(d, port); - -- spin_lock_irqsave(&chn->lock, flags); -+ evtchn_write_lock(chn); - - chn->state = ECS_IPI; - chn->notify_vcpu_id = vcpu; - evtchn_port_init(d, chn); - -- spin_unlock_irqrestore(&chn->lock, flags); -+ evtchn_write_unlock(chn); - - bind->port = port; - -@@ -520,7 +544,6 @@ static long evtchn_bind_pirq(evtchn_bind_pirq_t *bind) - struct pirq *info; - int port = 0, pirq = bind->pirq; - long rc; -- unsigned long flags; - - if ( (pirq < 0) || (pirq >= d->nr_pirqs) ) - return -EINVAL; -@@ -553,14 +576,14 @@ static long evtchn_bind_pirq(evtchn_bind_pirq_t *bind) - goto out; - } - -- spin_lock_irqsave(&chn->lock, flags); -+ evtchn_write_lock(chn); - - chn->state = ECS_PIRQ; - chn->u.pirq.irq = pirq; - link_pirq_port(port, chn, v); - evtchn_port_init(d, chn); - -- spin_unlock_irqrestore(&chn->lock, flags); -+ evtchn_write_unlock(chn); - - bind->port = port; - -@@ -581,7 +604,6 @@ int evtchn_close(struct domain *d1, int port1, bool guest) - struct evtchn *chn1, *chn2; - int port2; - long rc = 0; -- unsigned long flags; - - again: - spin_lock(&d1->event_lock); -@@ -681,14 +703,14 @@ int evtchn_close(struct domain *d1, int port1, bool guest) - BUG_ON(chn2->state != ECS_INTERDOMAIN); - BUG_ON(chn2->u.interdomain.remote_dom != d1); - -- flags = double_evtchn_lock(chn1, chn2); -+ double_evtchn_lock(chn1, chn2); - - evtchn_free(d1, chn1); - - chn2->state = ECS_UNBOUND; - chn2->u.unbound.remote_domid = d1->domain_id; - -- double_evtchn_unlock(chn1, chn2, flags); -+ double_evtchn_unlock(chn1, chn2); - - goto out; - -@@ -696,9 +718,9 @@ int evtchn_close(struct domain *d1, int port1, bool guest) - BUG(); - } - -- spin_lock_irqsave(&chn1->lock, flags); -+ evtchn_write_lock(chn1); - evtchn_free(d1, chn1); -- spin_unlock_irqrestore(&chn1->lock, flags); -+ evtchn_write_unlock(chn1); - - out: - if ( d2 != NULL ) -@@ -718,7 +740,6 @@ int evtchn_send(struct domain *ld, unsigned int lport) - struct evtchn *lchn, *rchn; - struct domain *rd; - int rport, ret = 0; -- unsigned long flags; - - if ( !port_is_valid(ld, lport) ) - return -EINVAL; -@@ -731,7 +752,7 @@ int evtchn_send(struct domain *ld, unsigned int lport) - - lchn = evtchn_from_port(ld, lport); - -- spin_lock_irqsave(&lchn->lock, flags); -+ evtchn_read_lock(lchn); - - /* Guest cannot send via a Xen-attached event channel. 
*/ - if ( unlikely(consumer_is_xen(lchn)) ) -@@ -766,7 +787,7 @@ int evtchn_send(struct domain *ld, unsigned int lport) - } - - out: -- spin_unlock_irqrestore(&lchn->lock, flags); -+ evtchn_read_unlock(lchn); - - return ret; - } -@@ -793,9 +814,11 @@ void send_guest_vcpu_virq(struct vcpu *v, uint32_t virq) - - d = v->domain; - chn = evtchn_from_port(d, port); -- spin_lock(&chn->lock); -- evtchn_port_set_pending(d, v->vcpu_id, chn); -- spin_unlock(&chn->lock); -+ if ( evtchn_read_trylock(chn) ) -+ { -+ evtchn_port_set_pending(d, v->vcpu_id, chn); -+ evtchn_read_unlock(chn); -+ } - - out: - spin_unlock_irqrestore(&v->virq_lock, flags); -@@ -824,9 +847,11 @@ void send_guest_global_virq(struct domain *d, uint32_t virq) - goto out; - - chn = evtchn_from_port(d, port); -- spin_lock(&chn->lock); -- evtchn_port_set_pending(d, chn->notify_vcpu_id, chn); -- spin_unlock(&chn->lock); -+ if ( evtchn_read_trylock(chn) ) -+ { -+ evtchn_port_set_pending(d, chn->notify_vcpu_id, chn); -+ evtchn_read_unlock(chn); -+ } - - out: - spin_unlock_irqrestore(&v->virq_lock, flags); -@@ -836,7 +861,6 @@ void send_guest_pirq(struct domain *d, const struct pirq *pirq) - { - int port; - struct evtchn *chn; -- unsigned long flags; - - /* - * PV guests: It should not be possible to race with __evtchn_close(). The -@@ -851,9 +875,11 @@ void send_guest_pirq(struct domain *d, const struct pirq *pirq) - } - - chn = evtchn_from_port(d, port); -- spin_lock_irqsave(&chn->lock, flags); -- evtchn_port_set_pending(d, chn->notify_vcpu_id, chn); -- spin_unlock_irqrestore(&chn->lock, flags); -+ if ( evtchn_read_trylock(chn) ) -+ { -+ evtchn_port_set_pending(d, chn->notify_vcpu_id, chn); -+ evtchn_read_unlock(chn); -+ } - } - - static struct domain *global_virq_handlers[NR_VIRQS] __read_mostly; -@@ -1050,15 +1076,17 @@ int evtchn_unmask(unsigned int port) - { - struct domain *d = current->domain; - struct evtchn *evtchn; -- unsigned long flags; - - if ( unlikely(!port_is_valid(d, port)) ) - return -EINVAL; - - evtchn = evtchn_from_port(d, port); -- spin_lock_irqsave(&evtchn->lock, flags); -+ -+ evtchn_read_lock(evtchn); -+ - evtchn_port_unmask(d, evtchn); -- spin_unlock_irqrestore(&evtchn->lock, flags); -+ -+ evtchn_read_unlock(evtchn); - - return 0; - } -@@ -1304,7 +1332,6 @@ int alloc_unbound_xen_event_channel( - { - struct evtchn *chn; - int port, rc; -- unsigned long flags; - - spin_lock(&ld->event_lock); - -@@ -1317,14 +1344,14 @@ int alloc_unbound_xen_event_channel( - if ( rc ) - goto out; - -- spin_lock_irqsave(&chn->lock, flags); -+ evtchn_write_lock(chn); - - chn->state = ECS_UNBOUND; - chn->xen_consumer = get_xen_consumer(notification_fn); - chn->notify_vcpu_id = lvcpu; - chn->u.unbound.remote_domid = remote_domid; - -- spin_unlock_irqrestore(&chn->lock, flags); -+ evtchn_write_unlock(chn); - - write_atomic(&ld->xen_evtchns, ld->xen_evtchns + 1); - -@@ -1356,7 +1383,6 @@ void notify_via_xen_event_channel(struct domain *ld, int lport) - { - struct evtchn *lchn, *rchn; - struct domain *rd; -- unsigned long flags; - - if ( !port_is_valid(ld, lport) ) - { -@@ -1371,7 +1397,8 @@ void notify_via_xen_event_channel(struct domain *ld, int lport) - - lchn = evtchn_from_port(ld, lport); - -- spin_lock_irqsave(&lchn->lock, flags); -+ if ( !evtchn_read_trylock(lchn) ) -+ return; - - if ( likely(lchn->state == ECS_INTERDOMAIN) ) - { -@@ -1381,7 +1408,7 @@ void notify_via_xen_event_channel(struct domain *ld, int lport) - evtchn_port_set_pending(rd, rchn->notify_vcpu_id, rchn); - } - -- spin_unlock_irqrestore(&lchn->lock, flags); -+ 
evtchn_read_unlock(lchn); - } - - void evtchn_check_pollers(struct domain *d, unsigned int port) -diff --git a/xen/include/xen/event.h b/xen/include/xen/event.h -index fa93a3684a..6588333f42 100644 ---- a/xen/include/xen/event.h -+++ b/xen/include/xen/event.h -@@ -111,6 +111,21 @@ static inline unsigned int max_evtchns(const struct domain *d) - : BITS_PER_EVTCHN_WORD(d) * BITS_PER_EVTCHN_WORD(d); - } - -+static inline void evtchn_read_lock(struct evtchn *evtchn) -+{ -+ read_lock(&evtchn->lock); -+} -+ -+static inline bool evtchn_read_trylock(struct evtchn *evtchn) -+{ -+ return read_trylock(&evtchn->lock); -+} -+ -+static inline void evtchn_read_unlock(struct evtchn *evtchn) -+{ -+ read_unlock(&evtchn->lock); -+} -+ - static inline bool_t port_is_valid(struct domain *d, unsigned int p) - { - if ( p >= read_atomic(&d->valid_evtchns) ) -@@ -244,11 +259,10 @@ static inline bool evtchn_port_is_pending(struct domain *d, evtchn_port_t port) - { - struct evtchn *evtchn = evtchn_from_port(d, port); - bool rc; -- unsigned long flags; - -- spin_lock_irqsave(&evtchn->lock, flags); -+ evtchn_read_lock(evtchn); - rc = evtchn_is_pending(d, evtchn); -- spin_unlock_irqrestore(&evtchn->lock, flags); -+ evtchn_read_unlock(evtchn); - - return rc; - } -@@ -263,11 +277,12 @@ static inline bool evtchn_port_is_masked(struct domain *d, evtchn_port_t port) - { - struct evtchn *evtchn = evtchn_from_port(d, port); - bool rc; -- unsigned long flags; - -- spin_lock_irqsave(&evtchn->lock, flags); -+ evtchn_read_lock(evtchn); -+ - rc = evtchn_is_masked(d, evtchn); -- spin_unlock_irqrestore(&evtchn->lock, flags); -+ -+ evtchn_read_unlock(evtchn); - - return rc; - } -diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h -index 97ba8e0795..f782ffeb82 100644 ---- a/xen/include/xen/sched.h -+++ b/xen/include/xen/sched.h -@@ -85,7 +85,7 @@ extern domid_t hardware_domid; - - struct evtchn - { -- spinlock_t lock; -+ rwlock_t lock; - #define ECS_FREE 0 /* Channel is available for use. */ - #define ECS_RESERVED 1 /* Channel is reserved. */ - #define ECS_UNBOUND 2 /* Channel is waiting to bind to a remote domain. */ -@@ -114,6 +114,9 @@ struct evtchn - u16 virq; /* state == ECS_VIRQ */ - } u; - u8 priority; -+#ifndef NDEBUG -+ u8 old_state; /* State when taking lock in write mode. */ -+#endif - u8 last_priority; - u16 last_vcpu_id; - #ifdef CONFIG_XSM --- -2.20.1 - diff --git a/xen.git-1cfb9b1c5b9e4c024f5f139d7a3d0357d2417b13.patch b/xen.git-1cfb9b1c5b9e4c024f5f139d7a3d0357d2417b13.patch deleted file mode 100644 index 12c64ca..0000000 --- a/xen.git-1cfb9b1c5b9e4c024f5f139d7a3d0357d2417b13.patch +++ /dev/null @@ -1,97 +0,0 @@ -From 1cfb9b1c5b9e4c024f5f139d7a3d0357d2417b13 Mon Sep 17 00:00:00 2001 -From: Juergen Gross -Date: Tue, 1 Dec 2020 15:33:19 +0100 -Subject: [PATCH] xen/events: access last_priority and last_vcpu_id together - -The queue for a fifo event is depending on the vcpu_id and the -priority of the event. When sending an event it might happen the -event needs to change queues and the old queue needs to be kept for -keeping the links between queue elements intact. For this purpose -the event channel contains last_priority and last_vcpu_id values -elements for being able to identify the old queue. - -In order to avoid races always access last_priority and last_vcpu_id -with a single atomic operation avoiding any inconsistencies. 
- -Signed-off-by: Juergen Gross -Reviewed-by: Julien Grall -master commit: 1277cb9dc5e966f1faf665bcded02b7533e38078 -master date: 2020-11-24 11:23:42 +0100 ---- - xen/common/event_fifo.c | 25 +++++++++++++++++++------ - xen/include/xen/sched.h | 3 +-- - 2 files changed, 20 insertions(+), 8 deletions(-) - -diff --git a/xen/common/event_fifo.c b/xen/common/event_fifo.c -index 27ab3a1c3f..2037b24196 100644 ---- a/xen/common/event_fifo.c -+++ b/xen/common/event_fifo.c -@@ -21,6 +21,14 @@ - - #include - -+union evtchn_fifo_lastq { -+ uint32_t raw; -+ struct { -+ uint8_t last_priority; -+ uint16_t last_vcpu_id; -+ }; -+}; -+ - static inline event_word_t *evtchn_fifo_word_from_port(const struct domain *d, - unsigned int port) - { -@@ -65,16 +73,18 @@ static struct evtchn_fifo_queue *lock_old_queue(const struct domain *d, - struct vcpu *v; - struct evtchn_fifo_queue *q, *old_q; - unsigned int try; -+ union evtchn_fifo_lastq lastq; - - for ( try = 0; try < 3; try++ ) - { -- v = d->vcpu[evtchn->last_vcpu_id]; -- old_q = &v->evtchn_fifo->queue[evtchn->last_priority]; -+ lastq.raw = read_atomic(&evtchn->fifo_lastq); -+ v = d->vcpu[lastq.last_vcpu_id]; -+ old_q = &v->evtchn_fifo->queue[lastq.last_priority]; - - spin_lock_irqsave(&old_q->lock, *flags); - -- v = d->vcpu[evtchn->last_vcpu_id]; -- q = &v->evtchn_fifo->queue[evtchn->last_priority]; -+ v = d->vcpu[lastq.last_vcpu_id]; -+ q = &v->evtchn_fifo->queue[lastq.last_priority]; - - if ( old_q == q ) - return old_q; -@@ -225,8 +235,11 @@ static void evtchn_fifo_set_pending(struct vcpu *v, struct evtchn *evtchn) - /* Moved to a different queue? */ - if ( old_q != q ) - { -- evtchn->last_vcpu_id = v->vcpu_id; -- evtchn->last_priority = q->priority; -+ union evtchn_fifo_lastq lastq = { }; -+ -+ lastq.last_vcpu_id = v->vcpu_id; -+ lastq.last_priority = q->priority; -+ write_atomic(&evtchn->fifo_lastq, lastq.raw); - - spin_unlock_irqrestore(&old_q->lock, flags); - spin_lock_irqsave(&q->lock, flags); -diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h -index f782ffeb82..99e2f1aac5 100644 ---- a/xen/include/xen/sched.h -+++ b/xen/include/xen/sched.h -@@ -117,8 +117,7 @@ struct evtchn - #ifndef NDEBUG - u8 old_state; /* State when taking lock in write mode. */ - #endif -- u8 last_priority; -- u16 last_vcpu_id; -+ u32 fifo_lastq; /* Data for fifo events identifying last queue. */ - #ifdef CONFIG_XSM - union { - #ifdef XSM_NEED_GENERIC_EVTCHN_SSID --- -2.20.1 - diff --git a/xen.git-72bd989f51878bc9ba61e930b0c29b921a30dc0d.patch b/xen.git-72bd989f51878bc9ba61e930b0c29b921a30dc0d.patch deleted file mode 100644 index 21c5ddd..0000000 --- a/xen.git-72bd989f51878bc9ba61e930b0c29b921a30dc0d.patch +++ /dev/null @@ -1,240 +0,0 @@ -From 72bd989f51878bc9ba61e930b0c29b921a30dc0d Mon Sep 17 00:00:00 2001 -From: Juergen Gross -Date: Tue, 1 Dec 2020 15:34:31 +0100 -Subject: [PATCH] xen/events: rework fifo queue locking - -Two cpus entering evtchn_fifo_set_pending() for the same event channel -can race in case the first one gets interrupted after setting -EVTCHN_FIFO_PENDING and when the other one manages to set -EVTCHN_FIFO_LINKED before the first one is testing that bit. This can -lead to evtchn_check_pollers() being called before the event is put -properly into the queue, resulting eventually in the guest not seeing -the event pending and thus blocking forever afterwards. 
- -Note that commit 5f2df45ead7c1195 ("xen/evtchn: rework per event channel -lock") made the race just more obvious, while the fifo event channel -implementation had this race forever since the introduction and use of -per-channel locks, when an unmask operation was running in parallel with -an event channel send operation. - -Using a spinlock for the per event channel lock had turned out -problematic due to some paths needing to take the lock are called with -interrupts off, so the lock would need to disable interrupts, which in -turn broke some use cases related to vm events. - -For avoiding this race the queue locking in evtchn_fifo_set_pending() -needs to be reworked to cover the test of EVTCHN_FIFO_PENDING, -EVTCHN_FIFO_MASKED and EVTCHN_FIFO_LINKED, too. Additionally when an -event channel needs to change queues both queues need to be locked -initially, in order to avoid having a window with no lock held at all. - -Reported-by: Jan Beulich -Fixes: 5f2df45ead7c1195 ("xen/evtchn: rework per event channel lock") -Fixes: de6acb78bf0e137c ("evtchn: use a per-event channel lock for sending events") -Signed-off-by: Juergen Gross -Reviewed-by: Jan Beulich -master commit: 71ac522909e9302350a88bc378be99affa87067c -master date: 2020-11-30 14:05:39 +0100 ---- - xen/common/event_fifo.c | 128 ++++++++++++++++++++++------------------ - 1 file changed, 70 insertions(+), 58 deletions(-) - -diff --git a/xen/common/event_fifo.c b/xen/common/event_fifo.c -index 2037b24196..2f5e868b7a 100644 ---- a/xen/common/event_fifo.c -+++ b/xen/common/event_fifo.c -@@ -66,38 +66,6 @@ static void evtchn_fifo_init(struct domain *d, struct evtchn *evtchn) - d->domain_id, evtchn->port); - } - --static struct evtchn_fifo_queue *lock_old_queue(const struct domain *d, -- struct evtchn *evtchn, -- unsigned long *flags) --{ -- struct vcpu *v; -- struct evtchn_fifo_queue *q, *old_q; -- unsigned int try; -- union evtchn_fifo_lastq lastq; -- -- for ( try = 0; try < 3; try++ ) -- { -- lastq.raw = read_atomic(&evtchn->fifo_lastq); -- v = d->vcpu[lastq.last_vcpu_id]; -- old_q = &v->evtchn_fifo->queue[lastq.last_priority]; -- -- spin_lock_irqsave(&old_q->lock, *flags); -- -- v = d->vcpu[lastq.last_vcpu_id]; -- q = &v->evtchn_fifo->queue[lastq.last_priority]; -- -- if ( old_q == q ) -- return old_q; -- -- spin_unlock_irqrestore(&old_q->lock, *flags); -- } -- -- gprintk(XENLOG_WARNING, -- "dom%d port %d lost event (too many queue changes)\n", -- d->domain_id, evtchn->port); -- return NULL; --} -- - static int try_set_link(event_word_t *word, event_word_t *w, uint32_t link) - { - event_word_t new, old; -@@ -169,6 +137,9 @@ static void evtchn_fifo_set_pending(struct vcpu *v, struct evtchn *evtchn) - event_word_t *word; - unsigned long flags; - bool_t was_pending; -+ struct evtchn_fifo_queue *q, *old_q; -+ unsigned int try; -+ bool linked = true; - - port = evtchn->port; - word = evtchn_fifo_word_from_port(d, port); -@@ -183,17 +154,67 @@ static void evtchn_fifo_set_pending(struct vcpu *v, struct evtchn *evtchn) - return; - } - -+ /* -+ * Lock all queues related to the event channel (in case of a queue change -+ * this might be two). -+ * It is mandatory to do that before setting and testing the PENDING bit -+ * and to hold the current queue lock until the event has been put into the -+ * list of pending events in order to avoid waking up a guest without the -+ * event being visibly pending in the guest. 
-+ */ -+ for ( try = 0; try < 3; try++ ) -+ { -+ union evtchn_fifo_lastq lastq; -+ const struct vcpu *old_v; -+ -+ lastq.raw = read_atomic(&evtchn->fifo_lastq); -+ old_v = d->vcpu[lastq.last_vcpu_id]; -+ -+ q = &v->evtchn_fifo->queue[evtchn->priority]; -+ old_q = &old_v->evtchn_fifo->queue[lastq.last_priority]; -+ -+ if ( q == old_q ) -+ spin_lock_irqsave(&q->lock, flags); -+ else if ( q < old_q ) -+ { -+ spin_lock_irqsave(&q->lock, flags); -+ spin_lock(&old_q->lock); -+ } -+ else -+ { -+ spin_lock_irqsave(&old_q->lock, flags); -+ spin_lock(&q->lock); -+ } -+ -+ lastq.raw = read_atomic(&evtchn->fifo_lastq); -+ old_v = d->vcpu[lastq.last_vcpu_id]; -+ if ( q == &v->evtchn_fifo->queue[evtchn->priority] && -+ old_q == &old_v->evtchn_fifo->queue[lastq.last_priority] ) -+ break; -+ -+ if ( q != old_q ) -+ spin_unlock(&old_q->lock); -+ spin_unlock_irqrestore(&q->lock, flags); -+ } -+ - was_pending = guest_test_and_set_bit(d, EVTCHN_FIFO_PENDING, word); - -+ /* If we didn't get the lock bail out. */ -+ if ( try == 3 ) -+ { -+ gprintk(XENLOG_WARNING, -+ "%pd port %u lost event (too many queue changes)\n", -+ d, evtchn->port); -+ goto done; -+ } -+ - /* - * Link the event if it unmasked and not already linked. - */ - if ( !guest_test_bit(d, EVTCHN_FIFO_MASKED, word) && - !guest_test_bit(d, EVTCHN_FIFO_LINKED, word) ) - { -- struct evtchn_fifo_queue *q, *old_q; - event_word_t *tail_word; -- bool_t linked = 0; - - /* - * Control block not mapped. The guest must not unmask an -@@ -204,25 +225,11 @@ static void evtchn_fifo_set_pending(struct vcpu *v, struct evtchn *evtchn) - { - printk(XENLOG_G_WARNING - "%pv has no FIFO event channel control block\n", v); -- goto done; -+ goto unlock; - } - -- /* -- * No locking around getting the queue. This may race with -- * changing the priority but we are allowed to signal the -- * event once on the old priority. -- */ -- q = &v->evtchn_fifo->queue[evtchn->priority]; -- -- old_q = lock_old_queue(d, evtchn, &flags); -- if ( !old_q ) -- goto done; -- - if ( guest_test_and_set_bit(d, EVTCHN_FIFO_LINKED, word) ) -- { -- spin_unlock_irqrestore(&old_q->lock, flags); -- goto done; -- } -+ goto unlock; - - /* - * If this event was a tail, the old queue is now empty and -@@ -241,8 +248,8 @@ static void evtchn_fifo_set_pending(struct vcpu *v, struct evtchn *evtchn) - lastq.last_priority = q->priority; - write_atomic(&evtchn->fifo_lastq, lastq.raw); - -- spin_unlock_irqrestore(&old_q->lock, flags); -- spin_lock_irqsave(&q->lock, flags); -+ spin_unlock(&old_q->lock); -+ old_q = q; - } - - /* -@@ -255,6 +262,7 @@ static void evtchn_fifo_set_pending(struct vcpu *v, struct evtchn *evtchn) - * If the queue is empty (i.e., we haven't linked to the new - * event), head must be updated. 
- */ -+ linked = false; - if ( q->tail ) - { - tail_word = evtchn_fifo_word_from_port(d, q->tail); -@@ -263,15 +271,19 @@ static void evtchn_fifo_set_pending(struct vcpu *v, struct evtchn *evtchn) - if ( !linked ) - write_atomic(q->head, port); - q->tail = port; -+ } - -- spin_unlock_irqrestore(&q->lock, flags); -+ unlock: -+ if ( q != old_q ) -+ spin_unlock(&old_q->lock); -+ spin_unlock_irqrestore(&q->lock, flags); - -- if ( !linked -- && !guest_test_and_set_bit(d, q->priority, -- &v->evtchn_fifo->control_block->ready) ) -- vcpu_mark_events_pending(v); -- } - done: -+ if ( !linked && -+ !guest_test_and_set_bit(d, q->priority, -+ &v->evtchn_fifo->control_block->ready) ) -+ vcpu_mark_events_pending(v); -+ - if ( !was_pending ) - evtchn_check_pollers(d, port); - } --- -2.20.1 - diff --git a/xen.git-8d148003fdf7bd9e28137e6683ef46902af39146.patch b/xen.git-8d148003fdf7bd9e28137e6683ef46902af39146.patch deleted file mode 100644 index dfff098..0000000 --- a/xen.git-8d148003fdf7bd9e28137e6683ef46902af39146.patch +++ /dev/null @@ -1,41 +0,0 @@ -From 8d148003fdf7bd9e28137e6683ef46902af39146 Mon Sep 17 00:00:00 2001 -From: Jan Beulich -Date: Tue, 20 Oct 2020 14:42:52 +0200 -Subject: [PATCH] evtchn/fifo: use stable fields when recording "last queue" - information - -Both evtchn->priority and evtchn->notify_vcpu_id could change behind the -back of evtchn_fifo_set_pending(), as for it - in the case of -interdomain channels - only the remote side's per-channel lock is held. -Neither the queue's priority nor the vCPU's vcpu_id fields have similar -properties, so they seem better suited for the purpose. In particular -they reflect the respective evtchn fields' values at the time they were -used to determine queue and vCPU. - -Signed-off-by: Jan Beulich -Reviewed-by: Julien Grall -Reviewed-by: Paul Durrant -master commit: 6f6f07b64cbe90e54f8e62b4d6f2404cf5306536 -master date: 2020-10-02 08:37:35 +0200 ---- - xen/common/event_fifo.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/xen/common/event_fifo.c b/xen/common/event_fifo.c -index 68d0c7a632..27ab3a1c3f 100644 ---- a/xen/common/event_fifo.c -+++ b/xen/common/event_fifo.c -@@ -225,8 +225,8 @@ static void evtchn_fifo_set_pending(struct vcpu *v, struct evtchn *evtchn) - /* Moved to a different queue? 
*/ - if ( old_q != q ) - { -- evtchn->last_vcpu_id = evtchn->notify_vcpu_id; -- evtchn->last_priority = evtchn->priority; -+ evtchn->last_vcpu_id = v->vcpu_id; -+ evtchn->last_priority = q->priority; - - spin_unlock_irqrestore(&old_q->lock, flags); - spin_lock_irqsave(&q->lock, flags); --- -2.20.1 - diff --git a/xen.spec b/xen.spec index 122da22..40f2d33 100644 --- a/xen.spec +++ b/xen.spec @@ -57,8 +57,8 @@ Summary: Xen is a virtual machine monitor Name: xen -Version: 4.14.0 -Release: 14%{?dist} +Version: 4.14.1 +Release: 1%{?dist} License: GPLv2+ and LGPLv2+ and BSD URL: http://xen.org/ Source0: https://downloads.xenproject.org/release/xen/%{version}/xen-%{version}.tar.gz @@ -111,72 +111,8 @@ Patch39: qemu.trad.CVE-2017-9330.patch Patch40: xen.drop.brctl.patch Patch42: xen.gcc9.fixes.patch Patch45: xen.gcc10.fixes.patch -Patch46: xsa335-qemu.patch -Patch47: xsa335-trad.patch -Patch48: xsa333.patch -Patch49: xsa334.patch -Patch50: xsa336.patch -Patch51: xsa337-1.patch -Patch52: xsa337-2.patch -Patch53: xsa338.patch -Patch54: xsa339.patch -Patch55: xsa340.patch -Patch56: xsa342.patch -Patch57: xsa343-1.patch -Patch58: xsa343-2.patch -Patch59: xsa343-3.patch -Patch60: xsa344-1.patch -Patch61: xsa344-2.patch -Patch62: xsa345-4.14-0001-x86-mm-Refactor-map_pages_to_xen-to-have-only-a-sing.patch -Patch63: xsa345-4.14-0002-x86-mm-Refactor-modify_xen_mappings-to-have-one-exit.patch -Patch64: xsa345-4.14-0003-x86-mm-Prevent-some-races-in-hypervisor-mapping-upda.patch -Patch65: xsa346-1.patch -Patch66: xsa346-2.patch -Patch67: xsa347-4.14-1.patch -Patch68: xsa347-4.14-2.patch -Patch69: xsa347-4.14-3.patch Patch70: xen.gcc11.fixes.patch -Patch71: xsa286-4.14-0001-x86-pv-Drop-FLUSH_TLB_GLOBAL-in-do_mmu_update-for-XP.patch -Patch72: xsa286-4.14-0002-x86-pv-Flush-TLB-in-response-to-paging-structure-cha.patch -Patch73: xsa351-arm.patch -Patch74: xsa351-x86-4.14-1.patch -Patch75: xsa351-x86-4.14-2.patch Patch76: zstd-dom0.patch -Patch77: xsa355.patch -Patch78: xsa115-4.14-c-0001-tools-xenstore-allow-removing-child-of-a-node-exceed.patch -Patch79: xsa115-4.14-c-0002-tools-xenstore-ignore-transaction-id-for-un-watch.patch -Patch80: xsa115-4.14-c-0003-tools-xenstore-fix-node-accounting-after-failed-node.patch -Patch81: xsa115-4.14-c-0004-tools-xenstore-simplify-and-rename-check_event_node.patch -Patch82: xsa115-4.14-c-0005-tools-xenstore-check-privilege-for-XS_IS_DOMAIN_INTR.patch -Patch83: xsa115-4.14-c-0006-tools-xenstore-rework-node-removal.patch -Patch84: xsa115-4.14-c-0007-tools-xenstore-fire-watches-only-when-removing-a-spe.patch -Patch85: xsa115-4.14-c-0008-tools-xenstore-introduce-node_perms-structure.patch -Patch86: xsa115-4.14-c-0009-tools-xenstore-allow-special-watches-for-privileged-.patch -Patch87: xsa115-4.14-c-0010-tools-xenstore-avoid-watch-events-for-nodes-without-.patch -Patch88: xsa115-o-0001-tools-ocaml-xenstored-ignore-transaction-id-for-un-w.patch -Patch89: xsa115-o-0002-tools-ocaml-xenstored-check-privilege-for-XS_IS_DOMA.patch -Patch90: xsa115-o-0003-tools-ocaml-xenstored-unify-watch-firing.patch -Patch91: xsa115-o-0004-tools-ocaml-xenstored-introduce-permissions-for-spec.patch -Patch92: xsa115-o-0005-tools-ocaml-xenstored-avoid-watch-events-for-nodes-w.patch -Patch93: xsa115-o-0006-tools-ocaml-xenstored-add-xenstored.conf-flag-to-tur.patch -Patch94: xsa322-4.14-c.patch -Patch95: xsa322-o.patch -Patch96: xsa323.patch -Patch97: xsa324.patch -Patch98: xsa325-4.14.patch -Patch99: xsa330.patch -Patch100: xsa348-1.patch -Patch101: xsa348-2.patch -Patch102: xsa348-3.patch -Patch103: 
xsa352.patch -Patch104: xsa353.patch -Patch105: xsa356.patch -Patch106: xen.git-8d148003fdf7bd9e28137e6683ef46902af39146.patch -Patch107: xen.git-1ad177370df2db9129c97c7305962fc5ad298728.patch -Patch108: xen.git-1cfb9b1c5b9e4c024f5f139d7a3d0357d2417b13.patch -Patch109: xen.git-72bd989f51878bc9ba61e930b0c29b921a30dc0d.patch -Patch110: xsa358.patch -Patch111: xsa359.patch %if %build_qemutrad @@ -378,71 +314,8 @@ manage Xen virtual machines. %patch40 -p1 %patch42 -p1 %patch45 -p1 -%patch47 -p1 -%patch48 -p1 -%patch49 -p1 -%patch50 -p1 -%patch51 -p1 -%patch52 -p1 -%patch53 -p1 -%patch54 -p1 -%patch55 -p1 -%patch56 -p1 -%patch57 -p1 -%patch58 -p1 -%patch59 -p1 -%patch60 -p1 -%patch61 -p1 -%patch62 -p1 -%patch63 -p1 -%patch64 -p1 -%patch65 -p1 -%patch66 -p1 -%patch67 -p1 -%patch68 -p1 -%patch69 -p1 %patch70 -p1 -%patch71 -p1 -%patch72 -p1 -%patch73 -p1 -%patch74 -p1 -%patch75 -p1 %patch76 -p1 -%patch77 -p1 -%patch78 -p1 -%patch79 -p1 -%patch80 -p1 -%patch81 -p1 -%patch82 -p1 -%patch83 -p1 -%patch84 -p1 -%patch85 -p1 -%patch86 -p1 -%patch87 -p1 -%patch88 -p1 -%patch89 -p1 -%patch90 -p1 -%patch91 -p1 -%patch92 -p1 -%patch93 -p1 -%patch94 -p1 -%patch95 -p1 -%patch96 -p1 -%patch97 -p1 -%patch98 -p1 -%patch99 -p1 -%patch100 -p1 -%patch101 -p1 -%patch102 -p1 -%patch103 -p1 -%patch104 -p1 -%patch105 -p1 -%patch106 -p1 -%patch107 -p1 -%patch108 -p1 -%patch109 -p1 -%patch110 -p1 -%patch111 -p1 # qemu-xen-traditional patches pushd tools/qemu-xen-traditional @@ -459,7 +332,6 @@ popd # qemu-xen patches pushd tools/qemu-xen -%patch46 -p1 popd # stubdom sources @@ -1036,6 +908,11 @@ fi %endif %changelog +* Sun Dec 20 2020 Michael Young - 4.14.1-1 +- update to 4.14.1 + adjust xen.canonicalize.patch + remove or adjust patches now included or superceded upstream + * Tue Dec 15 2020 Michael Young - 4.14.0-14 - xenstore watch notifications lacking permission checks [XSA-115, CVE-2020-29480] (#1908091) diff --git a/xsa115-4.14-c-0001-tools-xenstore-allow-removing-child-of-a-node-exceed.patch b/xsa115-4.14-c-0001-tools-xenstore-allow-removing-child-of-a-node-exceed.patch deleted file mode 100644 index fb29db7..0000000 --- a/xsa115-4.14-c-0001-tools-xenstore-allow-removing-child-of-a-node-exceed.patch +++ /dev/null @@ -1,157 +0,0 @@ -From 71623492f7b1b6d63ed76e2bf970c113b88ffa0b Mon Sep 17 00:00:00 2001 -From: Juergen Gross -Date: Thu, 11 Jun 2020 16:12:37 +0200 -Subject: [PATCH 01/10] tools/xenstore: allow removing child of a node - exceeding quota - -An unprivileged user of Xenstore is not allowed to write nodes with a -size exceeding a global quota, while privileged users like dom0 are -allowed to write such nodes. The size of a node is the needed space -to store all node specific data, this includes the names of all -children of the node. - -When deleting a node its parent has to be modified by removing the -name of the to be deleted child from it. - -This results in the strange situation that an unprivileged owner of a -node might not succeed in deleting that node in case its parent is -exceeding the quota of that unprivileged user (it might have been -written by dom0), as the user is not allowed to write the updated -parent node. - -Fix that by not checking the quota when writing a node for the -purpose of removing a child's name only. 
- -The same applies to transaction handling: a node being read during a -transaction is written to the transaction specific area and it should -not be tested for exceeding the quota, as it might not be owned by -the reader and presumably the original write would have failed if the -node is owned by the reader. - -This is part of XSA-115. - -Signed-off-by: Juergen Gross -Reviewed-by: Julien Grall -Reviewed-by: Paul Durrant ---- - tools/xenstore/xenstored_core.c | 20 +++++++++++--------- - tools/xenstore/xenstored_core.h | 3 ++- - tools/xenstore/xenstored_transaction.c | 2 +- - 3 files changed, 14 insertions(+), 11 deletions(-) - -diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c -index 7bd959f28b39..62a17a686edc 100644 ---- a/tools/xenstore/xenstored_core.c -+++ b/tools/xenstore/xenstored_core.c -@@ -419,7 +419,8 @@ static struct node *read_node(struct connection *conn, const void *ctx, - return node; - } - --int write_node_raw(struct connection *conn, TDB_DATA *key, struct node *node) -+int write_node_raw(struct connection *conn, TDB_DATA *key, struct node *node, -+ bool no_quota_check) - { - TDB_DATA data; - void *p; -@@ -429,7 +430,7 @@ int write_node_raw(struct connection *conn, TDB_DATA *key, struct node *node) - + node->num_perms*sizeof(node->perms[0]) - + node->datalen + node->childlen; - -- if (domain_is_unprivileged(conn) && -+ if (!no_quota_check && domain_is_unprivileged(conn) && - data.dsize >= quota_max_entry_size) { - errno = ENOSPC; - return errno; -@@ -457,14 +458,15 @@ int write_node_raw(struct connection *conn, TDB_DATA *key, struct node *node) - return 0; - } - --static int write_node(struct connection *conn, struct node *node) -+static int write_node(struct connection *conn, struct node *node, -+ bool no_quota_check) - { - TDB_DATA key; - - if (access_node(conn, node, NODE_ACCESS_WRITE, &key)) - return errno; - -- return write_node_raw(conn, &key, node); -+ return write_node_raw(conn, &key, node, no_quota_check); - } - - static enum xs_perm_type perm_for_conn(struct connection *conn, -@@ -1001,7 +1003,7 @@ static struct node *create_node(struct connection *conn, const void *ctx, - /* We write out the nodes down, setting destructor in case - * something goes wrong. 
*/ - for (i = node; i; i = i->parent) { -- if (write_node(conn, i)) { -+ if (write_node(conn, i, false)) { - domain_entry_dec(conn, i); - return NULL; - } -@@ -1041,7 +1043,7 @@ static int do_write(struct connection *conn, struct buffered_data *in) - } else { - node->data = in->buffer + offset; - node->datalen = datalen; -- if (write_node(conn, node)) -+ if (write_node(conn, node, false)) - return errno; - } - -@@ -1117,7 +1119,7 @@ static int remove_child_entry(struct connection *conn, struct node *node, - size_t childlen = strlen(node->children + offset); - memdel(node->children, offset, childlen + 1, node->childlen); - node->childlen -= childlen + 1; -- return write_node(conn, node); -+ return write_node(conn, node, true); - } - - -@@ -1256,7 +1258,7 @@ static int do_set_perms(struct connection *conn, struct buffered_data *in) - node->num_perms = num; - domain_entry_inc(conn, node); - -- if (write_node(conn, node)) -+ if (write_node(conn, node, false)) - return errno; - - fire_watches(conn, in, name, false); -@@ -1516,7 +1518,7 @@ static void manual_node(const char *name, const char *child) - if (child) - node->childlen = strlen(child) + 1; - -- if (write_node(NULL, node)) -+ if (write_node(NULL, node, false)) - barf_perror("Could not create initial node %s", name); - talloc_free(node); - } -diff --git a/tools/xenstore/xenstored_core.h b/tools/xenstore/xenstored_core.h -index c4c32bc88f0c..29d638fbc5a0 100644 ---- a/tools/xenstore/xenstored_core.h -+++ b/tools/xenstore/xenstored_core.h -@@ -149,7 +149,8 @@ void send_ack(struct connection *conn, enum xsd_sockmsg_type type); - char *xenstore_canonicalize(struct connection *conn, const void *ctx, const char *node); - - /* Write a node to the tdb data base. */ --int write_node_raw(struct connection *conn, TDB_DATA *key, struct node *node); -+int write_node_raw(struct connection *conn, TDB_DATA *key, struct node *node, -+ bool no_quota_check); - - /* Get this node, checking we have permissions. */ - struct node *get_node(struct connection *conn, -diff --git a/tools/xenstore/xenstored_transaction.c b/tools/xenstore/xenstored_transaction.c -index 2824f7b359b8..e87897573469 100644 ---- a/tools/xenstore/xenstored_transaction.c -+++ b/tools/xenstore/xenstored_transaction.c -@@ -276,7 +276,7 @@ int access_node(struct connection *conn, struct node *node, - i->check_gen = true; - if (node->generation != NO_GENERATION) { - set_tdb_key(trans_name, &local_key); -- ret = write_node_raw(conn, &local_key, node); -+ ret = write_node_raw(conn, &local_key, node, true); - if (ret) - goto err; - i->ta_node = true; --- -2.17.1 - diff --git a/xsa115-4.14-c-0002-tools-xenstore-ignore-transaction-id-for-un-watch.patch b/xsa115-4.14-c-0002-tools-xenstore-ignore-transaction-id-for-un-watch.patch deleted file mode 100644 index 42ccd5a..0000000 --- a/xsa115-4.14-c-0002-tools-xenstore-ignore-transaction-id-for-un-watch.patch +++ /dev/null @@ -1,86 +0,0 @@ -From 072c729cfe90b4b09cacb12d912ba088db8274fe Mon Sep 17 00:00:00 2001 -From: Juergen Gross -Date: Thu, 11 Jun 2020 16:12:38 +0200 -Subject: [PATCH 02/10] tools/xenstore: ignore transaction id for [un]watch - -Instead of ignoring the transaction id for XS_WATCH and XS_UNWATCH -commands as it is documented in docs/misc/xenstore.txt, it is tested -for validity today. - -Really ignore the transaction id for XS_WATCH and XS_UNWATCH. - -This is part of XSA-115. 
- -Signed-off-by: Juergen Gross -Reviewed-by: Julien Grall -Reviewed-by: Paul Durrant ---- - tools/xenstore/xenstored_core.c | 26 ++++++++++++++++---------- - 1 file changed, 16 insertions(+), 10 deletions(-) - -diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c -index 62a17a686edc..2f989524b497 100644 ---- a/tools/xenstore/xenstored_core.c -+++ b/tools/xenstore/xenstored_core.c -@@ -1270,13 +1270,17 @@ static int do_set_perms(struct connection *conn, struct buffered_data *in) - static struct { - const char *str; - int (*func)(struct connection *conn, struct buffered_data *in); -+ unsigned int flags; -+#define XS_FLAG_NOTID (1U << 0) /* Ignore transaction id. */ - } const wire_funcs[XS_TYPE_COUNT] = { - [XS_CONTROL] = { "CONTROL", do_control }, - [XS_DIRECTORY] = { "DIRECTORY", send_directory }, - [XS_READ] = { "READ", do_read }, - [XS_GET_PERMS] = { "GET_PERMS", do_get_perms }, -- [XS_WATCH] = { "WATCH", do_watch }, -- [XS_UNWATCH] = { "UNWATCH", do_unwatch }, -+ [XS_WATCH] = -+ { "WATCH", do_watch, XS_FLAG_NOTID }, -+ [XS_UNWATCH] = -+ { "UNWATCH", do_unwatch, XS_FLAG_NOTID }, - [XS_TRANSACTION_START] = { "TRANSACTION_START", do_transaction_start }, - [XS_TRANSACTION_END] = { "TRANSACTION_END", do_transaction_end }, - [XS_INTRODUCE] = { "INTRODUCE", do_introduce }, -@@ -1298,7 +1302,7 @@ static struct { - - static const char *sockmsg_string(enum xsd_sockmsg_type type) - { -- if ((unsigned)type < XS_TYPE_COUNT && wire_funcs[type].str) -+ if ((unsigned int)type < ARRAY_SIZE(wire_funcs) && wire_funcs[type].str) - return wire_funcs[type].str; - - return "**UNKNOWN**"; -@@ -1313,7 +1317,14 @@ static void process_message(struct connection *conn, struct buffered_data *in) - enum xsd_sockmsg_type type = in->hdr.msg.type; - int ret; - -- trans = transaction_lookup(conn, in->hdr.msg.tx_id); -+ if ((unsigned int)type >= XS_TYPE_COUNT || !wire_funcs[type].func) { -+ eprintf("Client unknown operation %i", type); -+ send_error(conn, ENOSYS); -+ return; -+ } -+ -+ trans = (wire_funcs[type].flags & XS_FLAG_NOTID) -+ ? NULL : transaction_lookup(conn, in->hdr.msg.tx_id); - if (IS_ERR(trans)) { - send_error(conn, -PTR_ERR(trans)); - return; -@@ -1322,12 +1333,7 @@ static void process_message(struct connection *conn, struct buffered_data *in) - assert(conn->transaction == NULL); - conn->transaction = trans; - -- if ((unsigned)type < XS_TYPE_COUNT && wire_funcs[type].func) -- ret = wire_funcs[type].func(conn, in); -- else { -- eprintf("Client unknown operation %i", type); -- ret = ENOSYS; -- } -+ ret = wire_funcs[type].func(conn, in); - if (ret) - send_error(conn, ret); - --- -2.17.1 - diff --git a/xsa115-4.14-c-0003-tools-xenstore-fix-node-accounting-after-failed-node.patch b/xsa115-4.14-c-0003-tools-xenstore-fix-node-accounting-after-failed-node.patch deleted file mode 100644 index 94c3f1f..0000000 --- a/xsa115-4.14-c-0003-tools-xenstore-fix-node-accounting-after-failed-node.patch +++ /dev/null @@ -1,104 +0,0 @@ -From a133627453898759ca73dd5c1c185c3830fed754 Mon Sep 17 00:00:00 2001 -From: Juergen Gross -Date: Thu, 11 Jun 2020 16:12:39 +0200 -Subject: [PATCH 03/10] tools/xenstore: fix node accounting after failed node - creation - -When a node creation fails the number of nodes of the domain should be -the same as before the failed node creation. In case of failure when -trying to create a node requiring to create one or more intermediate -nodes as well (e.g. 
when /a/b/c/d is to be created, but /a/b isn't -existing yet) it might happen that the number of nodes of the creating -domain is not reset to the value it had before. - -So move the quota accounting out of construct_node() and into the node -write loop in create_node() in order to be able to undo the accounting -in case of an error in the intermediate node destructor. - -This is part of XSA-115. - -Signed-off-by: Juergen Gross -Reviewed-by: Paul Durrant -Acked-by: Julien Grall ---- - tools/xenstore/xenstored_core.c | 37 ++++++++++++++++++++++----------- - 1 file changed, 25 insertions(+), 12 deletions(-) - -diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c -index 2f989524b497..c971519e542a 100644 ---- a/tools/xenstore/xenstored_core.c -+++ b/tools/xenstore/xenstored_core.c -@@ -927,11 +927,6 @@ static struct node *construct_node(struct connection *conn, const void *ctx, - if (!parent) - return NULL; - -- if (domain_entry(conn) >= quota_nb_entry_per_domain) { -- errno = ENOSPC; -- return NULL; -- } -- - /* Add child to parent. */ - base = basename(name); - baselen = strlen(base) + 1; -@@ -964,7 +959,6 @@ static struct node *construct_node(struct connection *conn, const void *ctx, - node->children = node->data = NULL; - node->childlen = node->datalen = 0; - node->parent = parent; -- domain_entry_inc(conn, node); - return node; - - nomem: -@@ -984,6 +978,9 @@ static int destroy_node(void *_node) - key.dsize = strlen(node->name); - - tdb_delete(tdb_ctx, key); -+ -+ domain_entry_dec(talloc_parent(node), node); -+ - return 0; - } - -@@ -1000,18 +997,34 @@ static struct node *create_node(struct connection *conn, const void *ctx, - node->data = data; - node->datalen = datalen; - -- /* We write out the nodes down, setting destructor in case -- * something goes wrong. */ -+ /* -+ * We write out the nodes bottom up. -+ * All new created nodes will have i->parent set, while the final -+ * node will be already existing and won't have i->parent set. -+ * New nodes are subject to quota handling. -+ * Initially set a destructor for all new nodes removing them from -+ * TDB again and undoing quota accounting for the case of an error -+ * during the write loop. -+ */ - for (i = node; i; i = i->parent) { -- if (write_node(conn, i, false)) { -- domain_entry_dec(conn, i); -+ /* i->parent is set for each new node, so check quota. */ -+ if (i->parent && -+ domain_entry(conn) >= quota_nb_entry_per_domain) { -+ errno = ENOSPC; - return NULL; - } -- talloc_set_destructor(i, destroy_node); -+ if (write_node(conn, i, false)) -+ return NULL; -+ -+ /* Account for new node, set destructor for error case. 
*/ -+ if (i->parent) { -+ domain_entry_inc(conn, i); -+ talloc_set_destructor(i, destroy_node); -+ } - } - - /* OK, now remove destructors so they stay around */ -- for (i = node; i; i = i->parent) -+ for (i = node; i->parent; i = i->parent) - talloc_set_destructor(i, NULL); - return node; - } --- -2.17.1 - diff --git a/xsa115-4.14-c-0004-tools-xenstore-simplify-and-rename-check_event_node.patch b/xsa115-4.14-c-0004-tools-xenstore-simplify-and-rename-check_event_node.patch deleted file mode 100644 index 5a7d705..0000000 --- a/xsa115-4.14-c-0004-tools-xenstore-simplify-and-rename-check_event_node.patch +++ /dev/null @@ -1,55 +0,0 @@ -From dc6cf381bdeca4013b6bfe25c27e57f010e7ca84 Mon Sep 17 00:00:00 2001 -From: Juergen Gross -Date: Thu, 11 Jun 2020 16:12:40 +0200 -Subject: [PATCH 04/10] tools/xenstore: simplify and rename check_event_node() - -There is no path which allows to call check_event_node() without a -event name. So don't let the result depend on the name being NULL and -add an assert() covering that case. - -Rename the function to check_special_event() to better match the -semantics. - -This is part of XSA-115. - -Signed-off-by: Juergen Gross -Reviewed-by: Julien Grall -Reviewed-by: Paul Durrant ---- - tools/xenstore/xenstored_watch.c | 12 +++++------- - 1 file changed, 5 insertions(+), 7 deletions(-) - -diff --git a/tools/xenstore/xenstored_watch.c b/tools/xenstore/xenstored_watch.c -index 7dedca60dfd6..f2f1bed47cc6 100644 ---- a/tools/xenstore/xenstored_watch.c -+++ b/tools/xenstore/xenstored_watch.c -@@ -47,13 +47,11 @@ struct watch - char *node; - }; - --static bool check_event_node(const char *node) -+static bool check_special_event(const char *name) - { -- if (!node || !strstarts(node, "@")) { -- errno = EINVAL; -- return false; -- } -- return true; -+ assert(name); -+ -+ return strstarts(name, "@"); - } - - /* Is child a subnode of parent, or equal? */ -@@ -87,7 +85,7 @@ static void add_event(struct connection *conn, - unsigned int len; - char *data; - -- if (!check_event_node(name)) { -+ if (!check_special_event(name)) { - /* Can this conn load node, or see that it doesn't exist? */ - struct node *node = get_node(conn, ctx, name, XS_PERM_READ); - /* --- -2.17.1 - diff --git a/xsa115-4.14-c-0005-tools-xenstore-check-privilege-for-XS_IS_DOMAIN_INTR.patch b/xsa115-4.14-c-0005-tools-xenstore-check-privilege-for-XS_IS_DOMAIN_INTR.patch deleted file mode 100644 index 8025401..0000000 --- a/xsa115-4.14-c-0005-tools-xenstore-check-privilege-for-XS_IS_DOMAIN_INTR.patch +++ /dev/null @@ -1,115 +0,0 @@ -From cd456dd7e3c4bbe229a0307a469c2fc3b8e7b590 Mon Sep 17 00:00:00 2001 -From: Juergen Gross -Date: Thu, 11 Jun 2020 16:12:41 +0200 -Subject: [PATCH 05/10] tools/xenstore: check privilege for - XS_IS_DOMAIN_INTRODUCED - -The Xenstore command XS_IS_DOMAIN_INTRODUCED should be possible for -privileged domains only (the only user in the tree is the xenpaging -daemon). - -Instead of having the privilege test for each command introduce a -per-command flag for that purpose. - -This is part of XSA-115. 
- -Signed-off-by: Juergen Gross -Reviewed-by: Julien Grall -Reviewed-by: Paul Durrant ---- - tools/xenstore/xenstored_core.c | 24 ++++++++++++++++++------ - tools/xenstore/xenstored_domain.c | 7 ++----- - 2 files changed, 20 insertions(+), 11 deletions(-) - -diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c -index c971519e542a..f38196ae2825 100644 ---- a/tools/xenstore/xenstored_core.c -+++ b/tools/xenstore/xenstored_core.c -@@ -1285,8 +1285,10 @@ static struct { - int (*func)(struct connection *conn, struct buffered_data *in); - unsigned int flags; - #define XS_FLAG_NOTID (1U << 0) /* Ignore transaction id. */ -+#define XS_FLAG_PRIV (1U << 1) /* Privileged domain only. */ - } const wire_funcs[XS_TYPE_COUNT] = { -- [XS_CONTROL] = { "CONTROL", do_control }, -+ [XS_CONTROL] = -+ { "CONTROL", do_control, XS_FLAG_PRIV }, - [XS_DIRECTORY] = { "DIRECTORY", send_directory }, - [XS_READ] = { "READ", do_read }, - [XS_GET_PERMS] = { "GET_PERMS", do_get_perms }, -@@ -1296,8 +1298,10 @@ static struct { - { "UNWATCH", do_unwatch, XS_FLAG_NOTID }, - [XS_TRANSACTION_START] = { "TRANSACTION_START", do_transaction_start }, - [XS_TRANSACTION_END] = { "TRANSACTION_END", do_transaction_end }, -- [XS_INTRODUCE] = { "INTRODUCE", do_introduce }, -- [XS_RELEASE] = { "RELEASE", do_release }, -+ [XS_INTRODUCE] = -+ { "INTRODUCE", do_introduce, XS_FLAG_PRIV }, -+ [XS_RELEASE] = -+ { "RELEASE", do_release, XS_FLAG_PRIV }, - [XS_GET_DOMAIN_PATH] = { "GET_DOMAIN_PATH", do_get_domain_path }, - [XS_WRITE] = { "WRITE", do_write }, - [XS_MKDIR] = { "MKDIR", do_mkdir }, -@@ -1306,9 +1310,11 @@ static struct { - [XS_WATCH_EVENT] = { "WATCH_EVENT", NULL }, - [XS_ERROR] = { "ERROR", NULL }, - [XS_IS_DOMAIN_INTRODUCED] = -- { "IS_DOMAIN_INTRODUCED", do_is_domain_introduced }, -- [XS_RESUME] = { "RESUME", do_resume }, -- [XS_SET_TARGET] = { "SET_TARGET", do_set_target }, -+ { "IS_DOMAIN_INTRODUCED", do_is_domain_introduced, XS_FLAG_PRIV }, -+ [XS_RESUME] = -+ { "RESUME", do_resume, XS_FLAG_PRIV }, -+ [XS_SET_TARGET] = -+ { "SET_TARGET", do_set_target, XS_FLAG_PRIV }, - [XS_RESET_WATCHES] = { "RESET_WATCHES", do_reset_watches }, - [XS_DIRECTORY_PART] = { "DIRECTORY_PART", send_directory_part }, - }; -@@ -1336,6 +1342,12 @@ static void process_message(struct connection *conn, struct buffered_data *in) - return; - } - -+ if ((wire_funcs[type].flags & XS_FLAG_PRIV) && -+ domain_is_unprivileged(conn)) { -+ send_error(conn, EACCES); -+ return; -+ } -+ - trans = (wire_funcs[type].flags & XS_FLAG_NOTID) - ? 
NULL : transaction_lookup(conn, in->hdr.msg.tx_id); - if (IS_ERR(trans)) { -diff --git a/tools/xenstore/xenstored_domain.c b/tools/xenstore/xenstored_domain.c -index 06359503f091..2d0d87ee89e1 100644 ---- a/tools/xenstore/xenstored_domain.c -+++ b/tools/xenstore/xenstored_domain.c -@@ -372,7 +372,7 @@ int do_introduce(struct connection *conn, struct buffered_data *in) - if (get_strings(in, vec, ARRAY_SIZE(vec)) < ARRAY_SIZE(vec)) - return EINVAL; - -- if (domain_is_unprivileged(conn) || !conn->can_write) -+ if (!conn->can_write) - return EACCES; - - domid = atoi(vec[0]); -@@ -438,7 +438,7 @@ int do_set_target(struct connection *conn, struct buffered_data *in) - if (get_strings(in, vec, ARRAY_SIZE(vec)) < ARRAY_SIZE(vec)) - return EINVAL; - -- if (domain_is_unprivileged(conn) || !conn->can_write) -+ if (!conn->can_write) - return EACCES; - - domid = atoi(vec[0]); -@@ -473,9 +473,6 @@ static struct domain *onearg_domain(struct connection *conn, - if (!domid) - return ERR_PTR(-EINVAL); - -- if (domain_is_unprivileged(conn)) -- return ERR_PTR(-EACCES); -- - return find_connected_domain(domid); - } - --- -2.17.1 - diff --git a/xsa115-4.14-c-0006-tools-xenstore-rework-node-removal.patch b/xsa115-4.14-c-0006-tools-xenstore-rework-node-removal.patch deleted file mode 100644 index f2357b8..0000000 --- a/xsa115-4.14-c-0006-tools-xenstore-rework-node-removal.patch +++ /dev/null @@ -1,217 +0,0 @@ -From a3d8089532ae573c03e1cdb2fc3c5ee5ebb52a60 Mon Sep 17 00:00:00 2001 -From: Juergen Gross -Date: Thu, 11 Jun 2020 16:12:42 +0200 -Subject: [PATCH 06/10] tools/xenstore: rework node removal - -Today a Xenstore node is being removed by deleting it from the parent -first and then deleting itself and all its children. This results in -stale entries remaining in the data base in case e.g. a memory -allocation is failing during processing. This would result in the -rather strange behavior to be able to read a node (as its still in the -data base) while not being visible in the tree view of Xenstore. - -Fix that by deleting the nodes from the leaf side instead of starting -at the root. - -As fire_watches() is now called from _rm() the ctx parameter needs a -const attribute. - -This is part of XSA-115. - -Signed-off-by: Juergen Gross -Reviewed-by: Julien Grall -Reviewed-by: Paul Durrant ---- - tools/xenstore/xenstored_core.c | 99 ++++++++++++++++---------------- - tools/xenstore/xenstored_watch.c | 4 +- - tools/xenstore/xenstored_watch.h | 2 +- - 3 files changed, 54 insertions(+), 51 deletions(-) - -diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c -index f38196ae2825..dfdb64f3ee60 100644 ---- a/tools/xenstore/xenstored_core.c -+++ b/tools/xenstore/xenstored_core.c -@@ -1089,74 +1089,76 @@ static int do_mkdir(struct connection *conn, struct buffered_data *in) - return 0; - } - --static void delete_node(struct connection *conn, struct node *node) --{ -- unsigned int i; -- char *name; -- -- /* Delete self, then delete children. If we crash, then the worst -- that can happen is the children will continue to take up space, but -- will otherwise be unreachable. */ -- delete_node_single(conn, node); -- -- /* Delete children, too. */ -- for (i = 0; i < node->childlen; i += strlen(node->children+i) + 1) { -- struct node *child; -- -- name = talloc_asprintf(node, "%s/%s", node->name, -- node->children + i); -- child = name ? 
read_node(conn, node, name) : NULL; -- if (child) { -- delete_node(conn, child); -- } -- else { -- trace("delete_node: Error deleting child '%s/%s'!\n", -- node->name, node->children + i); -- /* Skip it, we've already deleted the parent. */ -- } -- talloc_free(name); -- } --} -- -- - /* Delete memory using memmove. */ - static void memdel(void *mem, unsigned off, unsigned len, unsigned total) - { - memmove(mem + off, mem + off + len, total - off - len); - } - -- --static int remove_child_entry(struct connection *conn, struct node *node, -- size_t offset) -+static void remove_child_entry(struct connection *conn, struct node *node, -+ size_t offset) - { - size_t childlen = strlen(node->children + offset); -+ - memdel(node->children, offset, childlen + 1, node->childlen); - node->childlen -= childlen + 1; -- return write_node(conn, node, true); -+ if (write_node(conn, node, true)) -+ corrupt(conn, "Can't update parent node '%s'", node->name); - } - -- --static int delete_child(struct connection *conn, -- struct node *node, const char *childname) -+static void delete_child(struct connection *conn, -+ struct node *node, const char *childname) - { - unsigned int i; - - for (i = 0; i < node->childlen; i += strlen(node->children+i) + 1) { - if (streq(node->children+i, childname)) { -- return remove_child_entry(conn, node, i); -+ remove_child_entry(conn, node, i); -+ return; - } - } - corrupt(conn, "Can't find child '%s' in %s", childname, node->name); -- return ENOENT; - } - -+static int delete_node(struct connection *conn, struct node *parent, -+ struct node *node) -+{ -+ char *name; -+ -+ /* Delete children. */ -+ while (node->childlen) { -+ struct node *child; -+ -+ name = talloc_asprintf(node, "%s/%s", node->name, -+ node->children); -+ child = name ? read_node(conn, node, name) : NULL; -+ if (child) { -+ if (delete_node(conn, node, child)) -+ return errno; -+ } else { -+ trace("delete_node: Error deleting child '%s/%s'!\n", -+ node->name, node->children); -+ /* Quit deleting. */ -+ errno = ENOMEM; -+ return errno; -+ } -+ talloc_free(name); -+ } -+ -+ delete_node_single(conn, node); -+ delete_child(conn, parent, basename(node->name)); -+ talloc_free(node); -+ -+ return 0; -+} - - static int _rm(struct connection *conn, const void *ctx, struct node *node, - const char *name) - { -- /* Delete from parent first, then if we crash, the worst that can -- happen is the child will continue to take up space, but will -- otherwise be unreachable. */ -+ /* -+ * Deleting node by node, so the result is always consistent even in -+ * case of a failure. -+ */ - struct node *parent; - char *parentname = get_parent(ctx, name); - -@@ -1167,11 +1169,13 @@ static int _rm(struct connection *conn, const void *ctx, struct node *node, - if (!parent) - return (errno == ENOMEM) ? ENOMEM : EINVAL; - -- if (delete_child(conn, parent, basename(name))) -- return EINVAL; -- -- delete_node(conn, node); -- return 0; -+ /* -+ * Fire the watches now, when we can still see the node permissions. -+ * This fine as we are single threaded and the next possible read will -+ * be handled only after the node has been really removed. 
-+ */ -+ fire_watches(conn, ctx, name, true); -+ return delete_node(conn, parent, node); - } - - -@@ -1209,7 +1213,6 @@ static int do_rm(struct connection *conn, struct buffered_data *in) - if (ret) - return ret; - -- fire_watches(conn, in, name, true); - send_ack(conn, XS_RM); - - return 0; -diff --git a/tools/xenstore/xenstored_watch.c b/tools/xenstore/xenstored_watch.c -index f2f1bed47cc6..f0bbfe7a6dc6 100644 ---- a/tools/xenstore/xenstored_watch.c -+++ b/tools/xenstore/xenstored_watch.c -@@ -77,7 +77,7 @@ static bool is_child(const char *child, const char *parent) - * Temporary memory allocations are done with ctx. - */ - static void add_event(struct connection *conn, -- void *ctx, -+ const void *ctx, - struct watch *watch, - const char *name) - { -@@ -121,7 +121,7 @@ static void add_event(struct connection *conn, - * Check whether any watch events are to be sent. - * Temporary memory allocations are done with ctx. - */ --void fire_watches(struct connection *conn, void *ctx, const char *name, -+void fire_watches(struct connection *conn, const void *ctx, const char *name, - bool recurse) - { - struct connection *i; -diff --git a/tools/xenstore/xenstored_watch.h b/tools/xenstore/xenstored_watch.h -index c72ea6a68542..54d4ea7e0d41 100644 ---- a/tools/xenstore/xenstored_watch.h -+++ b/tools/xenstore/xenstored_watch.h -@@ -25,7 +25,7 @@ int do_watch(struct connection *conn, struct buffered_data *in); - int do_unwatch(struct connection *conn, struct buffered_data *in); - - /* Fire all watches: recurse means all the children are affected (ie. rm). */ --void fire_watches(struct connection *conn, void *tmp, const char *name, -+void fire_watches(struct connection *conn, const void *tmp, const char *name, - bool recurse); - - void conn_delete_all_watches(struct connection *conn); --- -2.17.1 - diff --git a/xsa115-4.14-c-0007-tools-xenstore-fire-watches-only-when-removing-a-spe.patch b/xsa115-4.14-c-0007-tools-xenstore-fire-watches-only-when-removing-a-spe.patch deleted file mode 100644 index 008ce01..0000000 --- a/xsa115-4.14-c-0007-tools-xenstore-fire-watches-only-when-removing-a-spe.patch +++ /dev/null @@ -1,118 +0,0 @@ -From 3d4e3fd6c78795bf426947fbfbfa9af6568ece9f Mon Sep 17 00:00:00 2001 -From: Juergen Gross -Date: Thu, 11 Jun 2020 16:12:43 +0200 -Subject: [PATCH 07/10] tools/xenstore: fire watches only when removing a - specific node - -Instead of firing all watches for removing a subtree in one go, do so -only when the related node is being removed. - -The watches for the top-most node being removed include all watches -including that node, while watches for nodes below that are only fired -if they are matching exactly. This avoids firing any watch more than -once when removing a subtree. - -This is part of XSA-115. 
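A compact sketch of the matching rule this introduces (helper names invented; the real logic is the exact/is_child distinction in the xenstored_watch.c hunk below):

    #include <stdbool.h>
    #include <stdio.h>
    #include <string.h>

    /* Is child equal to parent or underneath it? (simplified: no
     * special-casing of the root path "/") */
    static bool is_child(const char *child, const char *parent)
    {
    	size_t len = strlen(parent);

    	return strncmp(child, parent, len) == 0 &&
    	       (child[len] == '\0' || child[len] == '/');
    }

    static bool watch_matches(const char *changed, const char *watched,
    			  bool exact)
    {
    	return exact ? strcmp(changed, watched) == 0
    		     : is_child(changed, watched);
    }

    int main(void)
    {
    	/* rm of /a: the top-most node fires with exact=false ... */
    	printf("%d\n", watch_matches("/a", "/a", false));	/* 1 */
    	/* ... while children fire with exact=true, so a watch on /a
    	 * is not triggered a second time for /a/b. */
    	printf("%d\n", watch_matches("/a/b", "/a", true));	/* 0 */
    	printf("%d\n", watch_matches("/a/b", "/a/b", true));	/* 1 */
    	return 0;
    }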
- -Signed-off-by: Juergen Gross -Reviewed-by: Julien Grall -Reviewed-by: Paul Durrant ---- - tools/xenstore/xenstored_core.c | 11 ++++++----- - tools/xenstore/xenstored_watch.c | 13 ++++++++----- - tools/xenstore/xenstored_watch.h | 4 ++-- - 3 files changed, 16 insertions(+), 12 deletions(-) - -diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c -index dfdb64f3ee60..20a7a3581555 100644 ---- a/tools/xenstore/xenstored_core.c -+++ b/tools/xenstore/xenstored_core.c -@@ -1120,8 +1120,8 @@ static void delete_child(struct connection *conn, - corrupt(conn, "Can't find child '%s' in %s", childname, node->name); - } - --static int delete_node(struct connection *conn, struct node *parent, -- struct node *node) -+static int delete_node(struct connection *conn, const void *ctx, -+ struct node *parent, struct node *node) - { - char *name; - -@@ -1133,7 +1133,7 @@ static int delete_node(struct connection *conn, struct node *parent, - node->children); - child = name ? read_node(conn, node, name) : NULL; - if (child) { -- if (delete_node(conn, node, child)) -+ if (delete_node(conn, ctx, node, child)) - return errno; - } else { - trace("delete_node: Error deleting child '%s/%s'!\n", -@@ -1145,6 +1145,7 @@ static int delete_node(struct connection *conn, struct node *parent, - talloc_free(name); - } - -+ fire_watches(conn, ctx, node->name, true); - delete_node_single(conn, node); - delete_child(conn, parent, basename(node->name)); - talloc_free(node); -@@ -1174,8 +1175,8 @@ static int _rm(struct connection *conn, const void *ctx, struct node *node, - * This fine as we are single threaded and the next possible read will - * be handled only after the node has been really removed. - */ -- fire_watches(conn, ctx, name, true); -- return delete_node(conn, parent, node); -+ fire_watches(conn, ctx, name, false); -+ return delete_node(conn, ctx, parent, node); - } - - -diff --git a/tools/xenstore/xenstored_watch.c b/tools/xenstore/xenstored_watch.c -index f0bbfe7a6dc6..3836675459fa 100644 ---- a/tools/xenstore/xenstored_watch.c -+++ b/tools/xenstore/xenstored_watch.c -@@ -122,7 +122,7 @@ static void add_event(struct connection *conn, - * Temporary memory allocations are done with ctx. - */ - void fire_watches(struct connection *conn, const void *ctx, const char *name, -- bool recurse) -+ bool exact) - { - struct connection *i; - struct watch *watch; -@@ -134,10 +134,13 @@ void fire_watches(struct connection *conn, const void *ctx, const char *name, - /* Create an event for each watch. */ - list_for_each_entry(i, &connections, list) { - list_for_each_entry(watch, &i->watches, list) { -- if (is_child(name, watch->node)) -- add_event(i, ctx, watch, name); -- else if (recurse && is_child(watch->node, name)) -- add_event(i, ctx, watch, watch->node); -+ if (exact) { -+ if (streq(name, watch->node)) -+ add_event(i, ctx, watch, name); -+ } else { -+ if (is_child(name, watch->node)) -+ add_event(i, ctx, watch, name); -+ } - } - } - } -diff --git a/tools/xenstore/xenstored_watch.h b/tools/xenstore/xenstored_watch.h -index 54d4ea7e0d41..1b3c80d3dda1 100644 ---- a/tools/xenstore/xenstored_watch.h -+++ b/tools/xenstore/xenstored_watch.h -@@ -24,9 +24,9 @@ - int do_watch(struct connection *conn, struct buffered_data *in); - int do_unwatch(struct connection *conn, struct buffered_data *in); - --/* Fire all watches: recurse means all the children are affected (ie. rm). */ -+/* Fire all watches: !exact means all the children are affected (ie. rm). 
*/ - void fire_watches(struct connection *conn, const void *tmp, const char *name, -- bool recurse); -+ bool exact); - - void conn_delete_all_watches(struct connection *conn); - --- -2.17.1 - diff --git a/xsa115-4.14-c-0008-tools-xenstore-introduce-node_perms-structure.patch b/xsa115-4.14-c-0008-tools-xenstore-introduce-node_perms-structure.patch deleted file mode 100644 index c295e8c..0000000 --- a/xsa115-4.14-c-0008-tools-xenstore-introduce-node_perms-structure.patch +++ /dev/null @@ -1,289 +0,0 @@ -From 1069c600f85ff583c461cfbfee1afb1a0731796e Mon Sep 17 00:00:00 2001 -From: Juergen Gross -Date: Thu, 11 Jun 2020 16:12:44 +0200 -Subject: [PATCH 08/10] tools/xenstore: introduce node_perms structure - -There are several places in xenstored using a permission array and the -size of that array. Introduce a new struct node_perms containing both. - -This is part of XSA-115. - -Signed-off-by: Juergen Gross -Acked-by: Julien Grall -Reviewed-by: Paul Durrant ---- - tools/xenstore/xenstored_core.c | 79 +++++++++++++++---------------- - tools/xenstore/xenstored_core.h | 8 +++- - tools/xenstore/xenstored_domain.c | 12 ++--- - 3 files changed, 50 insertions(+), 49 deletions(-) - -diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c -index 20a7a3581555..79d305fbbe58 100644 ---- a/tools/xenstore/xenstored_core.c -+++ b/tools/xenstore/xenstored_core.c -@@ -403,14 +403,14 @@ static struct node *read_node(struct connection *conn, const void *ctx, - /* Datalen, childlen, number of permissions */ - hdr = (void *)data.dptr; - node->generation = hdr->generation; -- node->num_perms = hdr->num_perms; -+ node->perms.num = hdr->num_perms; - node->datalen = hdr->datalen; - node->childlen = hdr->childlen; - - /* Permissions are struct xs_permissions. */ -- node->perms = hdr->perms; -+ node->perms.p = hdr->perms; - /* Data is binary blob (usually ascii, no nul). */ -- node->data = node->perms + node->num_perms; -+ node->data = node->perms.p + node->perms.num; - /* Children is strings, nul separated. 
*/ - node->children = node->data + node->datalen; - -@@ -427,7 +427,7 @@ int write_node_raw(struct connection *conn, TDB_DATA *key, struct node *node, - struct xs_tdb_record_hdr *hdr; - - data.dsize = sizeof(*hdr) -- + node->num_perms*sizeof(node->perms[0]) -+ + node->perms.num * sizeof(node->perms.p[0]) - + node->datalen + node->childlen; - - if (!no_quota_check && domain_is_unprivileged(conn) && -@@ -439,12 +439,13 @@ int write_node_raw(struct connection *conn, TDB_DATA *key, struct node *node, - data.dptr = talloc_size(node, data.dsize); - hdr = (void *)data.dptr; - hdr->generation = node->generation; -- hdr->num_perms = node->num_perms; -+ hdr->num_perms = node->perms.num; - hdr->datalen = node->datalen; - hdr->childlen = node->childlen; - -- memcpy(hdr->perms, node->perms, node->num_perms*sizeof(node->perms[0])); -- p = hdr->perms + node->num_perms; -+ memcpy(hdr->perms, node->perms.p, -+ node->perms.num * sizeof(*node->perms.p)); -+ p = hdr->perms + node->perms.num; - memcpy(p, node->data, node->datalen); - p += node->datalen; - memcpy(p, node->children, node->childlen); -@@ -470,8 +471,7 @@ static int write_node(struct connection *conn, struct node *node, - } - - static enum xs_perm_type perm_for_conn(struct connection *conn, -- struct xs_permissions *perms, -- unsigned int num) -+ const struct node_perms *perms) - { - unsigned int i; - enum xs_perm_type mask = XS_PERM_READ|XS_PERM_WRITE|XS_PERM_OWNER; -@@ -480,16 +480,16 @@ static enum xs_perm_type perm_for_conn(struct connection *conn, - mask &= ~XS_PERM_WRITE; - - /* Owners and tools get it all... */ -- if (!domain_is_unprivileged(conn) || perms[0].id == conn->id -- || (conn->target && perms[0].id == conn->target->id)) -+ if (!domain_is_unprivileged(conn) || perms->p[0].id == conn->id -+ || (conn->target && perms->p[0].id == conn->target->id)) - return (XS_PERM_READ|XS_PERM_WRITE|XS_PERM_OWNER) & mask; - -- for (i = 1; i < num; i++) -- if (perms[i].id == conn->id -- || (conn->target && perms[i].id == conn->target->id)) -- return perms[i].perms & mask; -+ for (i = 1; i < perms->num; i++) -+ if (perms->p[i].id == conn->id -+ || (conn->target && perms->p[i].id == conn->target->id)) -+ return perms->p[i].perms & mask; - -- return perms[0].perms & mask; -+ return perms->p[0].perms & mask; - } - - /* -@@ -536,7 +536,7 @@ static int ask_parents(struct connection *conn, const void *ctx, - return 0; - } - -- *perm = perm_for_conn(conn, node->perms, node->num_perms); -+ *perm = perm_for_conn(conn, &node->perms); - return 0; - } - -@@ -582,8 +582,7 @@ struct node *get_node(struct connection *conn, - node = read_node(conn, ctx, name); - /* If we don't have permission, we don't have node. 
*/ - if (node) { -- if ((perm_for_conn(conn, node->perms, node->num_perms) & perm) -- != perm) { -+ if ((perm_for_conn(conn, &node->perms) & perm) != perm) { - errno = EACCES; - node = NULL; - } -@@ -759,16 +758,15 @@ const char *onearg(struct buffered_data *in) - return in->buffer; - } - --static char *perms_to_strings(const void *ctx, -- struct xs_permissions *perms, unsigned int num, -+static char *perms_to_strings(const void *ctx, const struct node_perms *perms, - unsigned int *len) - { - unsigned int i; - char *strings = NULL; - char buffer[MAX_STRLEN(unsigned int) + 1]; - -- for (*len = 0, i = 0; i < num; i++) { -- if (!xs_perm_to_string(&perms[i], buffer, sizeof(buffer))) -+ for (*len = 0, i = 0; i < perms->num; i++) { -+ if (!xs_perm_to_string(&perms->p[i], buffer, sizeof(buffer))) - return NULL; - - strings = talloc_realloc(ctx, strings, char, -@@ -947,13 +945,13 @@ static struct node *construct_node(struct connection *conn, const void *ctx, - goto nomem; - - /* Inherit permissions, except unprivileged domains own what they create */ -- node->num_perms = parent->num_perms; -- node->perms = talloc_memdup(node, parent->perms, -- node->num_perms * sizeof(node->perms[0])); -- if (!node->perms) -+ node->perms.num = parent->perms.num; -+ node->perms.p = talloc_memdup(node, parent->perms.p, -+ node->perms.num * sizeof(*node->perms.p)); -+ if (!node->perms.p) - goto nomem; - if (domain_is_unprivileged(conn)) -- node->perms[0].id = conn->id; -+ node->perms.p[0].id = conn->id; - - /* No children, no data */ - node->children = node->data = NULL; -@@ -1230,7 +1228,7 @@ static int do_get_perms(struct connection *conn, struct buffered_data *in) - if (!node) - return errno; - -- strings = perms_to_strings(node, node->perms, node->num_perms, &len); -+ strings = perms_to_strings(node, &node->perms, &len); - if (!strings) - return errno; - -@@ -1241,13 +1239,12 @@ static int do_get_perms(struct connection *conn, struct buffered_data *in) - - static int do_set_perms(struct connection *conn, struct buffered_data *in) - { -- unsigned int num; -- struct xs_permissions *perms; -+ struct node_perms perms; - char *name, *permstr; - struct node *node; - -- num = xs_count_strings(in->buffer, in->used); -- if (num < 2) -+ perms.num = xs_count_strings(in->buffer, in->used); -+ if (perms.num < 2) - return EINVAL; - - /* First arg is node name. */ -@@ -1258,21 +1255,21 @@ static int do_set_perms(struct connection *conn, struct buffered_data *in) - return errno; - - permstr = in->buffer + strlen(in->buffer) + 1; -- num--; -+ perms.num--; - -- perms = talloc_array(node, struct xs_permissions, num); -- if (!perms) -+ perms.p = talloc_array(node, struct xs_permissions, perms.num); -+ if (!perms.p) - return ENOMEM; -- if (!xs_strings_to_perms(perms, num, permstr)) -+ if (!xs_strings_to_perms(perms.p, perms.num, permstr)) - return errno; - - /* Unprivileged domains may not change the owner. 
*/ -- if (domain_is_unprivileged(conn) && perms[0].id != node->perms[0].id) -+ if (domain_is_unprivileged(conn) && -+ perms.p[0].id != node->perms.p[0].id) - return EPERM; - - domain_entry_dec(conn, node); - node->perms = perms; -- node->num_perms = num; - domain_entry_inc(conn, node); - - if (write_node(conn, node, false)) -@@ -1547,8 +1544,8 @@ static void manual_node(const char *name, const char *child) - barf_perror("Could not allocate initial node %s", name); - - node->name = name; -- node->perms = &perms; -- node->num_perms = 1; -+ node->perms.p = &perms; -+ node->perms.num = 1; - node->children = (char *)child; - if (child) - node->childlen = strlen(child) + 1; -diff --git a/tools/xenstore/xenstored_core.h b/tools/xenstore/xenstored_core.h -index 29d638fbc5a0..47ba0916dbe2 100644 ---- a/tools/xenstore/xenstored_core.h -+++ b/tools/xenstore/xenstored_core.h -@@ -109,6 +109,11 @@ struct connection - }; - extern struct list_head connections; - -+struct node_perms { -+ unsigned int num; -+ struct xs_permissions *p; -+}; -+ - struct node { - const char *name; - -@@ -120,8 +125,7 @@ struct node { - #define NO_GENERATION ~((uint64_t)0) - - /* Permissions. */ -- unsigned int num_perms; -- struct xs_permissions *perms; -+ struct node_perms perms; - - /* Contents. */ - unsigned int datalen; -diff --git a/tools/xenstore/xenstored_domain.c b/tools/xenstore/xenstored_domain.c -index 2d0d87ee89e1..aa9942fcc267 100644 ---- a/tools/xenstore/xenstored_domain.c -+++ b/tools/xenstore/xenstored_domain.c -@@ -650,12 +650,12 @@ void domain_entry_inc(struct connection *conn, struct node *node) - if (!conn) - return; - -- if (node->perms && node->perms[0].id != conn->id) { -+ if (node->perms.p && node->perms.p[0].id != conn->id) { - if (conn->transaction) { - transaction_entry_inc(conn->transaction, -- node->perms[0].id); -+ node->perms.p[0].id); - } else { -- d = find_domain_by_domid(node->perms[0].id); -+ d = find_domain_by_domid(node->perms.p[0].id); - if (d) - d->nbentry++; - } -@@ -676,12 +676,12 @@ void domain_entry_dec(struct connection *conn, struct node *node) - if (!conn) - return; - -- if (node->perms && node->perms[0].id != conn->id) { -+ if (node->perms.p && node->perms.p[0].id != conn->id) { - if (conn->transaction) { - transaction_entry_dec(conn->transaction, -- node->perms[0].id); -+ node->perms.p[0].id); - } else { -- d = find_domain_by_domid(node->perms[0].id); -+ d = find_domain_by_domid(node->perms.p[0].id); - if (d && d->nbentry) - d->nbentry--; - } --- -2.17.1 - diff --git a/xsa115-4.14-c-0009-tools-xenstore-allow-special-watches-for-privileged-.patch b/xsa115-4.14-c-0009-tools-xenstore-allow-special-watches-for-privileged-.patch deleted file mode 100644 index e1e8942..0000000 --- a/xsa115-4.14-c-0009-tools-xenstore-allow-special-watches-for-privileged-.patch +++ /dev/null @@ -1,237 +0,0 @@ -From b9fff4b7ad6b41db860a43d35c401847fef789cb Mon Sep 17 00:00:00 2001 -From: Juergen Gross -Date: Thu, 11 Jun 2020 16:12:45 +0200 -Subject: [PATCH 09/10] tools/xenstore: allow special watches for privileged - callers only - -The special watches "@introduceDomain" and "@releaseDomain" should be -allowed for privileged callers only, as they allow to gain information -about presence of other guests on the host. So send watch events for -those watches via privileged connections only. - -In order to allow for disaggregated setups where e.g. driver domains -need to make use of those special watches add support for calling -"set permissions" for those special nodes, too. - -This is part of XSA-115. 
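In sketch form, assuming a deliberately simplified permission set (the real code below keeps a full struct node_perms per special path and checks XS_PERM_READ via perm_for_conn):

    #include <stdbool.h>
    #include <stdio.h>
    #include <string.h>

    struct sperms {
    	unsigned int owner_domid;	/* dom0 by default */
    	bool others_may_read;		/* widened via SET_PERMS */
    };

    static struct sperms release_perms   = { 0, false };
    static struct sperms introduce_perms = { 0, false };

    static struct sperms *get_perms_special(const char *name)
    {
    	if (!strcmp(name, "@releaseDomain"))
    		return &release_perms;
    	if (!strcmp(name, "@introduceDomain"))
    		return &introduce_perms;
    	return NULL;	/* not a special path */
    }

    /* May domain domid receive watch events for this special path? */
    static bool may_see_special(const char *name, unsigned int domid)
    {
    	const struct sperms *p = get_perms_special(name);

    	return p && (p->owner_domid == domid || p->others_may_read);
    }

    int main(void)
    {
    	printf("dom7 sees @releaseDomain by default: %d\n",
    	       may_see_special("@releaseDomain", 7));	/* 0 */
    	release_perms.others_may_read = true;	/* e.g. after SET_PERMS */
    	printf("dom7 sees it after widening: %d\n",
    	       may_see_special("@releaseDomain", 7));	/* 1 */
    	return 0;
    }

This is what allows the disaggregated case: dom0 runs SET_PERMS on "@releaseDomain" once, and from then on the driver domain receives those events through the ordinary delivery path.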
- -Signed-off-by: Juergen Gross -Reviewed-by: Julien Grall -Reviewed-by: Paul Durrant ---- - docs/misc/xenstore.txt | 5 +++ - tools/xenstore/xenstored_core.c | 27 ++++++++------ - tools/xenstore/xenstored_core.h | 2 ++ - tools/xenstore/xenstored_domain.c | 60 +++++++++++++++++++++++++++++++ - tools/xenstore/xenstored_domain.h | 5 +++ - tools/xenstore/xenstored_watch.c | 4 +++ - 6 files changed, 93 insertions(+), 10 deletions(-) - -diff --git a/docs/misc/xenstore.txt b/docs/misc/xenstore.txt -index cb8009cb686d..2081f20f55e4 100644 ---- a/docs/misc/xenstore.txt -+++ b/docs/misc/xenstore.txt -@@ -170,6 +170,9 @@ SET_PERMS ||+? - n no access - See https://wiki.xen.org/wiki/XenBus section - `Permissions' for details of the permissions system. -+ It is possible to set permissions for the special watch paths -+ "@introduceDomain" and "@releaseDomain" to enable receiving those -+ watches in unprivileged domains. - - ---------- Watches ---------- - -@@ -194,6 +197,8 @@ WATCH ||? - @releaseDomain occurs on any domain crash or - shutdown, and also on RELEASE - and domain destruction -+ events are sent to privileged callers or explicitly -+ via SET_PERMS enabled domains only. - - When a watch is first set up it is triggered once straight - away, with equal to . Watches may be triggered -diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c -index 79d305fbbe58..15ffbeb30f19 100644 ---- a/tools/xenstore/xenstored_core.c -+++ b/tools/xenstore/xenstored_core.c -@@ -470,8 +470,8 @@ static int write_node(struct connection *conn, struct node *node, - return write_node_raw(conn, &key, node, no_quota_check); - } - --static enum xs_perm_type perm_for_conn(struct connection *conn, -- const struct node_perms *perms) -+enum xs_perm_type perm_for_conn(struct connection *conn, -+ const struct node_perms *perms) - { - unsigned int i; - enum xs_perm_type mask = XS_PERM_READ|XS_PERM_WRITE|XS_PERM_OWNER; -@@ -1247,22 +1247,29 @@ static int do_set_perms(struct connection *conn, struct buffered_data *in) - if (perms.num < 2) - return EINVAL; - -- /* First arg is node name. */ -- /* We must own node to do this (tools can do this too). */ -- node = get_node_canonicalized(conn, in, in->buffer, &name, -- XS_PERM_WRITE | XS_PERM_OWNER); -- if (!node) -- return errno; -- - permstr = in->buffer + strlen(in->buffer) + 1; - perms.num--; - -- perms.p = talloc_array(node, struct xs_permissions, perms.num); -+ perms.p = talloc_array(in, struct xs_permissions, perms.num); - if (!perms.p) - return ENOMEM; - if (!xs_strings_to_perms(perms.p, perms.num, permstr)) - return errno; - -+ /* First arg is node name. */ -+ if (strstarts(in->buffer, "@")) { -+ if (set_perms_special(conn, in->buffer, &perms)) -+ return errno; -+ send_ack(conn, XS_SET_PERMS); -+ return 0; -+ } -+ -+ /* We must own node to do this (tools can do this too). */ -+ node = get_node_canonicalized(conn, in, in->buffer, &name, -+ XS_PERM_WRITE | XS_PERM_OWNER); -+ if (!node) -+ return errno; -+ - /* Unprivileged domains may not change the owner. 
*/ - if (domain_is_unprivileged(conn) && - perms.p[0].id != node->perms.p[0].id) -diff --git a/tools/xenstore/xenstored_core.h b/tools/xenstore/xenstored_core.h -index 47ba0916dbe2..53f1050859fc 100644 ---- a/tools/xenstore/xenstored_core.h -+++ b/tools/xenstore/xenstored_core.h -@@ -165,6 +165,8 @@ struct node *get_node(struct connection *conn, - struct connection *new_connection(connwritefn_t *write, connreadfn_t *read); - void check_store(void); - void corrupt(struct connection *conn, const char *fmt, ...); -+enum xs_perm_type perm_for_conn(struct connection *conn, -+ const struct node_perms *perms); - - /* Is this a valid node name? */ - bool is_valid_nodename(const char *node); -diff --git a/tools/xenstore/xenstored_domain.c b/tools/xenstore/xenstored_domain.c -index aa9942fcc267..a0d1a11c837f 100644 ---- a/tools/xenstore/xenstored_domain.c -+++ b/tools/xenstore/xenstored_domain.c -@@ -41,6 +41,9 @@ static evtchn_port_t virq_port; - - xenevtchn_handle *xce_handle = NULL; - -+static struct node_perms dom_release_perms; -+static struct node_perms dom_introduce_perms; -+ - struct domain - { - struct list_head list; -@@ -582,6 +585,59 @@ void restore_existing_connections(void) - { - } - -+static int set_dom_perms_default(struct node_perms *perms) -+{ -+ perms->num = 1; -+ perms->p = talloc_array(NULL, struct xs_permissions, perms->num); -+ if (!perms->p) -+ return -1; -+ perms->p->id = 0; -+ perms->p->perms = XS_PERM_NONE; -+ -+ return 0; -+} -+ -+static struct node_perms *get_perms_special(const char *name) -+{ -+ if (!strcmp(name, "@releaseDomain")) -+ return &dom_release_perms; -+ if (!strcmp(name, "@introduceDomain")) -+ return &dom_introduce_perms; -+ return NULL; -+} -+ -+int set_perms_special(struct connection *conn, const char *name, -+ struct node_perms *perms) -+{ -+ struct node_perms *p; -+ -+ p = get_perms_special(name); -+ if (!p) -+ return EINVAL; -+ -+ if ((perm_for_conn(conn, p) & (XS_PERM_WRITE | XS_PERM_OWNER)) != -+ (XS_PERM_WRITE | XS_PERM_OWNER)) -+ return EACCES; -+ -+ p->num = perms->num; -+ talloc_free(p->p); -+ p->p = perms->p; -+ talloc_steal(NULL, perms->p); -+ -+ return 0; -+} -+ -+bool check_perms_special(const char *name, struct connection *conn) -+{ -+ struct node_perms *p; -+ -+ p = get_perms_special(name); -+ if (!p) -+ return false; -+ -+ return perm_for_conn(conn, p) & XS_PERM_READ; -+} -+ - static int dom0_init(void) - { - evtchn_port_t port; -@@ -603,6 +659,10 @@ static int dom0_init(void) - - xenevtchn_notify(xce_handle, dom0->port); - -+ if (set_dom_perms_default(&dom_release_perms) || -+ set_dom_perms_default(&dom_introduce_perms)) -+ return -1; -+ - return 0; - } - -diff --git a/tools/xenstore/xenstored_domain.h b/tools/xenstore/xenstored_domain.h -index 56ae01597475..259183962a9c 100644 ---- a/tools/xenstore/xenstored_domain.h -+++ b/tools/xenstore/xenstored_domain.h -@@ -65,6 +65,11 @@ void domain_watch_inc(struct connection *conn); - void domain_watch_dec(struct connection *conn); - int domain_watch(struct connection *conn); - -+/* Special node permission handling. 
*/ -+int set_perms_special(struct connection *conn, const char *name, -+ struct node_perms *perms); -+bool check_perms_special(const char *name, struct connection *conn); -+ - /* Write rate limiting */ - - #define WRL_FACTOR 1000 /* for fixed-point arithmetic */ -diff --git a/tools/xenstore/xenstored_watch.c b/tools/xenstore/xenstored_watch.c -index 3836675459fa..f4e289362eb6 100644 ---- a/tools/xenstore/xenstored_watch.c -+++ b/tools/xenstore/xenstored_watch.c -@@ -133,6 +133,10 @@ void fire_watches(struct connection *conn, const void *ctx, const char *name, - - /* Create an event for each watch. */ - list_for_each_entry(i, &connections, list) { -+ /* introduce/release domain watches */ -+ if (check_special_event(name) && !check_perms_special(name, i)) -+ continue; -+ - list_for_each_entry(watch, &i->watches, list) { - if (exact) { - if (streq(name, watch->node)) --- -2.17.1 - diff --git a/xsa115-4.14-c-0010-tools-xenstore-avoid-watch-events-for-nodes-without-.patch b/xsa115-4.14-c-0010-tools-xenstore-avoid-watch-events-for-nodes-without-.patch deleted file mode 100644 index b09153c..0000000 --- a/xsa115-4.14-c-0010-tools-xenstore-avoid-watch-events-for-nodes-without-.patch +++ /dev/null @@ -1,374 +0,0 @@ -From f1cc47b0572b337269af7e34bd019584f4b8c98e Mon Sep 17 00:00:00 2001 -From: Juergen Gross -Date: Thu, 11 Jun 2020 16:12:46 +0200 -Subject: [PATCH 10/10] tools/xenstore: avoid watch events for nodes without - access - -Today watch events are sent regardless of the access rights of the -node the event is sent for. This enables any guest to e.g. setup a -watch for "/" in order to have a detailed record of all Xenstore -modifications. - -Modify that by sending only watch events for nodes that the watcher -has a chance to see otherwise (either via direct reads or by querying -the children of a node). This includes cases where the visibility of -a node for a watcher is changing (permissions being removed). - -This is part of XSA-115. - -Signed-off-by: Juergen Gross -Reviewed-by: Julien Grall -Reviewed-by: Paul Durrant ---- - tools/xenstore/xenstored_core.c | 28 +++++----- - tools/xenstore/xenstored_core.h | 15 ++++-- - tools/xenstore/xenstored_domain.c | 6 +-- - tools/xenstore/xenstored_transaction.c | 21 +++++++- - tools/xenstore/xenstored_watch.c | 75 +++++++++++++++++++------- - tools/xenstore/xenstored_watch.h | 2 +- - 6 files changed, 104 insertions(+), 43 deletions(-) - -diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c -index 15ffbeb30f19..92bfd54cff62 100644 ---- a/tools/xenstore/xenstored_core.c -+++ b/tools/xenstore/xenstored_core.c -@@ -360,8 +360,8 @@ static void initialize_fds(int *p_sock_pollfd_idx, int *p_ro_sock_pollfd_idx, - * If it fails, returns NULL and sets errno. - * Temporary memory allocations will be done with ctx. - */ --static struct node *read_node(struct connection *conn, const void *ctx, -- const char *name) -+struct node *read_node(struct connection *conn, const void *ctx, -+ const char *name) - { - TDB_DATA key, data; - struct xs_tdb_record_hdr *hdr; -@@ -496,7 +496,7 @@ enum xs_perm_type perm_for_conn(struct connection *conn, - * Get name of node parent. - * Temporary memory allocations are done with ctx. - */ --static char *get_parent(const void *ctx, const char *node) -+char *get_parent(const void *ctx, const char *node) - { - char *parent; - char *slash = strrchr(node + 1, '/'); -@@ -568,10 +568,10 @@ static int errno_from_parents(struct connection *conn, const void *ctx, - * If it fails, returns NULL and sets errno. 
- * Temporary memory allocations are done with ctx. - */ --struct node *get_node(struct connection *conn, -- const void *ctx, -- const char *name, -- enum xs_perm_type perm) -+static struct node *get_node(struct connection *conn, -+ const void *ctx, -+ const char *name, -+ enum xs_perm_type perm) - { - struct node *node; - -@@ -1058,7 +1058,7 @@ static int do_write(struct connection *conn, struct buffered_data *in) - return errno; - } - -- fire_watches(conn, in, name, false); -+ fire_watches(conn, in, name, node, false, NULL); - send_ack(conn, XS_WRITE); - - return 0; -@@ -1080,7 +1080,7 @@ static int do_mkdir(struct connection *conn, struct buffered_data *in) - node = create_node(conn, in, name, NULL, 0); - if (!node) - return errno; -- fire_watches(conn, in, name, false); -+ fire_watches(conn, in, name, node, false, NULL); - } - send_ack(conn, XS_MKDIR); - -@@ -1143,7 +1143,7 @@ static int delete_node(struct connection *conn, const void *ctx, - talloc_free(name); - } - -- fire_watches(conn, ctx, node->name, true); -+ fire_watches(conn, ctx, node->name, node, true, NULL); - delete_node_single(conn, node); - delete_child(conn, parent, basename(node->name)); - talloc_free(node); -@@ -1167,13 +1167,14 @@ static int _rm(struct connection *conn, const void *ctx, struct node *node, - parent = read_node(conn, ctx, parentname); - if (!parent) - return (errno == ENOMEM) ? ENOMEM : EINVAL; -+ node->parent = parent; - - /* - * Fire the watches now, when we can still see the node permissions. - * This fine as we are single threaded and the next possible read will - * be handled only after the node has been really removed. - */ -- fire_watches(conn, ctx, name, false); -+ fire_watches(conn, ctx, name, node, false, NULL); - return delete_node(conn, ctx, parent, node); - } - -@@ -1239,7 +1240,7 @@ static int do_get_perms(struct connection *conn, struct buffered_data *in) - - static int do_set_perms(struct connection *conn, struct buffered_data *in) - { -- struct node_perms perms; -+ struct node_perms perms, old_perms; - char *name, *permstr; - struct node *node; - -@@ -1275,6 +1276,7 @@ static int do_set_perms(struct connection *conn, struct buffered_data *in) - perms.p[0].id != node->perms.p[0].id) - return EPERM; - -+ old_perms = node->perms; - domain_entry_dec(conn, node); - node->perms = perms; - domain_entry_inc(conn, node); -@@ -1282,7 +1284,7 @@ static int do_set_perms(struct connection *conn, struct buffered_data *in) - if (write_node(conn, node, false)) - return errno; - -- fire_watches(conn, in, name, false); -+ fire_watches(conn, in, name, node, false, &old_perms); - send_ack(conn, XS_SET_PERMS); - - return 0; -diff --git a/tools/xenstore/xenstored_core.h b/tools/xenstore/xenstored_core.h -index 53f1050859fc..eb19b71f5f46 100644 ---- a/tools/xenstore/xenstored_core.h -+++ b/tools/xenstore/xenstored_core.h -@@ -152,15 +152,17 @@ void send_ack(struct connection *conn, enum xsd_sockmsg_type type); - /* Canonicalize this path if possible. */ - char *xenstore_canonicalize(struct connection *conn, const void *ctx, const char *node); - -+/* Get access permissions. */ -+enum xs_perm_type perm_for_conn(struct connection *conn, -+ const struct node_perms *perms); -+ - /* Write a node to the tdb data base. */ - int write_node_raw(struct connection *conn, TDB_DATA *key, struct node *node, - bool no_quota_check); - --/* Get this node, checking we have permissions. 
*/ --struct node *get_node(struct connection *conn, -- const void *ctx, -- const char *name, -- enum xs_perm_type perm); -+/* Get a node from the tdb data base. */ -+struct node *read_node(struct connection *conn, const void *ctx, -+ const char *name); - - struct connection *new_connection(connwritefn_t *write, connreadfn_t *read); - void check_store(void); -@@ -171,6 +173,9 @@ enum xs_perm_type perm_for_conn(struct connection *conn, - /* Is this a valid node name? */ - bool is_valid_nodename(const char *node); - -+/* Get name of parent node. */ -+char *get_parent(const void *ctx, const char *node); -+ - /* Tracing infrastructure. */ - void trace_create(const void *data, const char *type); - void trace_destroy(const void *data, const char *type); -diff --git a/tools/xenstore/xenstored_domain.c b/tools/xenstore/xenstored_domain.c -index a0d1a11c837f..9fad470f8331 100644 ---- a/tools/xenstore/xenstored_domain.c -+++ b/tools/xenstore/xenstored_domain.c -@@ -202,7 +202,7 @@ static int destroy_domain(void *_domain) - unmap_interface(domain->interface); - } - -- fire_watches(NULL, domain, "@releaseDomain", false); -+ fire_watches(NULL, domain, "@releaseDomain", NULL, false, NULL); - - wrl_domain_destroy(domain); - -@@ -240,7 +240,7 @@ static void domain_cleanup(void) - } - - if (notify) -- fire_watches(NULL, NULL, "@releaseDomain", false); -+ fire_watches(NULL, NULL, "@releaseDomain", NULL, false, NULL); - } - - /* We scan all domains rather than use the information given here. */ -@@ -404,7 +404,7 @@ int do_introduce(struct connection *conn, struct buffered_data *in) - /* Now domain belongs to its connection. */ - talloc_steal(domain->conn, domain); - -- fire_watches(NULL, in, "@introduceDomain", false); -+ fire_watches(NULL, in, "@introduceDomain", NULL, false, NULL); - } else { - /* Use XS_INTRODUCE for recreating the xenbus event-channel. */ - if (domain->port) -diff --git a/tools/xenstore/xenstored_transaction.c b/tools/xenstore/xenstored_transaction.c -index e87897573469..a7d8c5d475ec 100644 ---- a/tools/xenstore/xenstored_transaction.c -+++ b/tools/xenstore/xenstored_transaction.c -@@ -114,6 +114,9 @@ struct accessed_node - /* Generation count (or NO_GENERATION) for conflict checking. */ - uint64_t generation; - -+ /* Original node permissions. */ -+ struct node_perms perms; -+ - /* Generation count checking required? */ - bool check_gen; - -@@ -260,6 +263,15 @@ int access_node(struct connection *conn, struct node *node, - i->node = talloc_strdup(i, node->name); - if (!i->node) - goto nomem; -+ if (node->generation != NO_GENERATION && node->perms.num) { -+ i->perms.p = talloc_array(i, struct xs_permissions, -+ node->perms.num); -+ if (!i->perms.p) -+ goto nomem; -+ i->perms.num = node->perms.num; -+ memcpy(i->perms.p, node->perms.p, -+ i->perms.num * sizeof(*i->perms.p)); -+ } - - introduce = true; - i->ta_node = false; -@@ -368,9 +380,14 @@ static int finalize_transaction(struct connection *conn, - talloc_free(data.dptr); - if (ret) - goto err; -- } else if (tdb_delete(tdb_ctx, key)) -+ fire_watches(conn, trans, i->node, NULL, false, -+ i->perms.p ? &i->perms : NULL); -+ } else { -+ fire_watches(conn, trans, i->node, NULL, false, -+ i->perms.p ? 
&i->perms : NULL); -+ if (tdb_delete(tdb_ctx, key)) - goto err; -- fire_watches(conn, trans, i->node, false); -+ } - } - - if (i->ta_node && tdb_delete(tdb_ctx, ta_key)) -diff --git a/tools/xenstore/xenstored_watch.c b/tools/xenstore/xenstored_watch.c -index f4e289362eb6..71c108ea99f1 100644 ---- a/tools/xenstore/xenstored_watch.c -+++ b/tools/xenstore/xenstored_watch.c -@@ -85,22 +85,6 @@ static void add_event(struct connection *conn, - unsigned int len; - char *data; - -- if (!check_special_event(name)) { -- /* Can this conn load node, or see that it doesn't exist? */ -- struct node *node = get_node(conn, ctx, name, XS_PERM_READ); -- /* -- * XXX We allow EACCES here because otherwise a non-dom0 -- * backend driver cannot watch for disappearance of a frontend -- * xenstore directory. When the directory disappears, we -- * revert to permissions of the parent directory for that path, -- * which will typically disallow access for the backend. -- * But this breaks device-channel teardown! -- * Really we should fix this better... -- */ -- if (!node && errno != ENOENT && errno != EACCES) -- return; -- } -- - if (watch->relative_path) { - name += strlen(watch->relative_path); - if (*name == '/') /* Could be "" */ -@@ -117,12 +101,60 @@ static void add_event(struct connection *conn, - talloc_free(data); - } - -+/* -+ * Check permissions of a specific watch to fire: -+ * Either the node itself or its parent have to be readable by the connection -+ * the watch has been setup for. In case a watch event is created due to -+ * changed permissions we need to take the old permissions into account, too. -+ */ -+static bool watch_permitted(struct connection *conn, const void *ctx, -+ const char *name, struct node *node, -+ struct node_perms *perms) -+{ -+ enum xs_perm_type perm; -+ struct node *parent; -+ char *parent_name; -+ -+ if (perms) { -+ perm = perm_for_conn(conn, perms); -+ if (perm & XS_PERM_READ) -+ return true; -+ } -+ -+ if (!node) { -+ node = read_node(conn, ctx, name); -+ if (!node) -+ return false; -+ } -+ -+ perm = perm_for_conn(conn, &node->perms); -+ if (perm & XS_PERM_READ) -+ return true; -+ -+ parent = node->parent; -+ if (!parent) { -+ parent_name = get_parent(ctx, node->name); -+ if (!parent_name) -+ return false; -+ parent = read_node(conn, ctx, parent_name); -+ if (!parent) -+ return false; -+ } -+ -+ perm = perm_for_conn(conn, &parent->perms); -+ -+ return perm & XS_PERM_READ; -+} -+ - /* - * Check whether any watch events are to be sent. - * Temporary memory allocations are done with ctx. -+ * We need to take the (potential) old permissions of the node into account -+ * as a watcher losing permissions to access a node should receive the -+ * watch event, too. - */ - void fire_watches(struct connection *conn, const void *ctx, const char *name, -- bool exact) -+ struct node *node, bool exact, struct node_perms *perms) - { - struct connection *i; - struct watch *watch; -@@ -134,8 +166,13 @@ void fire_watches(struct connection *conn, const void *ctx, const char *name, - /* Create an event for each watch. 
*/ - list_for_each_entry(i, &connections, list) { - /* introduce/release domain watches */ -- if (check_special_event(name) && !check_perms_special(name, i)) -- continue; -+ if (check_special_event(name)) { -+ if (!check_perms_special(name, i)) -+ continue; -+ } else { -+ if (!watch_permitted(i, ctx, name, node, perms)) -+ continue; -+ } - - list_for_each_entry(watch, &i->watches, list) { - if (exact) { -diff --git a/tools/xenstore/xenstored_watch.h b/tools/xenstore/xenstored_watch.h -index 1b3c80d3dda1..03094374f379 100644 ---- a/tools/xenstore/xenstored_watch.h -+++ b/tools/xenstore/xenstored_watch.h -@@ -26,7 +26,7 @@ int do_unwatch(struct connection *conn, struct buffered_data *in); - - /* Fire all watches: !exact means all the children are affected (ie. rm). */ - void fire_watches(struct connection *conn, const void *tmp, const char *name, -- bool exact); -+ struct node *node, bool exact, struct node_perms *perms); - - void conn_delete_all_watches(struct connection *conn); - --- -2.17.1 - diff --git a/xsa115-o-0001-tools-ocaml-xenstored-ignore-transaction-id-for-un-w.patch b/xsa115-o-0001-tools-ocaml-xenstored-ignore-transaction-id-for-un-w.patch deleted file mode 100644 index 0072c68..0000000 --- a/xsa115-o-0001-tools-ocaml-xenstored-ignore-transaction-id-for-un-w.patch +++ /dev/null @@ -1,43 +0,0 @@ -From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= -Subject: tools/ocaml/xenstored: ignore transaction id for [un]watch -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Instead of ignoring the transaction id for XS_WATCH and XS_UNWATCH -commands as it is documented in docs/misc/xenstore.txt, it is tested -for validity today. - -Really ignore the transaction id for XS_WATCH and XS_UNWATCH. - -This is part of XSA-115. - -Signed-off-by: Edwin Török -Acked-by: Christian Lindig -Reviewed-by: Andrew Cooper - -diff --git a/tools/ocaml/xenstored/process.ml b/tools/ocaml/xenstored/process.ml -index ff5c9484fc..2fa6798e3b 100644 ---- a/tools/ocaml/xenstored/process.ml -+++ b/tools/ocaml/xenstored/process.ml -@@ -498,12 +498,19 @@ let retain_op_in_history ty = - | Xenbus.Xb.Op.Reset_watches - | Xenbus.Xb.Op.Invalid -> false - -+let maybe_ignore_transaction = function -+ | Xenbus.Xb.Op.Watch | Xenbus.Xb.Op.Unwatch -> fun tid -> -+ if tid <> Transaction.none then -+ debug "Ignoring transaction ID %d for watch/unwatch" tid; -+ Transaction.none -+ | _ -> fun x -> x -+ - (** - * Nothrow guarantee. - *) - let process_packet ~store ~cons ~doms ~con ~req = - let ty = req.Packet.ty in -- let tid = req.Packet.tid in -+ let tid = maybe_ignore_transaction ty req.Packet.tid in - let rid = req.Packet.rid in - try - let fct = function_of_type ty in diff --git a/xsa115-o-0002-tools-ocaml-xenstored-check-privilege-for-XS_IS_DOMA.patch b/xsa115-o-0002-tools-ocaml-xenstored-check-privilege-for-XS_IS_DOMA.patch deleted file mode 100644 index 26033c7..0000000 --- a/xsa115-o-0002-tools-ocaml-xenstored-check-privilege-for-XS_IS_DOMA.patch +++ /dev/null @@ -1,30 +0,0 @@ -From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= -Subject: tools/ocaml/xenstored: check privilege for XS_IS_DOMAIN_INTRODUCED -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -The Xenstore command XS_IS_DOMAIN_INTRODUCED should be possible for privileged -domains only (the only user in the tree is the xenpaging daemon). - -This is part of XSA-115. 
- -Signed-off-by: Edwin Török -Acked-by: Christian Lindig -Reviewed-by: Andrew Cooper - -diff --git a/tools/ocaml/xenstored/process.ml b/tools/ocaml/xenstored/process.ml -index 2fa6798e3b..fd79ef564f 100644 ---- a/tools/ocaml/xenstored/process.ml -+++ b/tools/ocaml/xenstored/process.ml -@@ -166,7 +166,9 @@ let do_setperms con t _domains _cons data = - let do_error _con _t _domains _cons _data = - raise Define.Unknown_operation - --let do_isintroduced _con _t domains _cons data = -+let do_isintroduced con _t domains _cons data = -+ if not (Connection.is_dom0 con) -+ then raise Define.Permission_denied; - let domid = - match (split None '\000' data) with - | domid :: _ -> int_of_string domid diff --git a/xsa115-o-0003-tools-ocaml-xenstored-unify-watch-firing.patch b/xsa115-o-0003-tools-ocaml-xenstored-unify-watch-firing.patch deleted file mode 100644 index fea94a9..0000000 --- a/xsa115-o-0003-tools-ocaml-xenstored-unify-watch-firing.patch +++ /dev/null @@ -1,29 +0,0 @@ -From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= -Subject: tools/ocaml/xenstored: unify watch firing -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -This will make it easier insert additional checks in a follow-up patch. -All watches are now fired from a single function. - -This is part of XSA-115. - -Signed-off-by: Edwin Török -Acked-by: Christian Lindig -Reviewed-by: Andrew Cooper - -diff --git a/tools/ocaml/xenstored/connection.ml b/tools/ocaml/xenstored/connection.ml -index 24750ada43..e5df62d9e7 100644 ---- a/tools/ocaml/xenstored/connection.ml -+++ b/tools/ocaml/xenstored/connection.ml -@@ -210,8 +210,7 @@ let fire_watch watch path = - end else - path - in -- let data = Utils.join_by_null [ new_path; watch.token; "" ] in -- send_reply watch.con Transaction.none 0 Xenbus.Xb.Op.Watchevent data -+ fire_single_watch { watch with path = new_path } - - (* Search for a valid unused transaction id. *) - let rec valid_transaction_id con proposed_id = diff --git a/xsa115-o-0004-tools-ocaml-xenstored-introduce-permissions-for-spec.patch b/xsa115-o-0004-tools-ocaml-xenstored-introduce-permissions-for-spec.patch deleted file mode 100644 index 76f98e9..0000000 --- a/xsa115-o-0004-tools-ocaml-xenstored-introduce-permissions-for-spec.patch +++ /dev/null @@ -1,117 +0,0 @@ -From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= -Subject: tools/ocaml/xenstored: introduce permissions for special watches -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -The special watches "@introduceDomain" and "@releaseDomain" should be -allowed for privileged callers only, as they allow to gain information -about presence of other guests on the host. So send watch events for -those watches via privileged connections only. - -Start to address this by treating the special watches as regular nodes -in the tree, which gives them normal semantics for permissions. A later -change will restrict the handling, so that they can't be listed, etc. - -This is part of XSA-115. 
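The same startup idea rendered in C for uniformity with the C patches above; store_write() here is a hypothetical stub standing in for the store interface, not an actual xenstored function:

    #include <stddef.h>
    #include <stdio.h>

    /* Hypothetical store interface, stubbed out for this sketch. */
    static void store_write(const char *path, const char *value,
    			unsigned int owner_domid)
    {
    	printf("write %s=\"%s\" (owner dom%u)\n", path, value, owner_domid);
    }

    static const char *const specials[] = {
    	"@introduceDomain",
    	"@releaseDomain",
    };

    /* Register the special paths as ordinary nodes at startup, owned by
     * dom0, so the regular permission checks apply to them afterwards. */
    static void create_special_nodes(void)
    {
    	size_t i;

    	for (i = 0; i < sizeof(specials) / sizeof(specials[0]); i++)
    		store_write(specials[i], "", 0);
    }

    int main(void)
    {
    	create_special_nodes();
    	return 0;
    }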
- -Signed-off-by: Edwin Török -Acked-by: Christian Lindig -Reviewed-by: Andrew Cooper - -diff --git a/tools/ocaml/xenstored/process.ml b/tools/ocaml/xenstored/process.ml -index fd79ef564f..e528d1ecb2 100644 ---- a/tools/ocaml/xenstored/process.ml -+++ b/tools/ocaml/xenstored/process.ml -@@ -420,7 +420,7 @@ let do_introduce con _t domains cons data = - else try - let ndom = Domains.create domains domid mfn port in - Connections.add_domain cons ndom; -- Connections.fire_spec_watches cons "@introduceDomain"; -+ Connections.fire_spec_watches cons Store.Path.introduce_domain; - ndom - with _ -> raise Invalid_Cmd_Args - in -@@ -439,7 +439,7 @@ let do_release con _t domains cons data = - Domains.del domains domid; - Connections.del_domain cons domid; - if fire_spec_watches -- then Connections.fire_spec_watches cons "@releaseDomain" -+ then Connections.fire_spec_watches cons Store.Path.release_domain - else raise Invalid_Cmd_Args - - let do_resume con _t domains _cons data = -diff --git a/tools/ocaml/xenstored/store.ml b/tools/ocaml/xenstored/store.ml -index 92b6289b5e..52b88b3ee1 100644 ---- a/tools/ocaml/xenstored/store.ml -+++ b/tools/ocaml/xenstored/store.ml -@@ -214,6 +214,11 @@ let rec lookup node path fct = - - let apply rnode path fct = - lookup rnode path fct -+ -+let introduce_domain = "@introduceDomain" -+let release_domain = "@releaseDomain" -+let specials = List.map of_string [ introduce_domain; release_domain ] -+ - end - - (* The Store.t type *) -diff --git a/tools/ocaml/xenstored/utils.ml b/tools/ocaml/xenstored/utils.ml -index b252db799b..e8c9fe4e94 100644 ---- a/tools/ocaml/xenstored/utils.ml -+++ b/tools/ocaml/xenstored/utils.ml -@@ -88,19 +88,17 @@ let read_file_single_integer filename = - Unix.close fd; - int_of_string (Bytes.sub_string buf 0 sz) - --let path_complete path connection_path = -- if String.get path 0 <> '/' then -- connection_path ^ path -- else -- path -- -+(* @path may be guest data and needs its length validating. 
@connection_path -+ * is generated locally in xenstored and always of the form "/local/domain/$N/" *) - let path_validate path connection_path = -- if String.length path = 0 || String.length path > 1024 then -- raise Define.Invalid_path -- else -- let cpath = path_complete path connection_path in -- if String.get cpath 0 <> '/' then -- raise Define.Invalid_path -- else -- cpath -+ let len = String.length path in -+ -+ if len = 0 || len > 1024 then raise Define.Invalid_path; -+ -+ let abs_path = -+ match String.get path 0 with -+ | '/' | '@' -> path -+ | _ -> connection_path ^ path -+ in - -+ abs_path -diff --git a/tools/ocaml/xenstored/xenstored.ml b/tools/ocaml/xenstored/xenstored.ml -index 7e7824761b..8d0c50bfa4 100644 ---- a/tools/ocaml/xenstored/xenstored.ml -+++ b/tools/ocaml/xenstored/xenstored.ml -@@ -286,6 +286,8 @@ let _ = - let quit = ref false in - - Logging.init_xenstored_log(); -+ List.iter (fun path -> -+ Store.write store Perms.Connection.full_rights path "") Store.Path.specials; - - let filename = Paths.xen_run_stored ^ "/db" in - if cf.restart && Sys.file_exists filename then ( -@@ -335,7 +337,7 @@ let _ = - let (notify, deaddom) = Domains.cleanup domains in - List.iter (Connections.del_domain cons) deaddom; - if deaddom <> [] || notify then -- Connections.fire_spec_watches cons "@releaseDomain" -+ Connections.fire_spec_watches cons Store.Path.release_domain - ) - else - let c = Connections.find_domain_by_port cons port in diff --git a/xsa115-o-0005-tools-ocaml-xenstored-avoid-watch-events-for-nodes-w.patch b/xsa115-o-0005-tools-ocaml-xenstored-avoid-watch-events-for-nodes-w.patch deleted file mode 100644 index 866d415..0000000 --- a/xsa115-o-0005-tools-ocaml-xenstored-avoid-watch-events-for-nodes-w.patch +++ /dev/null @@ -1,406 +0,0 @@ -From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= -Subject: tools/ocaml/xenstored: avoid watch events for nodes without access -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Today watch events are sent regardless of the access rights of the -node the event is sent for. This enables any guest to e.g. setup a -watch for "/" in order to have a detailed record of all Xenstore -modifications. - -Modify that by sending only watch events for nodes that the watcher -has a chance to see otherwise (either via direct reads or by querying -the children of a node). This includes cases where the visibility of -a node for a watcher is changing (permissions being removed). - -Permissions for nodes are looked up either in the old (pre -transaction/command) or current trees (post transaction). If -permissions are changed multiple times in a transaction only the final -version is checked, because considering a transaction atomic the -individual permission changes would not be noticable to an outside -observer. - -Two trees are only needed for set_perms: here we can either notice the -node disappearing (if we loose permission), appearing -(if we gain permission), or changing (if we preserve permission). - -RM needs to only look at the old tree: in the new tree the node would be -gone, or could have different permissions if it was recreated (the -recreation would get its own watch fired). - -Inside a tree we lookup the watch path's parent, and then the watch path -child itself. This gets us 4 sets of permissions in worst case, and if -either of these allows a watch, then we permit it to fire. 
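A simplified C rendering of that rule (the real lookup is the OCaml lookup_watch_perms further below; a permission set is reduced here to a single reader id purely for illustration):

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdio.h>

    /* A permission set reduced to "which domid may read" for this sketch. */
    struct perms {
    	unsigned int reader;
    	bool valid;		/* false: node absent in that tree */
    };

    static bool may_read(unsigned int domid, struct perms p)
    {
    	return p.valid && p.reader == domid;
    }

    /* Up to four candidate sets: the watch path's parent and the node
     * itself, looked up in both the pre- and post-operation trees.
     * The event fires if any one of them grants read access. */
    static bool watch_permitted(unsigned int domid,
    			    const struct perms cand[4])
    {
    	size_t i;

    	for (i = 0; i < 4; i++)
    		if (may_read(domid, cand[i]))
    			return true;
    	return false;
    }

    int main(void)
    {
    	/* A node readable by dom7 was just deleted: it is absent in
    	 * the new tree, but the old parent was readable by dom7, so
    	 * dom7 still sees the removal event. */
    	struct perms cand[4] = {
    		{ 7, true },	/* old parent */
    		{ 7, true },	/* old node */
    		{ 0, true },	/* new parent, now dom0-only */
    		{ 0, false },	/* new node: gone */
    	};

    	printf("fire: %d\n", watch_permitted(7, cand));	/* 1 */
    	return 0;
    }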
The -permission lookups are done without logging the failures, otherwise we'd -get confusing errors about permission denied for some paths, but a watch -still firing. The actual result is logged in xenstored-access log: - - 'w event ...' as usual if watch was fired - 'w notfired...' if the watch was not fired, together with path and - permission set to help in troubleshooting - -Adding a watch bypasses permission checks and always fires the watch -once immediately. This is consistent with the specification, and no -information is gained (the watch is fired both if the path exists or -doesn't, and both if you have or don't have access, i.e. it reflects the -path a domain gave it back to that domain). - -There are some semantic changes here: - - * Write+rm in a single transaction of the same path is unobservable - now via watches: both before and after a transaction the path - doesn't exist, thus both tree lookups come up with the empty - permission set, and noone, not even Dom0 can see this. This is - consistent with transaction atomicity though. - * Similar to above if we temporarily grant and then revoke permission - on a path any watches fired inbetween are ignored as well - * There is a new log event (w notfired) which shows the permission set - of the path, and the path. - * Watches on paths that a domain doesn't have access to are now not - seen, which is the purpose of the security fix. - -This is part of XSA-115. - -Signed-off-by: Edwin Török -Acked-by: Christian Lindig -Reviewed-by: Andrew Cooper - -diff --git a/tools/ocaml/xenstored/connection.ml b/tools/ocaml/xenstored/connection.ml -index e5df62d9e7..644a448f2e 100644 ---- a/tools/ocaml/xenstored/connection.ml -+++ b/tools/ocaml/xenstored/connection.ml -@@ -196,11 +196,36 @@ let list_watches con = - con.watches [] in - List.concat ll - --let fire_single_watch watch = -+let dbg fmt = Logging.debug "connection" fmt -+let info fmt = Logging.info "connection" fmt -+ -+let lookup_watch_perm path = function -+| None -> [] -+| Some root -> -+ try Store.Path.apply root path @@ fun parent name -> -+ Store.Node.get_perms parent :: -+ try [Store.Node.get_perms (Store.Node.find parent name)] -+ with Not_found -> [] -+ with Define.Invalid_path | Not_found -> [] -+ -+let lookup_watch_perms oldroot root path = -+ lookup_watch_perm path oldroot @ lookup_watch_perm path (Some root) -+ -+let fire_single_watch_unchecked watch = - let data = Utils.join_by_null [watch.path; watch.token; ""] in - send_reply watch.con Transaction.none 0 Xenbus.Xb.Op.Watchevent data - --let fire_watch watch path = -+let fire_single_watch (oldroot, root) watch = -+ let abspath = get_watch_path watch.con watch.path |> Store.Path.of_string in -+ let perms = lookup_watch_perms oldroot root abspath in -+ if List.exists (Perms.has watch.con.perm READ) perms then -+ fire_single_watch_unchecked watch -+ else -+ let perms = perms |> List.map (Perms.Node.to_string ~sep:" ") |> String.concat ", " in -+ let con = get_domstr watch.con in -+ Logging.watch_not_fired ~con perms (Store.Path.to_string abspath) -+ -+let fire_watch roots watch path = - let new_path = - if watch.is_relative && path.[0] = '/' - then begin -@@ -210,7 +235,7 @@ let fire_watch watch path = - end else - path - in -- fire_single_watch { watch with path = new_path } -+ fire_single_watch roots { watch with path = new_path } - - (* Search for a valid unused transaction id. 
*) - let rec valid_transaction_id con proposed_id = -diff --git a/tools/ocaml/xenstored/connections.ml b/tools/ocaml/xenstored/connections.ml -index f2c4318c88..9f9f7ee2f0 100644 ---- a/tools/ocaml/xenstored/connections.ml -+++ b/tools/ocaml/xenstored/connections.ml -@@ -135,25 +135,26 @@ let del_watch cons con path token = - watch - - (* path is absolute *) --let fire_watches cons path recurse = -+let fire_watches ?oldroot root cons path recurse = - let key = key_of_path path in - let path = Store.Path.to_string path in -+ let roots = oldroot, root in - let fire_watch _ = function - | None -> () -- | Some watches -> List.iter (fun w -> Connection.fire_watch w path) watches -+ | Some watches -> List.iter (fun w -> Connection.fire_watch roots w path) watches - in - let fire_rec _x = function - | None -> () - | Some watches -> -- List.iter (fun w -> Connection.fire_single_watch w) watches -+ List.iter (Connection.fire_single_watch roots) watches - in - Trie.iter_path fire_watch cons.watches key; - if recurse then - Trie.iter fire_rec (Trie.sub cons.watches key) - --let fire_spec_watches cons specpath = -+let fire_spec_watches root cons specpath = - iter cons (fun con -> -- List.iter (fun w -> Connection.fire_single_watch w) (Connection.get_watches con specpath)) -+ List.iter (Connection.fire_single_watch (None, root)) (Connection.get_watches con specpath)) - - let set_target cons domain target_domain = - let con = find_domain cons domain in -diff --git a/tools/ocaml/xenstored/logging.ml b/tools/ocaml/xenstored/logging.ml -index c5cba79e92..1ede131329 100644 ---- a/tools/ocaml/xenstored/logging.ml -+++ b/tools/ocaml/xenstored/logging.ml -@@ -161,6 +161,8 @@ let xenstored_log_nb_lines = ref 13215 - let xenstored_log_nb_chars = ref (-1) - let xenstored_logger = ref (None: logger option) - -+let debug_enabled () = !xenstored_log_level = Debug -+ - let set_xenstored_log_destination s = - xenstored_log_destination := log_destination_of_string s - -@@ -204,6 +206,7 @@ type access_type = - | Commit - | Newconn - | Endconn -+ | Watch_not_fired - | XbOp of Xenbus.Xb.Op.operation - - let string_of_tid ~con tid = -@@ -217,6 +220,7 @@ let string_of_access_type = function - | Commit -> "commit " - | Newconn -> "newconn " - | Endconn -> "endconn " -+ | Watch_not_fired -> "w notfired" - - | XbOp op -> match op with - | Xenbus.Xb.Op.Debug -> "debug " -@@ -331,3 +335,7 @@ let xb_answer ~tid ~con ~ty data = - | _ -> false, Debug - in - if print then access_logging ~tid ~con ~data (XbOp ty) ~level -+ -+let watch_not_fired ~con perms path = -+ let data = Printf.sprintf "EPERM perms=[%s] path=%s" perms path in -+ access_logging ~tid:0 ~con ~data Watch_not_fired ~level:Info -diff --git a/tools/ocaml/xenstored/perms.ml b/tools/ocaml/xenstored/perms.ml -index 3ea193ea14..23b80aba3d 100644 ---- a/tools/ocaml/xenstored/perms.ml -+++ b/tools/ocaml/xenstored/perms.ml -@@ -79,9 +79,9 @@ let of_string s = - let string_of_perm perm = - Printf.sprintf "%c%u" (char_of_permty (snd perm)) (fst perm) - --let to_string permvec = -+let to_string ?(sep="\000") permvec = - let l = ((permvec.owner, permvec.other) :: permvec.acl) in -- String.concat "\000" (List.map string_of_perm l) -+ String.concat sep (List.map string_of_perm l) - - end - -@@ -132,8 +132,8 @@ let check_owner (connection:Connection.t) (node:Node.t) = - then Connection.is_owner connection (Node.get_owner node) - else true - --(* check if the current connection has the requested perm on the current node *) --let check (connection:Connection.t) request (node:Node.t) = 
-+(* check if the current connection lacks the requested perm on the current node *) -+let lacks (connection:Connection.t) request (node:Node.t) = - let check_acl domainid = - let perm = - if List.mem_assoc domainid (Node.get_acl node) -@@ -154,11 +154,19 @@ let check (connection:Connection.t) request (node:Node.t) = - info "Permission denied: Domain %d has write only access" domainid; - false - in -- if !activate -+ !activate - && not (Connection.is_dom0 connection) - && not (check_owner connection node) - && not (List.exists check_acl (Connection.get_owners connection)) -+ -+(* check if the current connection has the requested perm on the current node. -+* Raises an exception if it doesn't. *) -+let check connection request node = -+ if lacks connection request node - then raise Define.Permission_denied - -+(* check if the current connection has the requested perm on the current node *) -+let has connection request node = not (lacks connection request node) -+ - let equiv perm1 perm2 = - (Node.to_string perm1) = (Node.to_string perm2) -diff --git a/tools/ocaml/xenstored/process.ml b/tools/ocaml/xenstored/process.ml -index e528d1ecb2..f99b9e935c 100644 ---- a/tools/ocaml/xenstored/process.ml -+++ b/tools/ocaml/xenstored/process.ml -@@ -56,15 +56,17 @@ let split_one_path data con = - | path :: "" :: [] -> Store.Path.create path (Connection.get_path con) - | _ -> raise Invalid_Cmd_Args - --let process_watch ops cons = -+let process_watch t cons = -+ let oldroot = t.Transaction.oldroot in -+ let newroot = Store.get_root t.store in -+ let ops = Transaction.get_paths t |> List.rev in - let do_op_watch op cons = -- let recurse = match (fst op) with -- | Xenbus.Xb.Op.Write -> false -- | Xenbus.Xb.Op.Mkdir -> false -- | Xenbus.Xb.Op.Rm -> true -- | Xenbus.Xb.Op.Setperms -> false -+ let recurse, oldroot, root = match (fst op) with -+ | Xenbus.Xb.Op.Write|Xenbus.Xb.Op.Mkdir -> false, None, newroot -+ | Xenbus.Xb.Op.Rm -> true, None, oldroot -+ | Xenbus.Xb.Op.Setperms -> false, Some oldroot, newroot - | _ -> raise (Failure "huh ?") in -- Connections.fire_watches cons (snd op) recurse in -+ Connections.fire_watches ?oldroot root cons (snd op) recurse in - List.iter (fun op -> do_op_watch op cons) ops - - let create_implicit_path t perm path = -@@ -205,7 +207,7 @@ let reply_ack fct con t doms cons data = - fct con t doms cons data; - Packet.Ack (fun () -> - if Transaction.get_id t = Transaction.none then -- process_watch (Transaction.get_paths t) cons -+ process_watch t cons - ) - - let reply_data fct con t doms cons data = -@@ -353,14 +355,17 @@ let transaction_replay c t doms cons = - ignore @@ Connection.end_transaction c tid None - ) - --let do_watch con _t _domains cons data = -+let do_watch con t _domains cons data = - let (node, token) = - match (split None '\000' data) with - | [node; token; ""] -> node, token - | _ -> raise Invalid_Cmd_Args - in - let watch = Connections.add_watch cons con node token in -- Packet.Ack (fun () -> Connection.fire_single_watch watch) -+ Packet.Ack (fun () -> -+ (* xenstore.txt says this watch is fired immediately, -+ implying even if path doesn't exist or is unreadable *) -+ Connection.fire_single_watch_unchecked watch) - - let do_unwatch con _t _domains cons data = - let (node, token) = -@@ -391,7 +396,7 @@ let do_transaction_end con t domains cons data = - if not success then - raise Transaction_again; - if commit then begin -- process_watch (List.rev (Transaction.get_paths t)) cons; -+ process_watch t cons; - match t.Transaction.ty with - | Transaction.No -> - 
() (* no need to record anything *) -@@ -399,7 +404,7 @@ let do_transaction_end con t domains cons data = - record_commit ~con ~tid:id ~before:oldstore ~after:cstore - end - --let do_introduce con _t domains cons data = -+let do_introduce con t domains cons data = - if not (Connection.is_dom0 con) - then raise Define.Permission_denied; - let (domid, mfn, port) = -@@ -420,14 +425,14 @@ let do_introduce con _t domains cons data = - else try - let ndom = Domains.create domains domid mfn port in - Connections.add_domain cons ndom; -- Connections.fire_spec_watches cons Store.Path.introduce_domain; -+ Connections.fire_spec_watches (Transaction.get_root t) cons Store.Path.introduce_domain; - ndom - with _ -> raise Invalid_Cmd_Args - in - if (Domain.get_remote_port dom) <> port || (Domain.get_mfn dom) <> mfn then - raise Domain_not_match - --let do_release con _t domains cons data = -+let do_release con t domains cons data = - if not (Connection.is_dom0 con) - then raise Define.Permission_denied; - let domid = -@@ -439,7 +444,7 @@ let do_release con _t domains cons data = - Domains.del domains domid; - Connections.del_domain cons domid; - if fire_spec_watches -- then Connections.fire_spec_watches cons Store.Path.release_domain -+ then Connections.fire_spec_watches (Transaction.get_root t) cons Store.Path.release_domain - else raise Invalid_Cmd_Args - - let do_resume con _t domains _cons data = -@@ -507,6 +512,8 @@ let maybe_ignore_transaction = function - Transaction.none - | _ -> fun x -> x - -+ -+let () = Printexc.record_backtrace true - (** - * Nothrow guarantee. - *) -@@ -548,7 +555,8 @@ let process_packet ~store ~cons ~doms ~con ~req = - (* Put the response on the wire *) - send_response ty con t rid response - with exn -> -- error "process packet: %s" (Printexc.to_string exn); -+ let bt = Printexc.get_backtrace () in -+ error "process packet: %s. %s" (Printexc.to_string exn) bt; - Connection.send_error con tid rid "EIO" - - let do_input store cons doms con = -diff --git a/tools/ocaml/xenstored/transaction.ml b/tools/ocaml/xenstored/transaction.ml -index 963734a653..25bc8c3b4a 100644 ---- a/tools/ocaml/xenstored/transaction.ml -+++ b/tools/ocaml/xenstored/transaction.ml -@@ -82,6 +82,7 @@ type t = { - start_count: int64; - store: Store.t; (* This is the store that we change in write operations. 
*) - quota: Quota.t; -+ oldroot: Store.Node.t; - mutable paths: (Xenbus.Xb.Op.operation * Store.Path.t) list; - mutable operations: (Packet.request * Packet.response) list; - mutable read_lowpath: Store.Path.t option; -@@ -123,6 +124,7 @@ let make ?(internal=false) id store = - start_count = !counter; - store = if id = none then store else Store.copy store; - quota = Quota.copy store.Store.quota; -+ oldroot = Store.get_root store; - paths = []; - operations = []; - read_lowpath = None; -@@ -137,6 +139,8 @@ let make ?(internal=false) id store = - let get_store t = t.store - let get_paths t = t.paths - -+let get_root t = Store.get_root t.store -+ - let is_read_only t = t.paths = [] - let add_wop t ty path = t.paths <- (ty, path) :: t.paths - let add_operation ~perm t request response = -diff --git a/tools/ocaml/xenstored/xenstored.ml b/tools/ocaml/xenstored/xenstored.ml -index 8d0c50bfa4..f7b88065bb 100644 ---- a/tools/ocaml/xenstored/xenstored.ml -+++ b/tools/ocaml/xenstored/xenstored.ml -@@ -337,7 +337,9 @@ let _ = - let (notify, deaddom) = Domains.cleanup domains in - List.iter (Connections.del_domain cons) deaddom; - if deaddom <> [] || notify then -- Connections.fire_spec_watches cons Store.Path.release_domain -+ Connections.fire_spec_watches -+ (Store.get_root store) -+ cons Store.Path.release_domain - ) - else - let c = Connections.find_domain_by_port cons port in diff --git a/xsa115-o-0006-tools-ocaml-xenstored-add-xenstored.conf-flag-to-tur.patch b/xsa115-o-0006-tools-ocaml-xenstored-add-xenstored.conf-flag-to-tur.patch deleted file mode 100644 index d1fa8b2..0000000 --- a/xsa115-o-0006-tools-ocaml-xenstored-add-xenstored.conf-flag-to-tur.patch +++ /dev/null @@ -1,84 +0,0 @@ -From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= -Subject: tools/ocaml/xenstored: add xenstored.conf flag to turn off watch - permission checks -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -There are flags to turn off quotas and the permission system, so add one -that turns off the newly introduced watch permission checks as well. - -This is part of XSA-115. - -Signed-off-by: Edwin Török -Acked-by: Christian Lindig -Reviewed-by: Andrew Cooper - -diff --git a/tools/ocaml/xenstored/connection.ml b/tools/ocaml/xenstored/connection.ml -index 644a448f2e..fa0d3c4d92 100644 ---- a/tools/ocaml/xenstored/connection.ml -+++ b/tools/ocaml/xenstored/connection.ml -@@ -218,7 +218,7 @@ let fire_single_watch_unchecked watch = - let fire_single_watch (oldroot, root) watch = - let abspath = get_watch_path watch.con watch.path |> Store.Path.of_string in - let perms = lookup_watch_perms oldroot root abspath in -- if List.exists (Perms.has watch.con.perm READ) perms then -+ if Perms.can_fire_watch watch.con.perm perms then - fire_single_watch_unchecked watch - else - let perms = perms |> List.map (Perms.Node.to_string ~sep:" ") |> String.concat ", " in -diff --git a/tools/ocaml/xenstored/oxenstored.conf.in b/tools/ocaml/xenstored/oxenstored.conf.in -index 151b65b72d..f843482981 100644 ---- a/tools/ocaml/xenstored/oxenstored.conf.in -+++ b/tools/ocaml/xenstored/oxenstored.conf.in -@@ -44,6 +44,16 @@ conflict-rate-limit-is-aggregate = true - # Activate node permission system - perms-activate = true - -+# Activate the watch permission system -+# When this is enabled unprivileged guests can only get watch events -+# for xenstore entries that they would've been able to read. -+# -+# When this is disabled unprivileged guests may get watch events -+# for xenstore entries that they cannot read. 
The watch event contains -+# only the entry name, not the value. -+# This restores behaviour prior to XSA-115. -+perms-watch-activate = true -+ - # Activate quota - quota-activate = true - quota-maxentity = 1000 -diff --git a/tools/ocaml/xenstored/perms.ml b/tools/ocaml/xenstored/perms.ml -index 23b80aba3d..ee7fee6bda 100644 ---- a/tools/ocaml/xenstored/perms.ml -+++ b/tools/ocaml/xenstored/perms.ml -@@ -20,6 +20,7 @@ let info fmt = Logging.info "perms" fmt - open Stdext - - let activate = ref true -+let watch_activate = ref true - - type permty = READ | WRITE | RDWR | NONE - -@@ -168,5 +169,9 @@ let check connection request node = - (* check if the current connection has the requested perm on the current node *) - let has connection request node = not (lacks connection request node) - -+let can_fire_watch connection perms = -+ not !watch_activate -+ || List.exists (has connection READ) perms -+ - let equiv perm1 perm2 = - (Node.to_string perm1) = (Node.to_string perm2) -diff --git a/tools/ocaml/xenstored/xenstored.ml b/tools/ocaml/xenstored/xenstored.ml -index f7b88065bb..0d355bbcb8 100644 ---- a/tools/ocaml/xenstored/xenstored.ml -+++ b/tools/ocaml/xenstored/xenstored.ml -@@ -95,6 +95,7 @@ let parse_config filename = - ("conflict-max-history-seconds", Config.Set_float Define.conflict_max_history_seconds); - ("conflict-rate-limit-is-aggregate", Config.Set_bool Define.conflict_rate_limit_is_aggregate); - ("perms-activate", Config.Set_bool Perms.activate); -+ ("perms-watch-activate", Config.Set_bool Perms.watch_activate); - ("quota-activate", Config.Set_bool Quota.activate); - ("quota-maxwatch", Config.Set_int Define.maxwatch); - ("quota-transaction", Config.Set_int Define.maxtransaction); diff --git a/xsa286-4.14-0001-x86-pv-Drop-FLUSH_TLB_GLOBAL-in-do_mmu_update-for-XP.patch b/xsa286-4.14-0001-x86-pv-Drop-FLUSH_TLB_GLOBAL-in-do_mmu_update-for-XP.patch deleted file mode 100644 index 5e9109d..0000000 --- a/xsa286-4.14-0001-x86-pv-Drop-FLUSH_TLB_GLOBAL-in-do_mmu_update-for-XP.patch +++ /dev/null @@ -1,58 +0,0 @@ -From 941f69a428cd989144300519e548e346c681a1b3 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Thu, 22 Oct 2020 11:28:58 +0100 -Subject: [PATCH 1/2] x86/pv: Drop FLUSH_TLB_GLOBAL in do_mmu_update() for XPTI - -c/s 9d1d31ad9498 "x86: slightly reduce Meltdown band-aid overhead" removed the -use of Global TLB flushes on the Xen entry path, but added a FLUSH_TLB_GLOBAL -to the L4 path in do_mmu_update(). - -However, this was unnecessary. - -It is the guests responsibility to perform appropriate TLB flushing if the L4 -modification altered an established mapping in a flush-relevant way. In this -case, an MMUEXT_OP hypercall will follow. The case which Xen needs to cover -is when new mappings are created, and the resync on the exit-to-guest path -covers this correctly. - -There is a corner case with multiple vCPUs in hypercalls at the same time, -which 9d1d31ad9498 changed, and this patch changes back to its original XPTI -behaviour. - -Architecturally, established TLB entries can continue to be used until the -broadcast flush has completed. Therefore, even with concurrent hypercalls, -the guest cannot depend on older mappings not being used until an MMUEXT_OP -hypercall completes. Xen's implementation of guest-initiated flushes will -take correct effect on top of an in-progress hypercall, picking up new mapping -setting before the other vCPU's MMUEXT_OP completes. - -Note: The correctness of this change is not impacted by whether XPTI uses -global mappings or not. 
Correctness there depends on the behaviour of Xen on -the entry/exit paths when switching two/from the XPTI "shadow" pagetables. - -This is (not really) XSA-286 (but necessary to simplify the logic). - -Fixes: 9d1d31ad9498 ("x86: slightly reduce Meltdown band-aid overhead") -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich -(cherry picked from commit 055e1c3a3d95b1e753148369fbc4ba48782dd602) ---- - xen/arch/x86/mm.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c -index 3cb6fabdae..1caa2df0a5 100644 ---- a/xen/arch/x86/mm.c -+++ b/xen/arch/x86/mm.c -@@ -4193,7 +4193,7 @@ long do_mmu_update( - - cpumask_andnot(mask, pt_owner->dirty_cpumask, cpumask_of(cpu)); - if ( !cpumask_empty(mask) ) -- flush_mask(mask, FLUSH_TLB_GLOBAL | FLUSH_ROOT_PGTBL); -+ flush_mask(mask, FLUSH_ROOT_PGTBL); - } - - perfc_add(num_page_updates, i); --- -2.20.1 - diff --git a/xsa286-4.14-0002-x86-pv-Flush-TLB-in-response-to-paging-structure-cha.patch b/xsa286-4.14-0002-x86-pv-Flush-TLB-in-response-to-paging-structure-cha.patch deleted file mode 100644 index 22af1ca..0000000 --- a/xsa286-4.14-0002-x86-pv-Flush-TLB-in-response-to-paging-structure-cha.patch +++ /dev/null @@ -1,166 +0,0 @@ -From 10bb63c203f42d931fa1fa7dbbae7ce1765cecf2 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Mon, 19 Oct 2020 15:51:22 +0100 -Subject: [PATCH 2/2] x86/pv: Flush TLB in response to paging structure changes - -With MMU_UPDATE, a PV guest can make changes to higher level pagetables. This -is safe from Xen's point of view (as the update only affects guest mappings), -and the guest is required to flush (if necessary) after making updates. - -However, Xen's use of linear pagetables (UPDATE_VA_MAPPING, GNTTABOP_map, -writeable pagetables, etc.) is an implementation detail outside of the -API/ABI. - -Changes in the paging structure require invalidations in the linear pagetable -range for subsequent accesses into the linear pagetables to access non-stale -mappings. Xen must provide suitable flushing to prevent intermixed guest -actions from accidentally accessing/modifying the wrong pagetable. - -For all L2 and higher modifications, flush the TLB. PV guests cannot create -L2 or higher entries with the Global bit set, so no mappings established in -the linear range can be global. (This could in principle be an order 39 flush -starting at LINEAR_PT_VIRT_START, but no such mechanism exists in practice.) - -Express the necessary flushes as a set of booleans which accumulate across the -operation. Comment the flushing logic extensively. - -This is XSA-286. 
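(The accumulate-then-flush pattern is sketched below in plain C with illustrative flag names; the real patch additionally flushes the remote dirty CPUs and asserts the superset relation between the conditions.)

    /* Accumulate flush requirements as booleans across the whole
     * hypercall, then issue a single flush at the end.  flush_linear_pt
     * is a superset of the root-PT conditions, so it gates the flush. */
    #include <stdbool.h>

    #define FLUSH_TLB        (1u << 0)
    #define FLUSH_ROOT_PGTBL (1u << 1)

    void flush_local(unsigned int flags);        /* assumed primitive */

    static void finish_mmu_update(bool flush_linear_pt, bool flush_root_pt)
    {
        /* ... all page-table updates already performed ... */
        if (flush_linear_pt)
            flush_local(FLUSH_TLB |
                        (flush_root_pt ? FLUSH_ROOT_PGTBL : 0));
    }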
- -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich -(cherry picked from commit 16a20963b3209788f2c0d3a3eebb7d92f03f5883) ---- - xen/arch/x86/mm.c | 69 ++++++++++++++++++++++++++++++++++++++++------- - 1 file changed, 59 insertions(+), 10 deletions(-) - -diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c -index 1caa2df0a5..61cf6a7b9b 100644 ---- a/xen/arch/x86/mm.c -+++ b/xen/arch/x86/mm.c -@@ -3896,7 +3896,8 @@ long do_mmu_update( - struct vcpu *curr = current, *v = curr; - struct domain *d = v->domain, *pt_owner = d, *pg_owner; - mfn_t map_mfn = INVALID_MFN, mfn; -- bool sync_guest = false; -+ bool flush_linear_pt = false, flush_root_pt_local = false, -+ flush_root_pt_others = false; - uint32_t xsm_needed = 0; - uint32_t xsm_checked = 0; - int rc = put_old_guest_table(curr); -@@ -4046,6 +4047,8 @@ long do_mmu_update( - break; - rc = mod_l2_entry(va, l2e_from_intpte(req.val), mfn, - cmd == MMU_PT_UPDATE_PRESERVE_AD, v); -+ if ( !rc ) -+ flush_linear_pt = true; - break; - - case PGT_l3_page_table: -@@ -4053,6 +4056,8 @@ long do_mmu_update( - break; - rc = mod_l3_entry(va, l3e_from_intpte(req.val), mfn, - cmd == MMU_PT_UPDATE_PRESERVE_AD, v); -+ if ( !rc ) -+ flush_linear_pt = true; - break; - - case PGT_l4_page_table: -@@ -4060,6 +4065,8 @@ long do_mmu_update( - break; - rc = mod_l4_entry(va, l4e_from_intpte(req.val), mfn, - cmd == MMU_PT_UPDATE_PRESERVE_AD, v); -+ if ( !rc ) -+ flush_linear_pt = true; - if ( !rc && pt_owner->arch.pv.xpti ) - { - bool local_in_use = false; -@@ -4068,7 +4075,7 @@ long do_mmu_update( - mfn) ) - { - local_in_use = true; -- get_cpu_info()->root_pgt_changed = true; -+ flush_root_pt_local = true; - } - - /* -@@ -4080,7 +4087,7 @@ long do_mmu_update( - (1 + !!(page->u.inuse.type_info & PGT_pinned) + - mfn_eq(pagetable_get_mfn(curr->arch.guest_table_user), - mfn) + local_in_use) ) -- sync_guest = true; -+ flush_root_pt_others = true; - } - break; - -@@ -4182,19 +4189,61 @@ long do_mmu_update( - if ( va ) - unmap_domain_page(va); - -- if ( sync_guest ) -+ /* -+ * Perform required TLB maintenance. -+ * -+ * This logic currently depend on flush_linear_pt being a superset of the -+ * flush_root_pt_* conditions. -+ * -+ * pt_owner may not be current->domain. This may occur during -+ * construction of 32bit PV guests, or debugging of PV guests. The -+ * behaviour cannot be correct with domain unpaused. We therefore expect -+ * pt_owner->dirty_cpumask to be empty, but it is a waste of effort to -+ * explicitly check for, and exclude, this corner case. -+ * -+ * flush_linear_pt requires a FLUSH_TLB to all dirty CPUs. The flush must -+ * be performed now to maintain correct behaviour across a multicall. -+ * i.e. we cannot relax FLUSH_TLB to FLUSH_ROOT_PGTBL, given that the -+ * former is a side effect of the latter, because the resync (which is in -+ * the return-to-guest path) happens too late. -+ * -+ * flush_root_pt_* requires FLUSH_ROOT_PGTBL on either the local CPU -+ * (implies pt_owner == current->domain and current->processor set in -+ * pt_owner->dirty_cpumask), and/or all *other* dirty CPUs as there are -+ * references we can't account for locally. -+ */ -+ if ( flush_linear_pt /* || flush_root_pt_local || flush_root_pt_others */ ) - { -+ unsigned int cpu = smp_processor_id(); -+ cpumask_t *mask = pt_owner->dirty_cpumask; -+ - /* -- * Force other vCPU-s of the affected guest to pick up L4 entry -- * changes (if any). 
-+ * Always handle local flushing separately (if applicable), to -+ * separate the flush invocations appropriately for scope of the two -+ * flush_root_pt_* variables. - */ -- unsigned int cpu = smp_processor_id(); -- cpumask_t *mask = per_cpu(scratch_cpumask, cpu); -+ if ( likely(cpumask_test_cpu(cpu, mask)) ) -+ { -+ mask = per_cpu(scratch_cpumask, cpu); - -- cpumask_andnot(mask, pt_owner->dirty_cpumask, cpumask_of(cpu)); -+ cpumask_copy(mask, pt_owner->dirty_cpumask); -+ __cpumask_clear_cpu(cpu, mask); -+ -+ flush_local(FLUSH_TLB | -+ (flush_root_pt_local ? FLUSH_ROOT_PGTBL : 0)); -+ } -+ else -+ /* Sanity check. flush_root_pt_local implies local cpu is dirty. */ -+ ASSERT(!flush_root_pt_local); -+ -+ /* Flush the remote dirty CPUs. Does not include the local CPU. */ - if ( !cpumask_empty(mask) ) -- flush_mask(mask, FLUSH_ROOT_PGTBL); -+ flush_mask(mask, FLUSH_TLB | -+ (flush_root_pt_others ? FLUSH_ROOT_PGTBL : 0)); - } -+ else -+ /* Sanity check. flush_root_pt_* implies flush_linear_pt. */ -+ ASSERT(!flush_root_pt_local && !flush_root_pt_others); - - perfc_add(num_page_updates, i); - --- -2.20.1 - diff --git a/xsa322-4.14-c.patch b/xsa322-4.14-c.patch deleted file mode 100644 index 5059f24..0000000 --- a/xsa322-4.14-c.patch +++ /dev/null @@ -1,532 +0,0 @@ -From: Juergen Gross -Subject: tools/xenstore: revoke access rights for removed domains - -Access rights of Xenstore nodes are per domid. Unfortunately existing -granted access rights are not removed when a domain is being destroyed. -This means that a new domain created with the same domid will inherit -the access rights to Xenstore nodes from the previous domain(s) with -the same domid. - -This can be avoided by adding a generation counter to each domain. -The generation counter of the domain is set to the global generation -counter when a domain structure is being allocated. When reading or -writing a node all permissions of domains which are younger than the -node itself are dropped. This is done by flagging the related entry -as invalid in order to avoid modifying permissions in a way the user -could detect. - -A special case has to be considered: for a new domain the first -Xenstore entries are already written before the domain is officially -introduced in Xenstore. In order not to drop the permissions for the -new domain a domain struct is allocated even before introduction if -the hypervisor is aware of the domain. This requires adding another -bool "introduced" to struct domain in xenstored. In order to avoid -additional padding holes convert the shutdown flag to bool, too. - -As verifying permissions has its price regarding runtime add a new -quota for limiting the number of permissions an unprivileged domain -can set for a node. The default for that new quota is 5. - -This is part of XSA-322. - -Signed-off-by: Juergen Gross -Reviewed-by: Paul Durrant -Acked-by: Julien Grall - -diff --git a/tools/xenstore/include/xenstore_lib.h b/tools/xenstore/include/xenstore_lib.h -index 0ffbae9eb5..4c9b6d1685 100644 ---- a/tools/xenstore/include/xenstore_lib.h -+++ b/tools/xenstore/include/xenstore_lib.h -@@ -34,6 +34,7 @@ enum xs_perm_type { - /* Internal use. 
*/ - XS_PERM_ENOENT_OK = 4, - XS_PERM_OWNER = 8, -+ XS_PERM_IGNORE = 16, - }; - - struct xs_permissions -diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c -index 92bfd54cff..505560a5de 100644 ---- a/tools/xenstore/xenstored_core.c -+++ b/tools/xenstore/xenstored_core.c -@@ -104,6 +104,7 @@ int quota_nb_entry_per_domain = 1000; - int quota_nb_watch_per_domain = 128; - int quota_max_entry_size = 2048; /* 2K */ - int quota_max_transaction = 10; -+int quota_nb_perms_per_node = 5; - - void trace(const char *fmt, ...) - { -@@ -409,8 +410,13 @@ struct node *read_node(struct connection *conn, const void *ctx, - - /* Permissions are struct xs_permissions. */ - node->perms.p = hdr->perms; -+ if (domain_adjust_node_perms(node)) { -+ talloc_free(node); -+ return NULL; -+ } -+ - /* Data is binary blob (usually ascii, no nul). */ -- node->data = node->perms.p + node->perms.num; -+ node->data = node->perms.p + hdr->num_perms; - /* Children is strings, nul separated. */ - node->children = node->data + node->datalen; - -@@ -426,6 +432,9 @@ int write_node_raw(struct connection *conn, TDB_DATA *key, struct node *node, - void *p; - struct xs_tdb_record_hdr *hdr; - -+ if (domain_adjust_node_perms(node)) -+ return errno; -+ - data.dsize = sizeof(*hdr) - + node->perms.num * sizeof(node->perms.p[0]) - + node->datalen + node->childlen; -@@ -485,8 +494,9 @@ enum xs_perm_type perm_for_conn(struct connection *conn, - return (XS_PERM_READ|XS_PERM_WRITE|XS_PERM_OWNER) & mask; - - for (i = 1; i < perms->num; i++) -- if (perms->p[i].id == conn->id -- || (conn->target && perms->p[i].id == conn->target->id)) -+ if (!(perms->p[i].perms & XS_PERM_IGNORE) && -+ (perms->p[i].id == conn->id || -+ (conn->target && perms->p[i].id == conn->target->id))) - return perms->p[i].perms & mask; - - return perms->p[0].perms & mask; -@@ -1248,8 +1258,12 @@ static int do_set_perms(struct connection *conn, struct buffered_data *in) - if (perms.num < 2) - return EINVAL; - -- permstr = in->buffer + strlen(in->buffer) + 1; - perms.num--; -+ if (domain_is_unprivileged(conn) && -+ perms.num > quota_nb_perms_per_node) -+ return ENOSPC; -+ -+ permstr = in->buffer + strlen(in->buffer) + 1; - - perms.p = talloc_array(in, struct xs_permissions, perms.num); - if (!perms.p) -@@ -1904,6 +1918,7 @@ static void usage(void) - " -S, --entry-size limit the size of entry per domain, and\n" - " -W, --watch-nb limit the number of watches per domain,\n" - " -t, --transaction limit the number of transaction allowed per domain,\n" -+" -A, --perm-nb limit the number of permissions per node,\n" - " -R, --no-recovery to request that no recovery should be attempted when\n" - " the store is corrupted (debug only),\n" - " -I, --internal-db store database in memory, not on disk\n" -@@ -1924,6 +1939,7 @@ static struct option options[] = { - { "entry-size", 1, NULL, 'S' }, - { "trace-file", 1, NULL, 'T' }, - { "transaction", 1, NULL, 't' }, -+ { "perm-nb", 1, NULL, 'A' }, - { "no-recovery", 0, NULL, 'R' }, - { "internal-db", 0, NULL, 'I' }, - { "verbose", 0, NULL, 'V' }, -@@ -1946,7 +1962,7 @@ int main(int argc, char *argv[]) - int timeout; - - -- while ((opt = getopt_long(argc, argv, "DE:F:HNPS:t:T:RVW:", options, -+ while ((opt = getopt_long(argc, argv, "DE:F:HNPS:t:A:T:RVW:", options, - NULL)) != -1) { - switch (opt) { - case 'D': -@@ -1988,6 +2004,9 @@ int main(int argc, char *argv[]) - case 'W': - quota_nb_watch_per_domain = strtol(optarg, NULL, 10); - break; -+ case 'A': -+ quota_nb_perms_per_node = strtol(optarg, NULL, 10); -+ break; - 
case 'e': - dom0_event = strtol(optarg, NULL, 10); - break; -diff --git a/tools/xenstore/xenstored_domain.c b/tools/xenstore/xenstored_domain.c -index 9fad470f83..dc635e9be3 100644 ---- a/tools/xenstore/xenstored_domain.c -+++ b/tools/xenstore/xenstored_domain.c -@@ -67,8 +67,14 @@ struct domain - /* The connection associated with this. */ - struct connection *conn; - -+ /* Generation count at domain introduction time. */ -+ uint64_t generation; -+ - /* Have we noticed that this domain is shutdown? */ -- int shutdown; -+ bool shutdown; -+ -+ /* Has domain been officially introduced? */ -+ bool introduced; - - /* number of entry from this domain in the store */ - int nbentry; -@@ -188,6 +194,9 @@ static int destroy_domain(void *_domain) - - list_del(&domain->list); - -+ if (!domain->introduced) -+ return 0; -+ - if (domain->port) { - if (xenevtchn_unbind(xce_handle, domain->port) == -1) - eprintf("> Unbinding port %i failed!\n", domain->port); -@@ -209,21 +218,34 @@ static int destroy_domain(void *_domain) - return 0; - } - -+static bool get_domain_info(unsigned int domid, xc_dominfo_t *dominfo) -+{ -+ return xc_domain_getinfo(*xc_handle, domid, 1, dominfo) == 1 && -+ dominfo->domid == domid; -+} -+ - static void domain_cleanup(void) - { - xc_dominfo_t dominfo; - struct domain *domain; - struct connection *conn; - int notify = 0; -+ bool dom_valid; - - again: - list_for_each_entry(domain, &domains, list) { -- if (xc_domain_getinfo(*xc_handle, domain->domid, 1, -- &dominfo) == 1 && -- dominfo.domid == domain->domid) { -+ dom_valid = get_domain_info(domain->domid, &dominfo); -+ if (!domain->introduced) { -+ if (!dom_valid) { -+ talloc_free(domain); -+ goto again; -+ } -+ continue; -+ } -+ if (dom_valid) { - if ((dominfo.crashed || dominfo.shutdown) - && !domain->shutdown) { -- domain->shutdown = 1; -+ domain->shutdown = true; - notify = 1; - } - if (!dominfo.dying) -@@ -289,58 +311,84 @@ static char *talloc_domain_path(void *context, unsigned int domid) - return talloc_asprintf(context, "/local/domain/%u", domid); - } - --static struct domain *new_domain(void *context, unsigned int domid, -- int port) -+static struct domain *find_domain_struct(unsigned int domid) -+{ -+ struct domain *i; -+ -+ list_for_each_entry(i, &domains, list) { -+ if (i->domid == domid) -+ return i; -+ } -+ return NULL; -+} -+ -+static struct domain *alloc_domain(void *context, unsigned int domid) - { - struct domain *domain; -- int rc; - - domain = talloc(context, struct domain); -- if (!domain) -+ if (!domain) { -+ errno = ENOMEM; - return NULL; -+ } - -- domain->port = 0; -- domain->shutdown = 0; - domain->domid = domid; -- domain->path = talloc_domain_path(domain, domid); -- if (!domain->path) -- return NULL; -+ domain->generation = generation; -+ domain->introduced = false; - -- wrl_domain_new(domain); -+ talloc_set_destructor(domain, destroy_domain); - - list_add(&domain->list, &domains); -- talloc_set_destructor(domain, destroy_domain); -+ -+ return domain; -+} -+ -+static int new_domain(struct domain *domain, int port) -+{ -+ int rc; -+ -+ domain->port = 0; -+ domain->shutdown = false; -+ domain->path = talloc_domain_path(domain, domain->domid); -+ if (!domain->path) { -+ errno = ENOMEM; -+ return errno; -+ } -+ -+ wrl_domain_new(domain); - - /* Tell kernel we're interested in this event. 
*/ -- rc = xenevtchn_bind_interdomain(xce_handle, domid, port); -+ rc = xenevtchn_bind_interdomain(xce_handle, domain->domid, port); - if (rc == -1) -- return NULL; -+ return errno; - domain->port = rc; - -+ domain->introduced = true; -+ - domain->conn = new_connection(writechn, readchn); -- if (!domain->conn) -- return NULL; -+ if (!domain->conn) { -+ errno = ENOMEM; -+ return errno; -+ } - - domain->conn->domain = domain; -- domain->conn->id = domid; -+ domain->conn->id = domain->domid; - - domain->remote_port = port; - domain->nbentry = 0; - domain->nbwatch = 0; - -- return domain; -+ return 0; - } - - - static struct domain *find_domain_by_domid(unsigned int domid) - { -- struct domain *i; -+ struct domain *d; - -- list_for_each_entry(i, &domains, list) { -- if (i->domid == domid) -- return i; -- } -- return NULL; -+ d = find_domain_struct(domid); -+ -+ return (d && d->introduced) ? d : NULL; - } - - static void domain_conn_reset(struct domain *domain) -@@ -386,15 +434,21 @@ int do_introduce(struct connection *conn, struct buffered_data *in) - if (port <= 0) - return EINVAL; - -- domain = find_domain_by_domid(domid); -+ domain = find_domain_struct(domid); - - if (domain == NULL) { -+ /* Hang domain off "in" until we're finished. */ -+ domain = alloc_domain(in, domid); -+ if (domain == NULL) -+ return ENOMEM; -+ } -+ -+ if (!domain->introduced) { - interface = map_interface(domid); - if (!interface) - return errno; - /* Hang domain off "in" until we're finished. */ -- domain = new_domain(in, domid, port); -- if (!domain) { -+ if (new_domain(domain, port)) { - rc = errno; - unmap_interface(interface); - return rc; -@@ -503,8 +557,8 @@ int do_resume(struct connection *conn, struct buffered_data *in) - if (IS_ERR(domain)) - return -PTR_ERR(domain); - -- domain->shutdown = 0; -- -+ domain->shutdown = false; -+ - send_ack(conn, XS_RESUME); - - return 0; -@@ -647,8 +701,10 @@ static int dom0_init(void) - if (port == -1) - return -1; - -- dom0 = new_domain(NULL, xenbus_master_domid(), port); -- if (dom0 == NULL) -+ dom0 = alloc_domain(NULL, xenbus_master_domid()); -+ if (!dom0) -+ return -1; -+ if (new_domain(dom0, port)) - return -1; - - dom0->interface = xenbus_map(); -@@ -729,6 +785,66 @@ void domain_entry_inc(struct connection *conn, struct node *node) - } - } - -+/* -+ * Check whether a domain was created before or after a specific generation -+ * count (used for testing whether a node permission is older than a domain). -+ * -+ * Return values: -+ * -1: error -+ * 0: domain has higher generation count (it is younger than a node with the -+ * given count), or domain isn't existing any longer -+ * 1: domain is older than the node -+ */ -+static int chk_domain_generation(unsigned int domid, uint64_t gen) -+{ -+ struct domain *d; -+ xc_dominfo_t dominfo; -+ -+ if (!xc_handle && domid == 0) -+ return 1; -+ -+ d = find_domain_struct(domid); -+ if (d) -+ return (d->generation <= gen) ? 1 : 0; -+ -+ if (!get_domain_info(domid, &dominfo)) -+ return 0; -+ -+ d = alloc_domain(NULL, domid); -+ return d ? 1 : -1; -+} -+ -+/* -+ * Remove permissions for no longer existing domains in order to avoid a new -+ * domain with the same domid inheriting the permissions. -+ */ -+int domain_adjust_node_perms(struct node *node) -+{ -+ unsigned int i; -+ int ret; -+ -+ ret = chk_domain_generation(node->perms.p[0].id, node->generation); -+ if (ret < 0) -+ return errno; -+ -+ /* If the owner doesn't exist any longer give it to priv domain. 
*/ -+ if (!ret) -+ node->perms.p[0].id = priv_domid; -+ -+ for (i = 1; i < node->perms.num; i++) { -+ if (node->perms.p[i].perms & XS_PERM_IGNORE) -+ continue; -+ ret = chk_domain_generation(node->perms.p[i].id, -+ node->generation); -+ if (ret < 0) -+ return errno; -+ if (!ret) -+ node->perms.p[i].perms |= XS_PERM_IGNORE; -+ } -+ -+ return 0; -+} -+ - void domain_entry_dec(struct connection *conn, struct node *node) - { - struct domain *d; -diff --git a/tools/xenstore/xenstored_domain.h b/tools/xenstore/xenstored_domain.h -index 259183962a..5e00087206 100644 ---- a/tools/xenstore/xenstored_domain.h -+++ b/tools/xenstore/xenstored_domain.h -@@ -56,6 +56,9 @@ bool domain_can_write(struct connection *conn); - - bool domain_is_unprivileged(struct connection *conn); - -+/* Remove node permissions for no longer existing domains. */ -+int domain_adjust_node_perms(struct node *node); -+ - /* Quota manipulation */ - void domain_entry_inc(struct connection *conn, struct node *); - void domain_entry_dec(struct connection *conn, struct node *); -diff --git a/tools/xenstore/xenstored_transaction.c b/tools/xenstore/xenstored_transaction.c -index a7d8c5d475..2881f3b2e4 100644 ---- a/tools/xenstore/xenstored_transaction.c -+++ b/tools/xenstore/xenstored_transaction.c -@@ -47,7 +47,12 @@ - * transaction. - * Each time the global generation count is copied to either a node or a - * transaction it is incremented. This ensures all nodes and/or transactions -- * are having a unique generation count. -+ * are having a unique generation count. The increment is done _before_ the -+ * copy as that is needed for checking whether a domain was created before -+ * or after a node has been written (the domain's generation is set with the -+ * actual generation count without incrementing it, in order to support -+ * writing a node for a domain before the domain has been officially -+ * introduced). - * - * Transaction conflicts are detected by checking the generation count of all - * nodes read in the transaction to match with the generation count in the -@@ -161,7 +166,7 @@ struct transaction - }; - - extern int quota_max_transaction; --static uint64_t generation; -+uint64_t generation; - - static void set_tdb_key(const char *name, TDB_DATA *key) - { -@@ -237,7 +242,7 @@ int access_node(struct connection *conn, struct node *node, - bool introduce = false; - - if (type != NODE_ACCESS_READ) { -- node->generation = generation++; -+ node->generation = ++generation; - if (conn && !conn->transaction) - wrl_apply_debit_direct(conn); - } -@@ -374,7 +379,7 @@ static int finalize_transaction(struct connection *conn, - if (!data.dptr) - goto err; - hdr = (void *)data.dptr; -- hdr->generation = generation++; -+ hdr->generation = ++generation; - ret = tdb_store(tdb_ctx, key, data, - TDB_REPLACE); - talloc_free(data.dptr); -@@ -462,7 +467,7 @@ int do_transaction_start(struct connection *conn, struct buffered_data *in) - INIT_LIST_HEAD(&trans->accessed); - INIT_LIST_HEAD(&trans->changed_domains); - trans->fail = false; -- trans->generation = generation++; -+ trans->generation = ++generation; - - /* Pick an unused transaction identifier. 
*/ - do { -diff --git a/tools/xenstore/xenstored_transaction.h b/tools/xenstore/xenstored_transaction.h -index 3386bac565..43a162bea3 100644 ---- a/tools/xenstore/xenstored_transaction.h -+++ b/tools/xenstore/xenstored_transaction.h -@@ -27,6 +27,8 @@ enum node_access_type { - - struct transaction; - -+extern uint64_t generation; -+ - int do_transaction_start(struct connection *conn, struct buffered_data *node); - int do_transaction_end(struct connection *conn, struct buffered_data *in); - -diff --git a/tools/xenstore/xs_lib.c b/tools/xenstore/xs_lib.c -index 3e43f8809d..d407d5713a 100644 ---- a/tools/xenstore/xs_lib.c -+++ b/tools/xenstore/xs_lib.c -@@ -152,7 +152,7 @@ bool xs_strings_to_perms(struct xs_permissions *perms, unsigned int num, - bool xs_perm_to_string(const struct xs_permissions *perm, - char *buffer, size_t buf_len) - { -- switch ((int)perm->perms) { -+ switch ((int)perm->perms & ~XS_PERM_IGNORE) { - case XS_PERM_WRITE: - *buffer = 'w'; - break; diff --git a/xsa322-o.patch b/xsa322-o.patch deleted file mode 100644 index 75f7c20..0000000 --- a/xsa322-o.patch +++ /dev/null @@ -1,110 +0,0 @@ -From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= -Subject: tools/ocaml/xenstored: clean up permissions for dead domains -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -domain ids are prone to wrapping (15-bits), and with sufficient number -of VMs in a reboot loop it is possible to trigger it. Xenstore entries -may linger after a domain dies, until a toolstack cleans it up. During -this time there is a window where a wrapped domid could access these -xenstore keys (that belonged to another VM). - -To prevent this do a cleanup when a domain dies: - * walk the entire xenstore tree and update permissions for all nodes - * if the dead domain had an ACL entry: remove it - * if the dead domain was the owner: change the owner to Dom0 - -This is done without quota checks or a transaction. Quota checks would -be a no-op (either the domain is dead, or it is Dom0 where they are not -enforced). Transactions are not needed, because this is all done -atomically by oxenstored's single thread. - -The xenstore entries owned by the dead domain are not deleted, because -that could confuse a toolstack / backends that are still bound to it -(or generate unexpected watch events). It is the responsibility of a -toolstack to remove the xenstore entries themselves. - -This is part of XSA-322. - -Signed-off-by: Edwin Török -Acked-by: Christian Lindig - -diff --git a/tools/ocaml/xenstored/perms.ml b/tools/ocaml/xenstored/perms.ml -index ee7fee6bda..e8a16221f8 100644 ---- a/tools/ocaml/xenstored/perms.ml -+++ b/tools/ocaml/xenstored/perms.ml -@@ -58,6 +58,15 @@ let get_other perms = perms.other - let get_acl perms = perms.acl - let get_owner perm = perm.owner - -+(** [remote_domid ~domid perm] removes all ACLs for [domid] from perm. -+* If [domid] was the owner then it is changed to Dom0. -+* This is used for cleaning up after dead domains. 
-+* *) -+let remove_domid ~domid perm = -+ let acl = List.filter (fun (acl_domid, _) -> acl_domid <> domid) perm.acl in -+ let owner = if perm.owner = domid then 0 else perm.owner in -+ { perm with acl; owner } -+ - let default0 = create 0 NONE [] - - let perm_of_string s = -diff --git a/tools/ocaml/xenstored/process.ml b/tools/ocaml/xenstored/process.ml -index f99b9e935c..73e04cc18b 100644 ---- a/tools/ocaml/xenstored/process.ml -+++ b/tools/ocaml/xenstored/process.ml -@@ -443,6 +443,7 @@ let do_release con t domains cons data = - let fire_spec_watches = Domains.exist domains domid in - Domains.del domains domid; - Connections.del_domain cons domid; -+ Store.reset_permissions (Transaction.get_store t) domid; - if fire_spec_watches - then Connections.fire_spec_watches (Transaction.get_root t) cons Store.Path.release_domain - else raise Invalid_Cmd_Args -diff --git a/tools/ocaml/xenstored/store.ml b/tools/ocaml/xenstored/store.ml -index 6b6e440e98..3b05128f1b 100644 ---- a/tools/ocaml/xenstored/store.ml -+++ b/tools/ocaml/xenstored/store.ml -@@ -89,6 +89,13 @@ let check_owner node connection = - - let rec recurse fct node = fct node; List.iter (recurse fct) node.children - -+(** [recurse_map f tree] applies [f] on each node in the tree recursively *) -+let recurse_map f = -+ let rec walk node = -+ f { node with children = List.rev_map walk node.children |> List.rev } -+ in -+ walk -+ - let unpack node = (Symbol.to_string node.name, node.perms, node.value) - - end -@@ -405,6 +412,15 @@ let setperms store perm path nperms = - Quota.del_entry store.quota old_owner; - Quota.add_entry store.quota new_owner - -+let reset_permissions store domid = -+ Logging.info "store|node" "Cleaning up xenstore ACLs for domid %d" domid; -+ store.root <- Node.recurse_map (fun node -> -+ let perms = Perms.Node.remove_domid ~domid node.perms in -+ if perms <> node.perms then -+ Logging.debug "store|node" "Changed permissions for node %s" (Node.get_name node); -+ { node with perms } -+ ) store.root -+ - type ops = { - store: t; - write: Path.t -> string -> unit; -diff --git a/tools/ocaml/xenstored/xenstored.ml b/tools/ocaml/xenstored/xenstored.ml -index 0d355bbcb8..ff9fbbbac2 100644 ---- a/tools/ocaml/xenstored/xenstored.ml -+++ b/tools/ocaml/xenstored/xenstored.ml -@@ -336,6 +336,7 @@ let _ = - finally (fun () -> - if Some port = eventchn.Event.virq_port then ( - let (notify, deaddom) = Domains.cleanup domains in -+ List.iter (Store.reset_permissions store) deaddom; - List.iter (Connections.del_domain cons) deaddom; - if deaddom <> [] || notify then - Connections.fire_spec_watches diff --git a/xsa323.patch b/xsa323.patch deleted file mode 100644 index aadf5c7..0000000 --- a/xsa323.patch +++ /dev/null @@ -1,140 +0,0 @@ -From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= -Subject: tools/ocaml/xenstored: Fix path length validation -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Currently, oxenstored checks the length of paths against 1024, then -prepends "/local/domain/$DOMID/" to relative paths. This allows a domU -to create paths which can't subsequently be read by anyone, even dom0. -This also interferes with listing directories, etc. - -Define a new oxenstored.conf entry: quota-path-max, defaulting to 1024 -as before. For paths that begin with "/local/domain/$DOMID/" check the -relative path length against this quota. For all other paths check the -entire path length. 
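(In rough C terms, and with a hypothetical helper rather than the shipped code, the rule is: canonicalize first, then measure either the whole path or only the part after the "/local/domain/<N>/" prefix.)

    #include <stdbool.h>
    #include <stdio.h>
    #include <string.h>

    static bool path_len_ok(const char *canonical, size_t quota)
    {
        int domid, off = 0;
        size_t len = strlen(canonical);

        /* %n records how many bytes the prefix consumed, if it matched. */
        if (sscanf(canonical, "/local/domain/%d/%n", &domid, &off) == 1 &&
            off > 0) {
            (void)domid;             /* only the prefix length matters */
            len -= off;              /* check the relative part only */
        }
        return len > 0 && len <= quota;
    }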
- -This ensures that if the domid changes (and thus the length of a prefix -changes) a path that used to be valid stays valid (e.g. after a -live-migration). It also ensures that regardless how the client tries -to access a path (domid-relative or absolute) it will get consistent -results, since the limit is always applied on the final canonicalized -path. - -Delete the unused Domain.get_path to avoid it being confused with -Connection.get_path (which differs by a trailing slash only). - -Rewrite Util.path_validate to apply the appropriate length restriction -based on whether the path is relative or not. Remove the check for -connection_path being absolute, because it is not guest controlled data. - -This is part of XSA-323. - -Signed-off-by: Andrew Cooper -Signed-off-by: Edwin Török -Acked-by: Christian Lindig - -diff --git a/tools/ocaml/libs/xb/partial.ml b/tools/ocaml/libs/xb/partial.ml -index d4d1c7bdec..b6e2a716e2 100644 ---- a/tools/ocaml/libs/xb/partial.ml -+++ b/tools/ocaml/libs/xb/partial.ml -@@ -28,6 +28,7 @@ external header_of_string_internal: string -> int * int * int * int - = "stub_header_of_string" - - let xenstore_payload_max = 4096 (* xen/include/public/io/xs_wire.h *) -+let xenstore_rel_path_max = 2048 (* xen/include/public/io/xs_wire.h *) - - let of_string s = - let tid, rid, opint, dlen = header_of_string_internal s in -diff --git a/tools/ocaml/libs/xb/partial.mli b/tools/ocaml/libs/xb/partial.mli -index 359a75e88d..b9216018f5 100644 ---- a/tools/ocaml/libs/xb/partial.mli -+++ b/tools/ocaml/libs/xb/partial.mli -@@ -9,6 +9,7 @@ external header_size : unit -> int = "stub_header_size" - external header_of_string_internal : string -> int * int * int * int - = "stub_header_of_string" - val xenstore_payload_max : int -+val xenstore_rel_path_max : int - val of_string : string -> pkt - val append : pkt -> string -> int -> unit - val to_complete : pkt -> int -diff --git a/tools/ocaml/xenstored/define.ml b/tools/ocaml/xenstored/define.ml -index ea9e1b7620..ebe18b8e31 100644 ---- a/tools/ocaml/xenstored/define.ml -+++ b/tools/ocaml/xenstored/define.ml -@@ -31,6 +31,8 @@ let conflict_rate_limit_is_aggregate = ref true - - let domid_self = 0x7FF0 - -+let path_max = ref Xenbus.Partial.xenstore_rel_path_max -+ - exception Not_a_directory of string - exception Not_a_value of string - exception Already_exist -diff --git a/tools/ocaml/xenstored/domain.ml b/tools/ocaml/xenstored/domain.ml -index aeb185ff7e..81cb59b8f1 100644 ---- a/tools/ocaml/xenstored/domain.ml -+++ b/tools/ocaml/xenstored/domain.ml -@@ -38,7 +38,6 @@ type t = - } - - let is_dom0 d = d.id = 0 --let get_path dom = "/local/domain/" ^ (sprintf "%u" dom.id) - let get_id domain = domain.id - let get_interface d = d.interface - let get_mfn d = d.mfn -diff --git a/tools/ocaml/xenstored/oxenstored.conf.in b/tools/ocaml/xenstored/oxenstored.conf.in -index f843482981..4ae48e42d4 100644 ---- a/tools/ocaml/xenstored/oxenstored.conf.in -+++ b/tools/ocaml/xenstored/oxenstored.conf.in -@@ -61,6 +61,7 @@ quota-maxsize = 2048 - quota-maxwatch = 100 - quota-transaction = 10 - quota-maxrequests = 1024 -+quota-path-max = 1024 - - # Activate filed base backend - persistent = false -diff --git a/tools/ocaml/xenstored/utils.ml b/tools/ocaml/xenstored/utils.ml -index e8c9fe4e94..eb79bf0146 100644 ---- a/tools/ocaml/xenstored/utils.ml -+++ b/tools/ocaml/xenstored/utils.ml -@@ -93,7 +93,7 @@ let read_file_single_integer filename = - let path_validate path connection_path = - let len = String.length path in - -- if len = 0 || len > 1024 then 
raise Define.Invalid_path; -+ if len = 0 then raise Define.Invalid_path; - - let abs_path = - match String.get path 0 with -@@ -101,4 +101,17 @@ let path_validate path connection_path = - | _ -> connection_path ^ path - in - -+ (* Regardless whether client specified absolute or relative path, -+ canonicalize it (above) and, for domain-relative paths, check the -+ length of the relative part. -+ -+ This prevents paths becoming invalid across migrate when the length -+ of the domid changes in @param connection_path. -+ *) -+ let len = String.length abs_path in -+ let on_absolute _ _ = len in -+ let on_relative _ offset = len - offset in -+ let len = Scanf.ksscanf abs_path on_absolute "/local/domain/%d/%n" on_relative in -+ if len > !Define.path_max then raise Define.Invalid_path; -+ - abs_path -diff --git a/tools/ocaml/xenstored/xenstored.ml b/tools/ocaml/xenstored/xenstored.ml -index ff9fbbbac2..39d6d767e4 100644 ---- a/tools/ocaml/xenstored/xenstored.ml -+++ b/tools/ocaml/xenstored/xenstored.ml -@@ -102,6 +102,7 @@ let parse_config filename = - ("quota-maxentity", Config.Set_int Quota.maxent); - ("quota-maxsize", Config.Set_int Quota.maxsize); - ("quota-maxrequests", Config.Set_int Define.maxrequests); -+ ("quota-path-max", Config.Set_int Define.path_max); - ("test-eagain", Config.Set_bool Transaction.test_eagain); - ("persistent", Config.Set_bool Disk.enable); - ("xenstored-log-file", Config.String Logging.set_xenstored_log_destination); diff --git a/xsa324.patch b/xsa324.patch deleted file mode 100644 index c5e542d..0000000 --- a/xsa324.patch +++ /dev/null @@ -1,48 +0,0 @@ -From: Juergen Gross -Subject: tools/xenstore: drop watch event messages exceeding maximum size - -By setting a watch with a very large tag it is possible to trick -xenstored to send watch event messages exceeding the maximum allowed -payload size. This might in turn lead to a crash of xenstored as the -resulting error can cause dereferencing a NULL pointer in case there -is no active request being handled by the guest the watch event is -being sent to. - -Fix that by just dropping such watch events. Additionally modify the -error handling to test the pointer to be not NULL before dereferencing -it. - -This is XSA-324. - -Signed-off-by: Juergen Gross -Acked-by: Julien Grall - -diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c -index 33f95dcf3c..3d74dbbb40 100644 ---- a/tools/xenstore/xenstored_core.c -+++ b/tools/xenstore/xenstored_core.c -@@ -674,6 +674,9 @@ void send_reply(struct connection *conn, enum xsd_sockmsg_type type, - /* Replies reuse the request buffer, events need a new one. */ - if (type != XS_WATCH_EVENT) { - bdata = conn->in; -+ /* Drop asynchronous responses, e.g. errors for watch events. */ -+ if (!bdata) -+ return; - bdata->inhdr = true; - bdata->used = 0; - conn->in = NULL; -diff --git a/tools/xenstore/xenstored_watch.c b/tools/xenstore/xenstored_watch.c -index 71c108ea99..9ff20690c0 100644 ---- a/tools/xenstore/xenstored_watch.c -+++ b/tools/xenstore/xenstored_watch.c -@@ -92,6 +92,10 @@ static void add_event(struct connection *conn, - } - - len = strlen(name) + 1 + strlen(watch->token) + 1; -+ /* Don't try to send over-long events. 
*/ -+ if (len > XENSTORE_PAYLOAD_MAX) -+ return; -+ - data = talloc_array(ctx, char, len); - if (!data) - return; diff --git a/xsa325-4.14.patch b/xsa325-4.14.patch deleted file mode 100644 index a17f546..0000000 --- a/xsa325-4.14.patch +++ /dev/null @@ -1,192 +0,0 @@ -From: Harsha Shamsundara Havanur -Subject: tools/xenstore: Preserve bad client until they are destroyed - -XenStored will kill any connection that it thinks has misbehaved, -this is currently happening in two places: - * In `handle_input()` if the sanity check on the ring and the message - fails. - * In `handle_output()` when failing to write the response in the ring. - -As the domain structure is a child of the connection, XenStored will -destroy its view of the domain when killing the connection. This will -result in sending @releaseDomain event to all the watchers. - -As the watch event doesn't carry which domain has been released, -the watcher (such as XenStored) will generally go through the list of -domains registers and check if one of them is shutting down/dying. -In the case of a client misbehaving, the domain will likely to be -running, so no action will be performed. - -When the domain is effectively destroyed, XenStored will not be aware of -the domain anymore. So the watch event is not going to be sent. -By consequence, the watchers of the event will not release mappings -they may have on the domain. This will result in a zombie domain. - -In order to send @releaseDomain event at the correct time, we want -to keep the domain structure until the domain is effectively -shutting-down/dying. - -We also want to keep the connection around so we could possibly revive -the connection in the future. - -A new flag 'is_ignored' is added to mark whether a connection should be -ignored when checking if there are work to do. Additionally any -transactions, watches, buffers associated to the connection will be -freed as you can't do much with them (restarting the connection will -likely need a reset). - -As a side note, when the device model were running in a stubdomain, a -guest would have been able to introduce a use-after-free because there -is two parents for a guest connection. - -This is XSA-325. - -Reported-by: Pawel Wieczorkiewicz -Signed-off-by: Harsha Shamsundara Havanur -Signed-off-by: Julien Grall -Reviewed-by: Juergen Gross -Reviewed-by: Paul Durrant - -diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c -index af3d17004b3f..27d8f15b6b76 100644 ---- a/tools/xenstore/xenstored_core.c -+++ b/tools/xenstore/xenstored_core.c -@@ -1355,6 +1355,32 @@ static struct { - [XS_DIRECTORY_PART] = { "DIRECTORY_PART", send_directory_part }, - }; - -+/* -+ * Keep the connection alive but stop processing any new request or sending -+ * reponse. This is to allow sending @releaseDomain watch event at the correct -+ * moment and/or to allow the connection to restart (not yet implemented). -+ * -+ * All watches, transactions, buffers will be freed. 
-+ */ -+static void ignore_connection(struct connection *conn) -+{ -+ struct buffered_data *out, *tmp; -+ -+ trace("CONN %p ignored\n", conn); -+ -+ conn->is_ignored = true; -+ conn_delete_all_watches(conn); -+ conn_delete_all_transactions(conn); -+ -+ list_for_each_entry_safe(out, tmp, &conn->out_list, list) { -+ list_del(&out->list); -+ talloc_free(out); -+ } -+ -+ talloc_free(conn->in); -+ conn->in = NULL; -+} -+ - static const char *sockmsg_string(enum xsd_sockmsg_type type) - { - if ((unsigned int)type < ARRAY_SIZE(wire_funcs) && wire_funcs[type].str) -@@ -1413,8 +1439,10 @@ static void consider_message(struct connection *conn) - assert(conn->in == NULL); - } - --/* Errors in reading or allocating here mean we get out of sync, so we -- * drop the whole client connection. */ -+/* -+ * Errors in reading or allocating here means we get out of sync, so we mark -+ * the connection as ignored. -+ */ - static void handle_input(struct connection *conn) - { - int bytes; -@@ -1471,14 +1499,14 @@ static void handle_input(struct connection *conn) - return; - - bad_client: -- /* Kill it. */ -- talloc_free(conn); -+ ignore_connection(conn); - } - - static void handle_output(struct connection *conn) - { -+ /* Ignore the connection if an error occured */ - if (!write_messages(conn)) -- talloc_free(conn); -+ ignore_connection(conn); - } - - struct connection *new_connection(connwritefn_t *write, connreadfn_t *read) -@@ -1494,6 +1522,7 @@ struct connection *new_connection(connwritefn_t *write, connreadfn_t *read) - new->write = write; - new->read = read; - new->can_write = true; -+ new->is_ignored = false; - new->transaction_started = 0; - INIT_LIST_HEAD(&new->out_list); - INIT_LIST_HEAD(&new->watches); -@@ -2186,8 +2215,9 @@ int main(int argc, char *argv[]) - if (fds[conn->pollfd_idx].revents - & ~(POLLIN|POLLOUT)) - talloc_free(conn); -- else if (fds[conn->pollfd_idx].revents -- & POLLIN) -+ else if ((fds[conn->pollfd_idx].revents -+ & POLLIN) && -+ !conn->is_ignored) - handle_input(conn); - } - if (talloc_free(conn) == 0) -@@ -2199,8 +2229,9 @@ int main(int argc, char *argv[]) - if (fds[conn->pollfd_idx].revents - & ~(POLLIN|POLLOUT)) - talloc_free(conn); -- else if (fds[conn->pollfd_idx].revents -- & POLLOUT) -+ else if ((fds[conn->pollfd_idx].revents -+ & POLLOUT) && -+ !conn->is_ignored) - handle_output(conn); - } - if (talloc_free(conn) == 0) -diff --git a/tools/xenstore/xenstored_core.h b/tools/xenstore/xenstored_core.h -index eb19b71f5f46..196a6fd2b0be 100644 ---- a/tools/xenstore/xenstored_core.h -+++ b/tools/xenstore/xenstored_core.h -@@ -80,6 +80,9 @@ struct connection - /* Is this a read-only connection? */ - bool can_write; - -+ /* Is this connection ignored? */ -+ bool is_ignored; -+ - /* Buffered incoming data. 
*/ - struct buffered_data *in; - -diff --git a/tools/xenstore/xenstored_domain.c b/tools/xenstore/xenstored_domain.c -index dc635e9be30c..d5e1e3e9d42d 100644 ---- a/tools/xenstore/xenstored_domain.c -+++ b/tools/xenstore/xenstored_domain.c -@@ -286,6 +286,10 @@ bool domain_can_read(struct connection *conn) - - if (domain_is_unprivileged(conn) && conn->domain->wrl_credit < 0) - return false; -+ -+ if (conn->is_ignored) -+ return false; -+ - return (intf->req_cons != intf->req_prod); - } - -@@ -303,6 +307,10 @@ bool domain_is_unprivileged(struct connection *conn) - bool domain_can_write(struct connection *conn) - { - struct xenstore_domain_interface *intf = conn->domain->interface; -+ -+ if (conn->is_ignored) -+ return false; -+ - return ((intf->rsp_prod - intf->rsp_cons) != XENSTORE_RING_SIZE); - } - --- -2.17.1 - diff --git a/xsa330.patch b/xsa330.patch deleted file mode 100644 index c834516..0000000 --- a/xsa330.patch +++ /dev/null @@ -1,66 +0,0 @@ -From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= -Subject: tools/ocaml/xenstored: delete watch from trie too when resetting - watches -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -c/s f8c72b526129 "oxenstored: implement XS_RESET_WATCHES" from Xen 4.6 -introduced reset watches support in oxenstored by mirroring the change -in cxenstored. - -However the OCaml version has some additional data structures to -optimize watch firing, and just resetting the watches in one of the data -structures creates a security bug where a malicious guest kernel can -exceed its watch quota, driving oxenstored into OOM: - * create watches - * reset watches (this still keeps the watches lingering in another data - structure, using memory) - * create some more watches - * loop until oxenstored dies - -The guest kernel doesn't necessarily have to be malicious to trigger -this: - * if control/platform-feature-xs_reset_watches is set - * the guest kexecs (e.g. because it crashes) - * on boot more watches are set up - * this will slowly "leak" memory for watches in oxenstored, driving it - towards OOM. - -This is XSA-330. 
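In isolation, the invariant this fix restores can be shown with a minimal
standalone C sketch (the two structures and all names below are illustrative
stand-ins, not the oxenstored code): a reset that clears only the
per-connection side leaves entries behind in the shared index, which is
exactly the unaccounted memory described above.

    #include <stdio.h>

    struct connection   { int nr_watches; };
    struct shared_index { int nr_entries; };

    /* Buggy shape: only the per-connection side is cleared; the
     * entries in the shared index linger and keep consuming memory. */
    static void reset_watches_buggy(struct connection *c,
                                    struct shared_index *ix)
    {
        c->nr_watches = 0;
        (void)ix;
    }

    /* Fixed shape: both structures are purged together. */
    static void reset_watches_fixed(struct connection *c,
                                    struct shared_index *ix)
    {
        ix->nr_entries -= c->nr_watches;
        c->nr_watches = 0;
    }

    int main(void)
    {
        struct connection c = { 4 };
        struct shared_index ix = { 4 };

        reset_watches_buggy(&c, &ix);
        printf("index entries: %d\n", ix.nr_entries); /* 4: leaked */

        c.nr_watches = 4; ix.nr_entries = 8;
        reset_watches_fixed(&c, &ix);
        printf("index entries: %d\n", ix.nr_entries); /* 4: other cons only */
        return 0;
    }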
- -Fixes: f8c72b526129 ("oxenstored: implement XS_RESET_WATCHES") -Signed-off-by: Edwin Török -Acked-by: Christian Lindig -Reviewed-by: Andrew Cooper - -diff --git a/tools/ocaml/xenstored/connections.ml b/tools/ocaml/xenstored/connections.ml -index 9f9f7ee2f0..6ee3552ec2 100644 ---- a/tools/ocaml/xenstored/connections.ml -+++ b/tools/ocaml/xenstored/connections.ml -@@ -134,6 +134,10 @@ let del_watch cons con path token = - cons.watches <- Trie.set cons.watches key watches; - watch - -+let del_watches cons con = -+ Connection.del_watches con; -+ cons.watches <- Trie.map (del_watches_of_con con) cons.watches -+ - (* path is absolute *) - let fire_watches ?oldroot root cons path recurse = - let key = key_of_path path in -diff --git a/tools/ocaml/xenstored/process.ml b/tools/ocaml/xenstored/process.ml -index 73e04cc18b..437d2dcf9e 100644 ---- a/tools/ocaml/xenstored/process.ml -+++ b/tools/ocaml/xenstored/process.ml -@@ -179,8 +179,8 @@ let do_isintroduced con _t domains _cons data = - if domid = Define.domid_self || Domains.exist domains domid then "T\000" else "F\000" - - (* only in xen >= 4.2 *) --let do_reset_watches con _t _domains _cons _data = -- Connection.del_watches con; -+let do_reset_watches con _t _domains cons _data = -+ Connections.del_watches cons con; - Connection.del_transactions con - - (* only in >= xen3.3 *) diff --git a/xsa333.patch b/xsa333.patch deleted file mode 100644 index 6b86c94..0000000 --- a/xsa333.patch +++ /dev/null @@ -1,39 +0,0 @@ -From: Andrew Cooper -Subject: x86/pv: Handle the Intel-specific MSR_MISC_ENABLE correctly - -This MSR doesn't exist on AMD hardware, and switching away from the safe -functions in the common MSR path was an erroneous change. - -Partially revert the change. - -This is XSA-333. - -Fixes: 4fdc932b3cc ("x86/Intel: drop another 32-bit leftover") -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich -Reviewed-by: Wei Liu - -diff --git a/xen/arch/x86/pv/emul-priv-op.c b/xen/arch/x86/pv/emul-priv-op.c -index efeb2a727e..6332c74b80 100644 ---- a/xen/arch/x86/pv/emul-priv-op.c -+++ b/xen/arch/x86/pv/emul-priv-op.c -@@ -924,7 +924,8 @@ static int read_msr(unsigned int reg, uint64_t *val, - return X86EMUL_OKAY; - - case MSR_IA32_MISC_ENABLE: -- rdmsrl(reg, *val); -+ if ( rdmsr_safe(reg, *val) ) -+ break; - *val = guest_misc_enable(*val); - return X86EMUL_OKAY; - -@@ -1059,7 +1060,8 @@ static int write_msr(unsigned int reg, uint64_t val, - break; - - case MSR_IA32_MISC_ENABLE: -- rdmsrl(reg, temp); -+ if ( rdmsr_safe(reg, temp) ) -+ break; - if ( val != guest_misc_enable(temp) ) - goto invalid; - return X86EMUL_OKAY; diff --git a/xsa334.patch b/xsa334.patch deleted file mode 100644 index 4260cdb..0000000 --- a/xsa334.patch +++ /dev/null @@ -1,51 +0,0 @@ -From: Andrew Cooper -Subject: xen/memory: Don't skip the RCU unlock path in acquire_resource() - -In the case that an HVM Stubdomain makes an XENMEM_acquire_resource hypercall, -the FIXME path will bypass rcu_unlock_domain() on the way out of the function. - -Move the check to the start of the function. This does change the behaviour -of the get-size path for HVM Stubdomains, but that functionality is currently -broken and unused anyway, as well as being quite useless to entities which -can't actually map the resource anyway. - -This is XSA-334. 
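The shape of the bug reduces to a lock/reference imbalance; a standalone C
sketch (simplified, with a plain refcount standing in for
rcu_lock_domain()/rcu_unlock_domain()) shows why moving the check to the
start of the function is the fix:

    #include <stdatomic.h>
    #include <stdio.h>

    struct dom { atomic_int refs; };

    static void get_dom(struct dom *d) { atomic_fetch_add(&d->refs, 1); }
    static void put_dom(struct dom *d) { atomic_fetch_sub(&d->refs, 1); }

    /* Buggy shape: the early error return skips put_dom(). */
    static int acquire_buggy(struct dom *d, int must_refuse)
    {
        get_dom(d);
        if (must_refuse)
            return -13;          /* -EACCES, but the reference is leaked */
        /* ... real work ... */
        put_dom(d);
        return 0;
    }

    /* Fixed shape: refuse before anything needs undoing. */
    static int acquire_fixed(struct dom *d, int must_refuse)
    {
        if (must_refuse)
            return -13;          /* nothing held yet, nothing to drop */
        get_dom(d);
        /* ... real work ... */
        put_dom(d);
        return 0;
    }

    int main(void)
    {
        struct dom d = { 0 };

        acquire_buggy(&d, 1);
        printf("refs after buggy call: %d\n", (int)atomic_load(&d.refs));

        atomic_store(&d.refs, 0);
        acquire_fixed(&d, 1);
        printf("refs after fixed call: %d\n", (int)atomic_load(&d.refs));
        return 0;
    }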
- -Fixes: 83fa6552ce ("common: add a new mappable resource type: XENMEM_resource_grant_table") -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich - -diff --git a/xen/common/memory.c b/xen/common/memory.c -index 1a3c9ffb30..29741d8904 100644 ---- a/xen/common/memory.c -+++ b/xen/common/memory.c -@@ -1058,6 +1058,14 @@ static int acquire_resource( - xen_pfn_t mfn_list[32]; - int rc; - -+ /* -+ * FIXME: Until foreign pages inserted into the P2M are properly -+ * reference counted, it is unsafe to allow mapping of -+ * resource pages unless the caller is the hardware domain. -+ */ -+ if ( paging_mode_translate(currd) && !is_hardware_domain(currd) ) -+ return -EACCES; -+ - if ( copy_from_guest(&xmar, arg, 1) ) - return -EFAULT; - -@@ -1114,14 +1122,6 @@ static int acquire_resource( - xen_pfn_t gfn_list[ARRAY_SIZE(mfn_list)]; - unsigned int i; - -- /* -- * FIXME: Until foreign pages inserted into the P2M are properly -- * reference counted, it is unsafe to allow mapping of -- * resource pages unless the caller is the hardware domain. -- */ -- if ( !is_hardware_domain(currd) ) -- return -EACCES; -- - if ( copy_from_guest(gfn_list, xmar.frame_list, xmar.nr_frames) ) - rc = -EFAULT; - diff --git a/xsa335-qemu.patch b/xsa335-qemu.patch deleted file mode 100644 index 5617502..0000000 --- a/xsa335-qemu.patch +++ /dev/null @@ -1,84 +0,0 @@ -From c5bd2924c6d6a5bcbffb8b5e7798a88970131c07 Mon Sep 17 00:00:00 2001 -From: Gerd Hoffmann -Date: Mon, 17 Aug 2020 08:34:22 +0200 -Subject: [PATCH] usb: fix setup_len init (CVE-2020-14364) - -Store calculated setup_len in a local variable, verify it, and only -write it to the struct (USBDevice->setup_len) in case it passed the -sanity checks. - -This prevents other code (do_token_{in,out} functions specifically) -from working with invalid USBDevice->setup_len values and overrunning -the USBDevice->setup_buf[] buffer. 
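Distilled to a standalone C sketch (fields abridged from the structures the
patch below touches): the length is parsed into a local variable,
bounds-checked, and only then committed, so no other code path can ever
observe an out-of-range setup_len.

    #include <stdint.h>
    #include <stdio.h>

    struct usb_dev {
        uint8_t  setup_buf[8];
        uint8_t  data_buf[4096];
        unsigned setup_len;
    };

    /* wLength is bytes 6-7 of the SETUP packet, little endian. */
    static int latch_setup_len(struct usb_dev *s)
    {
        unsigned setup_len = (s->setup_buf[7] << 8) | s->setup_buf[6];

        if (setup_len > sizeof(s->data_buf))
            return -1;              /* stall; s->setup_len untouched */
        s->setup_len = setup_len;   /* commit the validated value only */
        return 0;
    }

    int main(void)
    {
        struct usb_dev d = { .setup_buf = { [6] = 0xff, [7] = 0xff } };

        printf("rc=%d setup_len=%u\n", latch_setup_len(&d), d.setup_len);
        return 0;
    }

The same validate-then-commit ordering is applied twice in the patch, once
in do_token_setup() and once in do_parameter().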
- -Fixes: CVE-2020-14364 -Signed-off-by: Gerd Hoffmann ---- - hw/usb/core.c | 16 ++++++++++------ - 1 file changed, 10 insertions(+), 6 deletions(-) - -diff --git a/hw/usb/core.c b/hw/usb/core.c -index 5abd128b6bc5..5234dcc73fea 100644 ---- a/hw/usb/core.c -+++ b/hw/usb/core.c -@@ -129,6 +129,7 @@ void usb_wakeup(USBEndpoint *ep, unsigned int stream) - static void do_token_setup(USBDevice *s, USBPacket *p) - { - int request, value, index; -+ unsigned int setup_len; - - if (p->iov.size != 8) { - p->status = USB_RET_STALL; -@@ -138,14 +139,15 @@ static void do_token_setup(USBDevice *s, USBPacket *p) - usb_packet_copy(p, s->setup_buf, p->iov.size); - s->setup_index = 0; - p->actual_length = 0; -- s->setup_len = (s->setup_buf[7] << 8) | s->setup_buf[6]; -- if (s->setup_len > sizeof(s->data_buf)) { -+ setup_len = (s->setup_buf[7] << 8) | s->setup_buf[6]; -+ if (setup_len > sizeof(s->data_buf)) { - fprintf(stderr, - "usb_generic_handle_packet: ctrl buffer too small (%d > %zu)\n", -- s->setup_len, sizeof(s->data_buf)); -+ setup_len, sizeof(s->data_buf)); - p->status = USB_RET_STALL; - return; - } -+ s->setup_len = setup_len; - - request = (s->setup_buf[0] << 8) | s->setup_buf[1]; - value = (s->setup_buf[3] << 8) | s->setup_buf[2]; -@@ -259,26 +261,28 @@ static void do_token_out(USBDevice *s, USBPacket *p) - static void do_parameter(USBDevice *s, USBPacket *p) - { - int i, request, value, index; -+ unsigned int setup_len; - - for (i = 0; i < 8; i++) { - s->setup_buf[i] = p->parameter >> (i*8); - } - - s->setup_state = SETUP_STATE_PARAM; -- s->setup_len = (s->setup_buf[7] << 8) | s->setup_buf[6]; - s->setup_index = 0; - - request = (s->setup_buf[0] << 8) | s->setup_buf[1]; - value = (s->setup_buf[3] << 8) | s->setup_buf[2]; - index = (s->setup_buf[5] << 8) | s->setup_buf[4]; - -- if (s->setup_len > sizeof(s->data_buf)) { -+ setup_len = (s->setup_buf[7] << 8) | s->setup_buf[6]; -+ if (setup_len > sizeof(s->data_buf)) { - fprintf(stderr, - "usb_generic_handle_packet: ctrl buffer too small (%d > %zu)\n", -- s->setup_len, sizeof(s->data_buf)); -+ setup_len, sizeof(s->data_buf)); - p->status = USB_RET_STALL; - return; - } -+ s->setup_len = setup_len; - - if (p->pid == USB_TOKEN_OUT) { - usb_packet_copy(p, s->data_buf, s->setup_len); --- -2.18.4 diff --git a/xsa335-trad.patch b/xsa335-trad.patch deleted file mode 100644 index 1310b84..0000000 --- a/xsa335-trad.patch +++ /dev/null @@ -1,45 +0,0 @@ -From a62cdd675bc6a8053f6797b6add29b2853b081e3 Mon Sep 17 00:00:00 2001 -From: Ian Jackson -Date: Wed, 19 Aug 2020 18:31:45 +0100 -Subject: [PATCH] SUPPORT.md: Desupport qemu trad except stub dm - -While investigating XSA-335 we discovered that many upstream security -fixes were missing. It is not practical to backport them. There is -no good reason to be running this very ancient version of qemu, except -that it is the only way to run a stub dm which is currently supported -by upstream. - -Signed-off-by: Ian Jackson ---- - SUPPORT.md | 15 +++++++++++++++ - 1 file changed, 15 insertions(+) - -diff --git a/SUPPORT.md b/SUPPORT.md -index 1479055c45..b0939052e2 100644 ---- a/SUPPORT.md -+++ b/SUPPORT.md -@@ -758,6 +758,21 @@ See the section **Blkback** for image formats supported by QEMU. - - Status: Supported, not security supported - -+### qemu-xen-traditional ### -+ -+The Xen Project provides an old version of qemu with modifications -+which enable use as a device model stub domain. 
The old version is -+normally selected by default only in a stub dm configuration, but it -+can be requested explicitly in other configurations, for example in -+`xl` with `device_model_version="QEMU_XEN_TRADITIONAL"`. -+ -+ Status, Device Model Stub Domains: Supported, with caveats -+ Status, as host process device model: No security support, not recommended -+ -+qemu-xen-traditional is security supported only for those available -+devices which are supported for mainstream QEMU (see above), with -+trusted driver domains (see Device Model Stub Domains). -+ - ## Virtual Firmware - - ### x86/HVM iPXE --- -2.20.1 - diff --git a/xsa336.patch b/xsa336.patch deleted file mode 100644 index b44c298..0000000 --- a/xsa336.patch +++ /dev/null @@ -1,283 +0,0 @@ -From: Roger Pau Monné -Subject: x86/vpt: fix race when migrating timers between vCPUs - -The current vPT code will migrate the emulated timers between vCPUs -(change the pt->vcpu field) while just holding the destination lock, -either from create_periodic_time or pt_adjust_global_vcpu_target if -the global target is adjusted. Changing the periodic_timer vCPU field -in this way creates a race where a third party could grab the lock in -the unlocked region of pt_adjust_global_vcpu_target (or before -create_periodic_time performs the vcpu change) and then release the -lock from a different vCPU, creating a locking imbalance. - -Introduce a per-domain rwlock in order to protect periodic_time -migration between vCPU lists. Taking the lock in read mode prevents -any timer from being migrated to a different vCPU, while taking it in -write mode allows performing migration of timers across vCPUs. The -per-vcpu locks are still used to protect all the other fields from the -periodic_timer struct. - -Note that such migration shouldn't happen frequently, and hence -there's no performance drop as a result of such locking. - -This is XSA-336. - -Reported-by: Igor Druzhinin -Tested-by: Igor Druzhinin -Signed-off-by: Roger Pau Monné -Reviewed-by: Jan Beulich ---- -Changes since v2: - - Re-order pt_adjust_vcpu to remove one if. - - Fix pt_lock to not call pt_vcpu_lock, as we might end up using a - stale value of pt->vcpu when taking the per-vcpu lock. - -Changes since v1: - - Use a per-domain rwlock to protect timer vCPU migration. - ---- a/xen/arch/x86/hvm/hvm.c -+++ b/xen/arch/x86/hvm/hvm.c -@@ -658,6 +658,8 @@ int hvm_domain_initialise(struct domain - /* need link to containing domain */ - d->arch.hvm.pl_time->domain = d; - -+ rwlock_init(&d->arch.hvm.pl_time->pt_migrate); -+ - /* Set the default IO Bitmap. */ - if ( is_hardware_domain(d) ) - { ---- a/xen/arch/x86/hvm/vpt.c -+++ b/xen/arch/x86/hvm/vpt.c -@@ -153,23 +153,32 @@ static int pt_irq_masked(struct periodic - return 1; - } - --static void pt_lock(struct periodic_time *pt) -+static void pt_vcpu_lock(struct vcpu *v) - { -- struct vcpu *v; -+ read_lock(&v->domain->arch.hvm.pl_time->pt_migrate); -+ spin_lock(&v->arch.hvm.tm_lock); -+} - -- for ( ; ; ) -- { -- v = pt->vcpu; -- spin_lock(&v->arch.hvm.tm_lock); -- if ( likely(pt->vcpu == v) ) -- break; -- spin_unlock(&v->arch.hvm.tm_lock); -- } -+static void pt_vcpu_unlock(struct vcpu *v) -+{ -+ spin_unlock(&v->arch.hvm.tm_lock); -+ read_unlock(&v->domain->arch.hvm.pl_time->pt_migrate); -+} -+ -+static void pt_lock(struct periodic_time *pt) -+{ -+ /* -+ * We cannot use pt_vcpu_lock here, because we need to acquire the -+ * per-domain lock first and then (re-)fetch the value of pt->vcpu, or -+ * else we might be using a stale value of pt->vcpu. 
-+ */ -+ read_lock(&pt->vcpu->domain->arch.hvm.pl_time->pt_migrate); -+ spin_lock(&pt->vcpu->arch.hvm.tm_lock); - } - - static void pt_unlock(struct periodic_time *pt) - { -- spin_unlock(&pt->vcpu->arch.hvm.tm_lock); -+ pt_vcpu_unlock(pt->vcpu); - } - - static void pt_process_missed_ticks(struct periodic_time *pt) -@@ -219,7 +228,7 @@ void pt_save_timer(struct vcpu *v) - if ( v->pause_flags & VPF_blocked ) - return; - -- spin_lock(&v->arch.hvm.tm_lock); -+ pt_vcpu_lock(v); - - list_for_each_entry ( pt, head, list ) - if ( !pt->do_not_freeze ) -@@ -227,7 +236,7 @@ void pt_save_timer(struct vcpu *v) - - pt_freeze_time(v); - -- spin_unlock(&v->arch.hvm.tm_lock); -+ pt_vcpu_unlock(v); - } - - void pt_restore_timer(struct vcpu *v) -@@ -235,7 +244,7 @@ void pt_restore_timer(struct vcpu *v) - struct list_head *head = &v->arch.hvm.tm_list; - struct periodic_time *pt; - -- spin_lock(&v->arch.hvm.tm_lock); -+ pt_vcpu_lock(v); - - list_for_each_entry ( pt, head, list ) - { -@@ -248,7 +257,7 @@ void pt_restore_timer(struct vcpu *v) - - pt_thaw_time(v); - -- spin_unlock(&v->arch.hvm.tm_lock); -+ pt_vcpu_unlock(v); - } - - static void pt_timer_fn(void *data) -@@ -309,7 +318,7 @@ int pt_update_irq(struct vcpu *v) - int irq, pt_vector = -1; - bool level; - -- spin_lock(&v->arch.hvm.tm_lock); -+ pt_vcpu_lock(v); - - earliest_pt = NULL; - max_lag = -1ULL; -@@ -339,7 +348,7 @@ int pt_update_irq(struct vcpu *v) - - if ( earliest_pt == NULL ) - { -- spin_unlock(&v->arch.hvm.tm_lock); -+ pt_vcpu_unlock(v); - return -1; - } - -@@ -347,7 +356,7 @@ int pt_update_irq(struct vcpu *v) - irq = earliest_pt->irq; - level = earliest_pt->level; - -- spin_unlock(&v->arch.hvm.tm_lock); -+ pt_vcpu_unlock(v); - - switch ( earliest_pt->source ) - { -@@ -394,7 +403,7 @@ int pt_update_irq(struct vcpu *v) - time_cb *cb = NULL; - void *cb_priv; - -- spin_lock(&v->arch.hvm.tm_lock); -+ pt_vcpu_lock(v); - /* Make sure the timer is still on the list. 
*/ - list_for_each_entry ( pt, &v->arch.hvm.tm_list, list ) - if ( pt == earliest_pt ) -@@ -404,7 +413,7 @@ int pt_update_irq(struct vcpu *v) - cb_priv = pt->priv; - break; - } -- spin_unlock(&v->arch.hvm.tm_lock); -+ pt_vcpu_unlock(v); - - if ( cb != NULL ) - cb(v, cb_priv); -@@ -441,12 +450,12 @@ void pt_intr_post(struct vcpu *v, struct - if ( intack.source == hvm_intsrc_vector ) - return; - -- spin_lock(&v->arch.hvm.tm_lock); -+ pt_vcpu_lock(v); - - pt = is_pt_irq(v, intack); - if ( pt == NULL ) - { -- spin_unlock(&v->arch.hvm.tm_lock); -+ pt_vcpu_unlock(v); - return; - } - -@@ -455,7 +464,7 @@ void pt_intr_post(struct vcpu *v, struct - cb = pt->cb; - cb_priv = pt->priv; - -- spin_unlock(&v->arch.hvm.tm_lock); -+ pt_vcpu_unlock(v); - - if ( cb != NULL ) - cb(v, cb_priv); -@@ -466,12 +475,12 @@ void pt_migrate(struct vcpu *v) - struct list_head *head = &v->arch.hvm.tm_list; - struct periodic_time *pt; - -- spin_lock(&v->arch.hvm.tm_lock); -+ pt_vcpu_lock(v); - - list_for_each_entry ( pt, head, list ) - migrate_timer(&pt->timer, v->processor); - -- spin_unlock(&v->arch.hvm.tm_lock); -+ pt_vcpu_unlock(v); - } - - void create_periodic_time( -@@ -490,7 +499,7 @@ void create_periodic_time( - - destroy_periodic_time(pt); - -- spin_lock(&v->arch.hvm.tm_lock); -+ write_lock(&v->domain->arch.hvm.pl_time->pt_migrate); - - pt->pending_intr_nr = 0; - pt->do_not_freeze = 0; -@@ -540,7 +549,7 @@ void create_periodic_time( - init_timer(&pt->timer, pt_timer_fn, pt, v->processor); - set_timer(&pt->timer, pt->scheduled); - -- spin_unlock(&v->arch.hvm.tm_lock); -+ write_unlock(&v->domain->arch.hvm.pl_time->pt_migrate); - } - - void destroy_periodic_time(struct periodic_time *pt) -@@ -565,30 +574,20 @@ void destroy_periodic_time(struct period - - static void pt_adjust_vcpu(struct periodic_time *pt, struct vcpu *v) - { -- int on_list; -- - ASSERT(pt->source == PTSRC_isa || pt->source == PTSRC_ioapic); - - if ( pt->vcpu == NULL ) - return; - -- pt_lock(pt); -- on_list = pt->on_list; -- if ( pt->on_list ) -- list_del(&pt->list); -- pt->on_list = 0; -- pt_unlock(pt); -- -- spin_lock(&v->arch.hvm.tm_lock); -+ write_lock(&pt->vcpu->domain->arch.hvm.pl_time->pt_migrate); - pt->vcpu = v; -- if ( on_list ) -+ if ( pt->on_list ) - { -- pt->on_list = 1; -+ list_del(&pt->list); - list_add(&pt->list, &v->arch.hvm.tm_list); -- - migrate_timer(&pt->timer, v->processor); - } -- spin_unlock(&v->arch.hvm.tm_lock); -+ write_unlock(&pt->vcpu->domain->arch.hvm.pl_time->pt_migrate); - } - - void pt_adjust_global_vcpu_target(struct vcpu *v) ---- a/xen/include/asm-x86/hvm/vpt.h -+++ b/xen/include/asm-x86/hvm/vpt.h -@@ -128,6 +128,13 @@ struct pl_time { /* platform time */ - struct RTCState vrtc; - struct HPETState vhpet; - struct PMTState vpmt; -+ /* -+ * rwlock to prevent periodic_time vCPU migration. Take the lock in read -+ * mode in order to prevent the vcpu field of periodic_time from changing. -+ * Lock must be taken in write mode when changes to the vcpu field are -+ * performed, as it allows exclusive access to all the timers of a domain. -+ */ -+ rwlock_t pt_migrate; - /* guest_time = Xen sys time + stime_offset */ - int64_t stime_offset; - /* Ensures monotonicity in appropriate timer modes. 
*/ diff --git a/xsa337-1.patch b/xsa337-1.patch deleted file mode 100644 index eae4185..0000000 --- a/xsa337-1.patch +++ /dev/null @@ -1,87 +0,0 @@ -From: Roger Pau Monné -Subject: x86/msi: get rid of read_msi_msg - -It's safer and faster to just use the cached last written -(untranslated) MSI message stored in msi_desc for the single user that -calls read_msi_msg. - -This also prevents relying on the data read from the device MSI -registers in order to figure out the index into the IOMMU interrupt -remapping table, which is not safe. - -This is part of XSA-337. - -Reported-by: Andrew Cooper -Requested-by: Andrew Cooper -Signed-off-by: Roger Pau Monné -Reviewed-by: Jan Beulich - ---- a/xen/arch/x86/msi.c -+++ b/xen/arch/x86/msi.c -@@ -185,54 +185,6 @@ void msi_compose_msg(unsigned vector, co - MSI_DATA_VECTOR(vector); - } - --static bool read_msi_msg(struct msi_desc *entry, struct msi_msg *msg) --{ -- switch ( entry->msi_attrib.type ) -- { -- case PCI_CAP_ID_MSI: -- { -- struct pci_dev *dev = entry->dev; -- int pos = entry->msi_attrib.pos; -- uint16_t data; -- -- msg->address_lo = pci_conf_read32(dev->sbdf, -- msi_lower_address_reg(pos)); -- if ( entry->msi_attrib.is_64 ) -- { -- msg->address_hi = pci_conf_read32(dev->sbdf, -- msi_upper_address_reg(pos)); -- data = pci_conf_read16(dev->sbdf, msi_data_reg(pos, 1)); -- } -- else -- { -- msg->address_hi = 0; -- data = pci_conf_read16(dev->sbdf, msi_data_reg(pos, 0)); -- } -- msg->data = data; -- break; -- } -- case PCI_CAP_ID_MSIX: -- { -- void __iomem *base = entry->mask_base; -- -- if ( unlikely(!msix_memory_decoded(entry->dev, -- entry->msi_attrib.pos)) ) -- return false; -- msg->address_lo = readl(base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET); -- msg->address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET); -- msg->data = readl(base + PCI_MSIX_ENTRY_DATA_OFFSET); -- break; -- } -- default: -- BUG(); -- } -- -- if ( iommu_intremap ) -- iommu_read_msi_from_ire(entry, msg); -- -- return true; --} -- - static int write_msi_msg(struct msi_desc *entry, struct msi_msg *msg) - { - entry->msg = *msg; -@@ -304,10 +256,7 @@ void set_msi_affinity(struct irq_desc *d - - ASSERT(spin_is_locked(&desc->lock)); - -- memset(&msg, 0, sizeof(msg)); -- if ( !read_msi_msg(msi_desc, &msg) ) -- return; -- -+ msg = msi_desc->msg; - msg.data &= ~MSI_DATA_VECTOR_MASK; - msg.data |= MSI_DATA_VECTOR(desc->arch.vector); - msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; diff --git a/xsa337-2.patch b/xsa337-2.patch deleted file mode 100644 index 8089e68..0000000 --- a/xsa337-2.patch +++ /dev/null @@ -1,184 +0,0 @@ -From: Jan Beulich -Subject: x86/MSI-X: restrict reading of table/PBA bases from BARs - -When assigned to less trusted or un-trusted guests, devices may change -state behind our backs (they may e.g. get reset by means we may not know -about). Therefore we should avoid reading BARs from hardware once a -device is no longer owned by Dom0. Furthermore when we can't read a BAR, -or when we read zero, we shouldn't instead use the caller provided -address unless that caller can be trusted. - -Re-arrange the logic in msix_capability_init() such that only Dom0 (and -only if the device isn't DomU-owned yet) or calls through -PHYSDEVOP_prepare_msix will actually result in the reading of the -respective BAR register(s). Additionally do so only as long as in-use -table entries are known (note that invocation of PHYSDEVOP_prepare_msix -counts as a "pseudo" entry). In all other uses the value already -recorded will get used instead. 
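Stripped of the MSI-X specifics, the policy is a read-once/cache pattern. A
standalone C sketch (all names hypothetical, not the Xen code): read the
register only while the device is still trusted, record the result, and fail
rather than re-read once the device may be guest-controlled.

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    struct dev {
        bool     trusted;        /* e.g. still owned by Dom0 */
        uint64_t table_base;     /* 0: nothing recorded yet */
    };

    static uint64_t read_bar(void) { return 0xfe000000u; } /* hw stand-in */

    static int get_table_base(struct dev *d, uint64_t *base)
    {
        if (!d->table_base) {
            if (!d->trusted)
                return -61;               /* -ENODATA, as in the patch */
            d->table_base = read_bar();   /* trusted read, recorded once */
        }
        *base = d->table_base;   /* never re-read behind an untrusted dev */
        return 0;
    }

    int main(void)
    {
        struct dev d = { .trusted = true };
        uint64_t base = 0;
        int rc = get_table_base(&d, &base);

        d.trusted = false;                /* device handed to a guest */
        rc |= get_table_base(&d, &base);  /* cached value still served */
        printf("rc=%d base=%#llx\n", rc, (unsigned long long)base);
        return 0;
    }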
- -Clear the recorded values in _pci_cleanup_msix() as well as on the one -affected error path. (Adjust this error path to also avoid blindly -disabling MSI-X when it was enabled on entry to the function.) - -While moving around variable declarations (in many cases to reduce their -scopes), also adjust some of their types. - -This is part of XSA-337. - -Signed-off-by: Jan Beulich -Reviewed-by: Roger Pau Monné ---- -v2: Use "unsigned int" for moved bir, pbus, etc. Further restrict under - what conditions to read the BAR(s). - ---- a/xen/arch/x86/msi.c -+++ b/xen/arch/x86/msi.c -@@ -771,16 +771,14 @@ static int msix_capability_init(struct p - { - struct arch_msix *msix = dev->msix; - struct msi_desc *entry = NULL; -- int vf; - u16 control; - u64 table_paddr; - u32 table_offset; -- u8 bir, pbus, pslot, pfunc; - u16 seg = dev->seg; - u8 bus = dev->bus; - u8 slot = PCI_SLOT(dev->devfn); - u8 func = PCI_FUNC(dev->devfn); -- bool maskall = msix->host_maskall; -+ bool maskall = msix->host_maskall, zap_on_error = false; - unsigned int pos = pci_find_cap_offset(seg, bus, slot, func, - PCI_CAP_ID_MSIX); - -@@ -822,43 +820,45 @@ static int msix_capability_init(struct p - - /* Locate MSI-X table region */ - table_offset = pci_conf_read32(dev->sbdf, msix_table_offset_reg(pos)); -- bir = (u8)(table_offset & PCI_MSIX_BIRMASK); -- table_offset &= ~PCI_MSIX_BIRMASK; -+ if ( !msix->used_entries && -+ (!msi || -+ (is_hardware_domain(current->domain) && -+ (dev->domain == current->domain || dev->domain == dom_io))) ) -+ { -+ unsigned int bir = table_offset & PCI_MSIX_BIRMASK, pbus, pslot, pfunc; -+ int vf; -+ paddr_t pba_paddr; -+ unsigned int pba_offset; - -- if ( !dev->info.is_virtfn ) -- { -- pbus = bus; -- pslot = slot; -- pfunc = func; -- vf = -1; -- } -- else -- { -- pbus = dev->info.physfn.bus; -- pslot = PCI_SLOT(dev->info.physfn.devfn); -- pfunc = PCI_FUNC(dev->info.physfn.devfn); -- vf = PCI_BDF2(dev->bus, dev->devfn); -- } -- -- table_paddr = read_pci_mem_bar(seg, pbus, pslot, pfunc, bir, vf); -- WARN_ON(msi && msi->table_base != table_paddr); -- if ( !table_paddr ) -- { -- if ( !msi || !msi->table_base ) -+ if ( !dev->info.is_virtfn ) - { -- pci_conf_write16(dev->sbdf, msix_control_reg(pos), -- control & ~PCI_MSIX_FLAGS_ENABLE); -- xfree(entry); -- return -ENXIO; -+ pbus = bus; -+ pslot = slot; -+ pfunc = func; -+ vf = -1; -+ } -+ else -+ { -+ pbus = dev->info.physfn.bus; -+ pslot = PCI_SLOT(dev->info.physfn.devfn); -+ pfunc = PCI_FUNC(dev->info.physfn.devfn); -+ vf = PCI_BDF2(dev->bus, dev->devfn); - } -- table_paddr = msi->table_base; -- } -- table_paddr += table_offset; - -- if ( !msix->used_entries ) -- { -- u64 pba_paddr; -- u32 pba_offset; -+ table_paddr = read_pci_mem_bar(seg, pbus, pslot, pfunc, bir, vf); -+ WARN_ON(msi && msi->table_base != table_paddr); -+ if ( !table_paddr ) -+ { -+ if ( !msi || !msi->table_base ) -+ { -+ pci_conf_write16(dev->sbdf, msix_control_reg(pos), -+ control & ~PCI_MSIX_FLAGS_ENABLE); -+ xfree(entry); -+ return -ENXIO; -+ } -+ table_paddr = msi->table_base; -+ } -+ table_paddr += table_offset & ~PCI_MSIX_BIRMASK; - - msix->table.first = PFN_DOWN(table_paddr); - msix->table.last = PFN_DOWN(table_paddr + -@@ -877,7 +877,18 @@ static int msix_capability_init(struct p - BITS_TO_LONGS(msix->nr_entries) - 1); - WARN_ON(rangeset_overlaps_range(mmio_ro_ranges, msix->pba.first, - msix->pba.last)); -+ -+ zap_on_error = true; -+ } -+ else if ( !msix->table.first ) -+ { -+ pci_conf_write16(dev->sbdf, msix_control_reg(pos), control); -+ xfree(entry); -+ return -ENODATA; - } 
-+ else -+ table_paddr = (msix->table.first << PAGE_SHIFT) + -+ PAGE_OFFSET(table_offset & ~PCI_MSIX_BIRMASK); - - if ( entry ) - { -@@ -888,8 +899,15 @@ static int msix_capability_init(struct p - - if ( idx < 0 ) - { -- pci_conf_write16(dev->sbdf, msix_control_reg(pos), -- control & ~PCI_MSIX_FLAGS_ENABLE); -+ if ( zap_on_error ) -+ { -+ msix->table.first = 0; -+ msix->pba.first = 0; -+ -+ control &= ~PCI_MSIX_FLAGS_ENABLE; -+ } -+ -+ pci_conf_write16(dev->sbdf, msix_control_reg(pos), control); - xfree(entry); - return idx; - } -@@ -1072,9 +1090,14 @@ static void _pci_cleanup_msix(struct arc - if ( rangeset_remove_range(mmio_ro_ranges, msix->table.first, - msix->table.last) ) - WARN(); -+ msix->table.first = 0; -+ msix->table.last = 0; -+ - if ( rangeset_remove_range(mmio_ro_ranges, msix->pba.first, - msix->pba.last) ) - WARN(); -+ msix->pba.first = 0; -+ msix->pba.last = 0; - } - } - diff --git a/xsa338.patch b/xsa338.patch deleted file mode 100644 index 7765219..0000000 --- a/xsa338.patch +++ /dev/null @@ -1,42 +0,0 @@ -From: Jan Beulich -Subject: evtchn: relax port_is_valid() - -To avoid ports potentially becoming invalid behind the back of certain -other functions (due to ->max_evtchn shrinking) because of -- a guest invoking evtchn_reset() and from a 2nd vCPU opening new - channels in parallel (see also XSA-343), -- alloc_unbound_xen_event_channel() produced channels living above the - 2-level range (see also XSA-342), -drop the max_evtchns check from port_is_valid(). For a port for which -the function once returned "true", the returned value may not turn into -"false" later on. The function's result may only depend on bounds which -can only ever grow (which is the case for d->valid_evtchns). - -This also eliminates a false sense of safety, utilized by some of the -users (see again XSA-343): Without a suitable lock held, d->max_evtchns -may change at any time, and hence deducing that certain other operations -are safe when port_is_valid() returned true is not legitimate. The -opportunities to abuse this may get widened by the change here -(depending on guest and host configuration), but will be taken care of -by the other XSA. - -This is XSA-338. - -Fixes: 48974e6ce52e ("evtchn: use a per-domain variable for the max number of event channels") -Signed-off-by: Jan Beulich -Reviewed-by: Stefano Stabellini -Reviewed-by: Julien Grall ---- -v5: New, split from larger patch. - ---- a/xen/include/xen/event.h -+++ b/xen/include/xen/event.h -@@ -107,8 +107,6 @@ void notify_via_xen_event_channel(struct - - static inline bool_t port_is_valid(struct domain *d, unsigned int p) - { -- if ( p >= d->max_evtchns ) -- return 0; - return p < read_atomic(&d->valid_evtchns); - } - diff --git a/xsa339.patch b/xsa339.patch deleted file mode 100644 index 3311ae0..0000000 --- a/xsa339.patch +++ /dev/null @@ -1,76 +0,0 @@ -From: Andrew Cooper -Subject: x86/pv: Avoid double exception injection - -There is at least one path (SYSENTER with NT set, Xen converts to #GP) which -ends up injecting the #GP fault twice, first in compat_sysenter(), and then a -second time in compat_test_all_events(), due to the stale TBF_EXCEPTION left -in TRAPBOUNCE_flags. - -The guest kernel sees the second fault first, which is a kernel level #GP -pointing at the head of the #GP handler, and is therefore a userspace -trigger-able DoS. 
- -This particular bug has bitten us several times before, so rearrange -{compat_,}create_bounce_frame() to clobber TRAPBOUNCE on success, rather than -leaving this task to one area of code which isn't used uniformly. - -Other scenarios which might result in a double injection (e.g. two calls -directly to compat_create_bounce_frame) will now crash the guest, which is far -more obvious than letting the kernel run with corrupt state. - -This is XSA-339 - -Fixes: fdac9515607b ("x86: clear EFLAGS.NT in SYSENTER entry path") -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich - -diff --git a/xen/arch/x86/x86_64/compat/entry.S b/xen/arch/x86/x86_64/compat/entry.S -index c3e62f8734..73619f57ca 100644 ---- a/xen/arch/x86/x86_64/compat/entry.S -+++ b/xen/arch/x86/x86_64/compat/entry.S -@@ -78,7 +78,6 @@ compat_process_softirqs: - sti - .Lcompat_bounce_exception: - call compat_create_bounce_frame -- movb $0, TRAPBOUNCE_flags(%rdx) - jmp compat_test_all_events - - ALIGN -@@ -352,7 +351,13 @@ __UNLIKELY_END(compat_bounce_null_selector) - movl %eax,UREGS_cs+8(%rsp) - movl TRAPBOUNCE_eip(%rdx),%eax - movl %eax,UREGS_rip+8(%rsp) -+ -+ /* Trapbounce complete. Clobber state to avoid an erroneous second injection. */ -+ xor %eax, %eax -+ mov %ax, TRAPBOUNCE_cs(%rdx) -+ mov %al, TRAPBOUNCE_flags(%rdx) - ret -+ - .section .fixup,"ax" - .Lfx13: - xorl %edi,%edi -diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S -index 1e880eb9f6..71a00e846b 100644 ---- a/xen/arch/x86/x86_64/entry.S -+++ b/xen/arch/x86/x86_64/entry.S -@@ -90,7 +90,6 @@ process_softirqs: - sti - .Lbounce_exception: - call create_bounce_frame -- movb $0, TRAPBOUNCE_flags(%rdx) - jmp test_all_events - - ALIGN -@@ -512,6 +511,11 @@ UNLIKELY_START(z, create_bounce_frame_bad_bounce_ip) - jmp asm_domain_crash_synchronous /* Does not return */ - __UNLIKELY_END(create_bounce_frame_bad_bounce_ip) - movq %rax,UREGS_rip+8(%rsp) -+ -+ /* Trapbounce complete. Clobber state to avoid an erroneous second injection. */ -+ xor %eax, %eax -+ mov %rax, TRAPBOUNCE_eip(%rdx) -+ mov %al, TRAPBOUNCE_flags(%rdx) - ret - - .pushsection .fixup, "ax", @progbits diff --git a/xsa340.patch b/xsa340.patch deleted file mode 100644 index 38d04da..0000000 --- a/xsa340.patch +++ /dev/null @@ -1,65 +0,0 @@ -From: Julien Grall -Subject: xen/evtchn: Add missing barriers when accessing/allocating an event channel - -While the allocation of a bucket is always performed with the per-domain -lock, the bucket may be accessed without the lock taken (for instance, see -evtchn_send()). - -Instead such sites relies on port_is_valid() to return a non-zero value -when the port has a struct evtchn associated to it. The function will -mostly check whether the port is less than d->valid_evtchns as all the -buckets/event channels should be allocated up to that point. - -Unfortunately a compiler is free to re-order the assignment in -evtchn_allocate_port() so it would be possible to have d->valid_evtchns -updated before the new bucket has finish to allocate. - -Additionally on Arm, even if this was compiled "correctly", the -processor can still re-order the memory access. - -Add a write memory barrier in the allocation side and a read memory -barrier when the port is valid to prevent any re-ordering issue. - -This is XSA-340. 
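The pairing generalizes to the usual publish/consume pattern. In standalone
C11 (flattened to a single bucket; release/acquire do the jobs of
smp_wmb()/smp_rmb()): the bucket contents must become visible before the
counter that advertises them.

    #include <stdatomic.h>
    #include <stddef.h>

    #define EVTCHNS_PER_BUCKET 128

    struct evtchn { int state; };

    struct domain {
        struct evtchn *bucket;        /* one bucket, for brevity */
        atomic_uint    valid_evtchns;
    };

    /* Allocation side: the release store orders the bucket pointer
     * before the count, like the smp_wmb() added by the patch. */
    static void publish_bucket(struct domain *d, struct evtchn *chn)
    {
        d->bucket = chn;
        atomic_store_explicit(&d->valid_evtchns, EVTCHNS_PER_BUCKET,
                              memory_order_release);
    }

    /* port_is_valid() analogue: the acquire load pairs with the release
     * store, so a port below the count implies a visible bucket. */
    static struct evtchn *lookup(struct domain *d, unsigned int port)
    {
        if (port >= atomic_load_explicit(&d->valid_evtchns,
                                         memory_order_acquire))
            return NULL;
        return &d->bucket[port];
    }

    int main(void)
    {
        static struct evtchn chns[EVTCHNS_PER_BUCKET];
        struct domain d = { .bucket = NULL };

        publish_bucket(&d, chns);
        return lookup(&d, 5) ? 0 : 1;
    }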
- -Reported-by: Julien Grall -Signed-off-by: Julien Grall -Reviewed-by: Stefano Stabellini - ---- a/xen/common/event_channel.c -+++ b/xen/common/event_channel.c -@@ -178,6 +178,13 @@ int evtchn_allocate_port(struct domain * - return -ENOMEM; - bucket_from_port(d, port) = chn; - -+ /* -+ * d->valid_evtchns is used to check whether the bucket can be -+ * accessed without the per-domain lock. Therefore, -+ * d->valid_evtchns should be seen *after* the new bucket has -+ * been setup. -+ */ -+ smp_wmb(); - write_atomic(&d->valid_evtchns, d->valid_evtchns + EVTCHNS_PER_BUCKET); - } - ---- a/xen/include/xen/event.h -+++ b/xen/include/xen/event.h -@@ -107,7 +107,17 @@ void notify_via_xen_event_channel(struct - - static inline bool_t port_is_valid(struct domain *d, unsigned int p) - { -- return p < read_atomic(&d->valid_evtchns); -+ if ( p >= read_atomic(&d->valid_evtchns) ) -+ return false; -+ -+ /* -+ * The caller will usually access the event channel afterwards and -+ * may be done without taking the per-domain lock. The barrier is -+ * going in pair the smp_wmb() barrier in evtchn_allocate_port(). -+ */ -+ smp_rmb(); -+ -+ return true; - } - - static inline struct evtchn *evtchn_from_port(struct domain *d, unsigned int p) diff --git a/xsa342.patch b/xsa342.patch deleted file mode 100644 index c878c07..0000000 --- a/xsa342.patch +++ /dev/null @@ -1,152 +0,0 @@ -From: Jan Beulich -Subject: evtchn/x86: enforce correct upper limit for 32-bit guests - -The recording of d->max_evtchns in evtchn_2l_init(), in particular with -the limited set of callers of the function, is insufficient. Neither for -PV nor for HVM guests the bitness is known at domain_create() time, yet -the upper bound in 2-level mode depends upon guest bitness. Recording -too high a limit "allows" x86 32-bit domains to open not properly usable -event channels, management of which (inside Xen) would then result in -corruption of the shared info and vCPU info structures. - -Keep the upper limit dynamic for the 2-level case, introducing a helper -function to retrieve the effective limit. This helper is now supposed to -be private to the event channel code. The used in do_poll() and -domain_dump_evtchn_info() weren't consistent with port uses elsewhere -and hence get switched to port_is_valid(). - -Furthermore FIFO mode's setup_ports() gets adjusted to loop only up to -the prior ABI limit, rather than all the way up to the new one. - -Finally a word on the change to do_poll(): Accessing ->max_evtchns -without holding a suitable lock was never safe, as it as well as -->evtchn_port_ops may change behind do_poll()'s back. Using -port_is_valid() instead widens some the window for potential abuse, -until we've dealt with the race altogether (see XSA-343). - -This is XSA-342. - -Reported-by: Julien Grall -Fixes: 48974e6ce52e ("evtchn: use a per-domain variable for the max number of event channels") -Signed-off-by: Jan Beulich -Reviewed-by: Stefano Stabellini -Reviewed-by: Julien Grall ---- -v6: Drop redundant port_is_valid() check from domain_dump_evtchn_info(). - Add comment in evtchn_fifo_init_control(). -v5: Split off port_is_valid() change. Drop max_evtchns field. -v4: Also mention the alloc_unbound_xen_event_channel() case that gets - fixed here. -v2: Remove max_evtchns check from port_is_valid(). 
- ---- a/xen/common/event_2l.c -+++ b/xen/common/event_2l.c -@@ -103,7 +103,6 @@ static const struct evtchn_port_ops evtc - void evtchn_2l_init(struct domain *d) - { - d->evtchn_port_ops = &evtchn_port_ops_2l; -- d->max_evtchns = BITS_PER_EVTCHN_WORD(d) * BITS_PER_EVTCHN_WORD(d); - } - - /* ---- a/xen/common/event_channel.c -+++ b/xen/common/event_channel.c -@@ -151,7 +151,7 @@ static void free_evtchn_bucket(struct do - - int evtchn_allocate_port(struct domain *d, evtchn_port_t port) - { -- if ( port > d->max_evtchn_port || port >= d->max_evtchns ) -+ if ( port > d->max_evtchn_port || port >= max_evtchns(d) ) - return -ENOSPC; - - if ( port_is_valid(d, port) ) -@@ -1396,13 +1396,11 @@ static void domain_dump_evtchn_info(stru - - spin_lock(&d->event_lock); - -- for ( port = 1; port < d->max_evtchns; ++port ) -+ for ( port = 1; port_is_valid(d, port); ++port ) - { - const struct evtchn *chn; - char *ssid; - -- if ( !port_is_valid(d, port) ) -- continue; - chn = evtchn_from_port(d, port); - if ( chn->state == ECS_FREE ) - continue; ---- a/xen/common/event_fifo.c -+++ b/xen/common/event_fifo.c -@@ -478,7 +478,7 @@ static void cleanup_event_array(struct d - d->evtchn_fifo = NULL; - } - --static void setup_ports(struct domain *d) -+static void setup_ports(struct domain *d, unsigned int prev_evtchns) - { - unsigned int port; - -@@ -488,7 +488,7 @@ static void setup_ports(struct domain *d - * - save its pending state. - * - set default priority. - */ -- for ( port = 1; port < d->max_evtchns; port++ ) -+ for ( port = 1; port < prev_evtchns; port++ ) - { - struct evtchn *evtchn; - -@@ -546,6 +546,8 @@ int evtchn_fifo_init_control(struct evtc - if ( !d->evtchn_fifo ) - { - struct vcpu *vcb; -+ /* Latch the value before it changes during setup_event_array(). */ -+ unsigned int prev_evtchns = max_evtchns(d); - - for_each_vcpu ( d, vcb ) { - rc = setup_control_block(vcb); -@@ -562,8 +564,7 @@ int evtchn_fifo_init_control(struct evtc - goto error; - - d->evtchn_port_ops = &evtchn_port_ops_fifo; -- d->max_evtchns = EVTCHN_FIFO_NR_CHANNELS; -- setup_ports(d); -+ setup_ports(d, prev_evtchns); - } - else - rc = map_control_block(v, gfn, offset); ---- a/xen/common/sched/core.c -+++ b/xen/common/sched/core.c -@@ -1428,7 +1428,7 @@ static long do_poll(struct sched_poll *s - goto out; - - rc = -EINVAL; -- if ( port >= d->max_evtchns ) -+ if ( !port_is_valid(d, port) ) - goto out; - - rc = 0; ---- a/xen/include/xen/event.h -+++ b/xen/include/xen/event.h -@@ -105,6 +105,12 @@ void notify_via_xen_event_channel(struct - #define bucket_from_port(d, p) \ - ((group_from_port(d, p))[((p) % EVTCHNS_PER_GROUP) / EVTCHNS_PER_BUCKET]) - -+static inline unsigned int max_evtchns(const struct domain *d) -+{ -+ return d->evtchn_fifo ? EVTCHN_FIFO_NR_CHANNELS -+ : BITS_PER_EVTCHN_WORD(d) * BITS_PER_EVTCHN_WORD(d); -+} -+ - static inline bool_t port_is_valid(struct domain *d, unsigned int p) - { - if ( p >= read_atomic(&d->valid_evtchns) ) ---- a/xen/include/xen/sched.h -+++ b/xen/include/xen/sched.h -@@ -359,7 +359,6 @@ struct domain - /* Event channel information. 
*/ - struct evtchn *evtchn; /* first bucket only */ - struct evtchn **evtchn_group[NR_EVTCHN_GROUPS]; /* all other buckets */ -- unsigned int max_evtchns; /* number supported by ABI */ - unsigned int max_evtchn_port; /* max permitted port number */ - unsigned int valid_evtchns; /* number of allocated event channels */ - spinlock_t event_lock; diff --git a/xsa343-1.patch b/xsa343-1.patch deleted file mode 100644 index 0abbc03..0000000 --- a/xsa343-1.patch +++ /dev/null @@ -1,199 +0,0 @@ -From: Jan Beulich -Subject: evtchn: evtchn_reset() shouldn't succeed with still-open ports - -While the function closes all ports, it does so without holding any -lock, and hence racing requests may be issued causing new ports to get -opened. This would have been problematic in particular if such a newly -opened port had a port number above the new implementation limit (i.e. -when switching from FIFO to 2-level) after the reset, as prior to -"evtchn: relax port_is_valid()" this could have led to e.g. -evtchn_close()'s "BUG_ON(!port_is_valid(d2, port2))" to trigger. - -Introduce a counter of active ports and check that it's (still) no -larger then the number of Xen internally used ones after obtaining the -necessary lock in evtchn_reset(). - -As to the access model of the new {active,xen}_evtchns fields - while -all writes get done using write_atomic(), reads ought to use -read_atomic() only when outside of a suitably locked region. - -Note that as of now evtchn_bind_virq() and evtchn_bind_ipi() don't have -a need to call check_free_port(). - -This is part of XSA-343. - -Signed-off-by: Jan Beulich -Reviewed-by: Stefano Stabellini -Reviewed-by: Julien Grall ---- -v7: Drop optimization from evtchn_reset(). -v6: Fix loop exit condition in evtchn_reset(). Use {read,write}_atomic() - also for xen_evtchns. -v5: Move increment in alloc_unbound_xen_event_channel() out of the inner - locked region. -v4: Account for Xen internal ports. -v3: Document intended access next to new struct field. -v2: Add comment to check_free_port(). Drop commented out calls. - ---- a/xen/common/event_channel.c -+++ b/xen/common/event_channel.c -@@ -188,6 +188,8 @@ int evtchn_allocate_port(struct domain * - write_atomic(&d->valid_evtchns, d->valid_evtchns + EVTCHNS_PER_BUCKET); - } - -+ write_atomic(&d->active_evtchns, d->active_evtchns + 1); -+ - return 0; - } - -@@ -211,11 +213,26 @@ static int get_free_port(struct domain * - return -ENOSPC; - } - -+/* -+ * Check whether a port is still marked free, and if so update the domain -+ * counter accordingly. To be used on function exit paths. -+ */ -+static void check_free_port(struct domain *d, evtchn_port_t port) -+{ -+ if ( port_is_valid(d, port) && -+ evtchn_from_port(d, port)->state == ECS_FREE ) -+ write_atomic(&d->active_evtchns, d->active_evtchns - 1); -+} -+ - void evtchn_free(struct domain *d, struct evtchn *chn) - { - /* Clear pending event to avoid unexpected behavior on re-bind. */ - evtchn_port_clear_pending(d, chn); - -+ if ( consumer_is_xen(chn) ) -+ write_atomic(&d->xen_evtchns, d->xen_evtchns - 1); -+ write_atomic(&d->active_evtchns, d->active_evtchns - 1); -+ - /* Reset binding to vcpu0 when the channel is freed. 
*/ - chn->state = ECS_FREE; - chn->notify_vcpu_id = 0; -@@ -258,6 +275,7 @@ static long evtchn_alloc_unbound(evtchn_ - alloc->port = port; - - out: -+ check_free_port(d, port); - spin_unlock(&d->event_lock); - rcu_unlock_domain(d); - -@@ -351,6 +369,7 @@ static long evtchn_bind_interdomain(evtc - bind->local_port = lport; - - out: -+ check_free_port(ld, lport); - spin_unlock(&ld->event_lock); - if ( ld != rd ) - spin_unlock(&rd->event_lock); -@@ -488,7 +507,7 @@ static long evtchn_bind_pirq(evtchn_bind - struct domain *d = current->domain; - struct vcpu *v = d->vcpu[0]; - struct pirq *info; -- int port, pirq = bind->pirq; -+ int port = 0, pirq = bind->pirq; - long rc; - - if ( (pirq < 0) || (pirq >= d->nr_pirqs) ) -@@ -536,6 +555,7 @@ static long evtchn_bind_pirq(evtchn_bind - arch_evtchn_bind_pirq(d, pirq); - - out: -+ check_free_port(d, port); - spin_unlock(&d->event_lock); - - return rc; -@@ -1011,10 +1031,10 @@ int evtchn_unmask(unsigned int port) - return 0; - } - -- - int evtchn_reset(struct domain *d) - { - unsigned int i; -+ int rc = 0; - - if ( d != current->domain && !d->controller_pause_count ) - return -EINVAL; -@@ -1024,7 +1044,9 @@ int evtchn_reset(struct domain *d) - - spin_lock(&d->event_lock); - -- if ( d->evtchn_fifo ) -+ if ( d->active_evtchns > d->xen_evtchns ) -+ rc = -EAGAIN; -+ else if ( d->evtchn_fifo ) - { - /* Switching back to 2-level ABI. */ - evtchn_fifo_destroy(d); -@@ -1033,7 +1055,7 @@ int evtchn_reset(struct domain *d) - - spin_unlock(&d->event_lock); - -- return 0; -+ return rc; - } - - static long evtchn_set_priority(const struct evtchn_set_priority *set_priority) -@@ -1219,10 +1241,9 @@ int alloc_unbound_xen_event_channel( - - spin_lock(&ld->event_lock); - -- rc = get_free_port(ld); -+ port = rc = get_free_port(ld); - if ( rc < 0 ) - goto out; -- port = rc; - chn = evtchn_from_port(ld, port); - - rc = xsm_evtchn_unbound(XSM_TARGET, ld, chn, remote_domid); -@@ -1238,7 +1259,10 @@ int alloc_unbound_xen_event_channel( - - spin_unlock(&chn->lock); - -+ write_atomic(&ld->xen_evtchns, ld->xen_evtchns + 1); -+ - out: -+ check_free_port(ld, port); - spin_unlock(&ld->event_lock); - - return rc < 0 ? rc : port; -@@ -1314,6 +1338,7 @@ int evtchn_init(struct domain *d, unsign - return -EINVAL; - } - evtchn_from_port(d, 0)->state = ECS_RESERVED; -+ write_atomic(&d->active_evtchns, 0); - - #if MAX_VIRT_CPUS > BITS_PER_LONG - d->poll_mask = xzalloc_array(unsigned long, BITS_TO_LONGS(d->max_vcpus)); -@@ -1340,6 +1365,8 @@ void evtchn_destroy(struct domain *d) - for ( i = 0; port_is_valid(d, i); i++ ) - evtchn_close(d, i, 0); - -+ ASSERT(!d->active_evtchns); -+ - clear_global_virq_handlers(d); - - evtchn_fifo_destroy(d); ---- a/xen/include/xen/sched.h -+++ b/xen/include/xen/sched.h -@@ -361,6 +361,16 @@ struct domain - struct evtchn **evtchn_group[NR_EVTCHN_GROUPS]; /* all other buckets */ - unsigned int max_evtchn_port; /* max permitted port number */ - unsigned int valid_evtchns; /* number of allocated event channels */ -+ /* -+ * Number of in-use event channels. Writers should use write_atomic(). -+ * Readers need to use read_atomic() only when not holding event_lock. -+ */ -+ unsigned int active_evtchns; -+ /* -+ * Number of event channels used internally by Xen (not subject to -+ * EVTCHNOP_reset). Read/write access like for active_evtchns. 
-+ */ -+ unsigned int xen_evtchns; - spinlock_t event_lock; - const struct evtchn_port_ops *evtchn_port_ops; - struct evtchn_fifo_domain *evtchn_fifo; diff --git a/xsa343-2.patch b/xsa343-2.patch deleted file mode 100644 index b8eb499..0000000 --- a/xsa343-2.patch +++ /dev/null @@ -1,295 +0,0 @@ -From: Jan Beulich -Subject: evtchn: convert per-channel lock to be IRQ-safe - -... in order for send_guest_{global,vcpu}_virq() to be able to make use -of it. - -This is part of XSA-343. - -Signed-off-by: Jan Beulich -Acked-by: Julien Grall ---- -v6: New. ---- -TBD: This is the "dumb" conversion variant. In a couple of cases the - slightly simpler spin_{,un}lock_irq() could apparently be used. - ---- a/xen/common/event_channel.c -+++ b/xen/common/event_channel.c -@@ -248,6 +248,7 @@ static long evtchn_alloc_unbound(evtchn_ - int port; - domid_t dom = alloc->dom; - long rc; -+ unsigned long flags; - - d = rcu_lock_domain_by_any_id(dom); - if ( d == NULL ) -@@ -263,14 +264,14 @@ static long evtchn_alloc_unbound(evtchn_ - if ( rc ) - goto out; - -- spin_lock(&chn->lock); -+ spin_lock_irqsave(&chn->lock, flags); - - chn->state = ECS_UNBOUND; - if ( (chn->u.unbound.remote_domid = alloc->remote_dom) == DOMID_SELF ) - chn->u.unbound.remote_domid = current->domain->domain_id; - evtchn_port_init(d, chn); - -- spin_unlock(&chn->lock); -+ spin_unlock_irqrestore(&chn->lock, flags); - - alloc->port = port; - -@@ -283,26 +284,32 @@ static long evtchn_alloc_unbound(evtchn_ - } - - --static void double_evtchn_lock(struct evtchn *lchn, struct evtchn *rchn) -+static unsigned long double_evtchn_lock(struct evtchn *lchn, -+ struct evtchn *rchn) - { -- if ( lchn < rchn ) -+ unsigned long flags; -+ -+ if ( lchn <= rchn ) - { -- spin_lock(&lchn->lock); -- spin_lock(&rchn->lock); -+ spin_lock_irqsave(&lchn->lock, flags); -+ if ( lchn != rchn ) -+ spin_lock(&rchn->lock); - } - else - { -- if ( lchn != rchn ) -- spin_lock(&rchn->lock); -+ spin_lock_irqsave(&rchn->lock, flags); - spin_lock(&lchn->lock); - } -+ -+ return flags; - } - --static void double_evtchn_unlock(struct evtchn *lchn, struct evtchn *rchn) -+static void double_evtchn_unlock(struct evtchn *lchn, struct evtchn *rchn, -+ unsigned long flags) - { -- spin_unlock(&lchn->lock); - if ( lchn != rchn ) -- spin_unlock(&rchn->lock); -+ spin_unlock(&lchn->lock); -+ spin_unlock_irqrestore(&rchn->lock, flags); - } - - static long evtchn_bind_interdomain(evtchn_bind_interdomain_t *bind) -@@ -312,6 +319,7 @@ static long evtchn_bind_interdomain(evtc - int lport, rport = bind->remote_port; - domid_t rdom = bind->remote_dom; - long rc; -+ unsigned long flags; - - if ( rdom == DOMID_SELF ) - rdom = current->domain->domain_id; -@@ -347,7 +355,7 @@ static long evtchn_bind_interdomain(evtc - if ( rc ) - goto out; - -- double_evtchn_lock(lchn, rchn); -+ flags = double_evtchn_lock(lchn, rchn); - - lchn->u.interdomain.remote_dom = rd; - lchn->u.interdomain.remote_port = rport; -@@ -364,7 +372,7 @@ static long evtchn_bind_interdomain(evtc - */ - evtchn_port_set_pending(ld, lchn->notify_vcpu_id, lchn); - -- double_evtchn_unlock(lchn, rchn); -+ double_evtchn_unlock(lchn, rchn, flags); - - bind->local_port = lport; - -@@ -387,6 +395,7 @@ int evtchn_bind_virq(evtchn_bind_virq_t - struct domain *d = current->domain; - int virq = bind->virq, vcpu = bind->vcpu; - int rc = 0; -+ unsigned long flags; - - if ( (virq < 0) || (virq >= ARRAY_SIZE(v->virq_to_evtchn)) ) - return -EINVAL; -@@ -424,14 +433,14 @@ int evtchn_bind_virq(evtchn_bind_virq_t - - chn = evtchn_from_port(d, port); - -- 
spin_lock(&chn->lock); -+ spin_lock_irqsave(&chn->lock, flags); - - chn->state = ECS_VIRQ; - chn->notify_vcpu_id = vcpu; - chn->u.virq = virq; - evtchn_port_init(d, chn); - -- spin_unlock(&chn->lock); -+ spin_unlock_irqrestore(&chn->lock, flags); - - v->virq_to_evtchn[virq] = bind->port = port; - -@@ -448,6 +457,7 @@ static long evtchn_bind_ipi(evtchn_bind_ - struct domain *d = current->domain; - int port, vcpu = bind->vcpu; - long rc = 0; -+ unsigned long flags; - - if ( domain_vcpu(d, vcpu) == NULL ) - return -ENOENT; -@@ -459,13 +469,13 @@ static long evtchn_bind_ipi(evtchn_bind_ - - chn = evtchn_from_port(d, port); - -- spin_lock(&chn->lock); -+ spin_lock_irqsave(&chn->lock, flags); - - chn->state = ECS_IPI; - chn->notify_vcpu_id = vcpu; - evtchn_port_init(d, chn); - -- spin_unlock(&chn->lock); -+ spin_unlock_irqrestore(&chn->lock, flags); - - bind->port = port; - -@@ -509,6 +519,7 @@ static long evtchn_bind_pirq(evtchn_bind - struct pirq *info; - int port = 0, pirq = bind->pirq; - long rc; -+ unsigned long flags; - - if ( (pirq < 0) || (pirq >= d->nr_pirqs) ) - return -EINVAL; -@@ -541,14 +552,14 @@ static long evtchn_bind_pirq(evtchn_bind - goto out; - } - -- spin_lock(&chn->lock); -+ spin_lock_irqsave(&chn->lock, flags); - - chn->state = ECS_PIRQ; - chn->u.pirq.irq = pirq; - link_pirq_port(port, chn, v); - evtchn_port_init(d, chn); - -- spin_unlock(&chn->lock); -+ spin_unlock_irqrestore(&chn->lock, flags); - - bind->port = port; - -@@ -569,6 +580,7 @@ int evtchn_close(struct domain *d1, int - struct evtchn *chn1, *chn2; - int port2; - long rc = 0; -+ unsigned long flags; - - again: - spin_lock(&d1->event_lock); -@@ -668,14 +680,14 @@ int evtchn_close(struct domain *d1, int - BUG_ON(chn2->state != ECS_INTERDOMAIN); - BUG_ON(chn2->u.interdomain.remote_dom != d1); - -- double_evtchn_lock(chn1, chn2); -+ flags = double_evtchn_lock(chn1, chn2); - - evtchn_free(d1, chn1); - - chn2->state = ECS_UNBOUND; - chn2->u.unbound.remote_domid = d1->domain_id; - -- double_evtchn_unlock(chn1, chn2); -+ double_evtchn_unlock(chn1, chn2, flags); - - goto out; - -@@ -683,9 +695,9 @@ int evtchn_close(struct domain *d1, int - BUG(); - } - -- spin_lock(&chn1->lock); -+ spin_lock_irqsave(&chn1->lock, flags); - evtchn_free(d1, chn1); -- spin_unlock(&chn1->lock); -+ spin_unlock_irqrestore(&chn1->lock, flags); - - out: - if ( d2 != NULL ) -@@ -705,13 +717,14 @@ int evtchn_send(struct domain *ld, unsig - struct evtchn *lchn, *rchn; - struct domain *rd; - int rport, ret = 0; -+ unsigned long flags; - - if ( !port_is_valid(ld, lport) ) - return -EINVAL; - - lchn = evtchn_from_port(ld, lport); - -- spin_lock(&lchn->lock); -+ spin_lock_irqsave(&lchn->lock, flags); - - /* Guest cannot send via a Xen-attached event channel. 
*/ - if ( unlikely(consumer_is_xen(lchn)) ) -@@ -746,7 +759,7 @@ int evtchn_send(struct domain *ld, unsig - } - - out: -- spin_unlock(&lchn->lock); -+ spin_unlock_irqrestore(&lchn->lock, flags); - - return ret; - } -@@ -1238,6 +1251,7 @@ int alloc_unbound_xen_event_channel( - { - struct evtchn *chn; - int port, rc; -+ unsigned long flags; - - spin_lock(&ld->event_lock); - -@@ -1250,14 +1264,14 @@ int alloc_unbound_xen_event_channel( - if ( rc ) - goto out; - -- spin_lock(&chn->lock); -+ spin_lock_irqsave(&chn->lock, flags); - - chn->state = ECS_UNBOUND; - chn->xen_consumer = get_xen_consumer(notification_fn); - chn->notify_vcpu_id = lvcpu; - chn->u.unbound.remote_domid = remote_domid; - -- spin_unlock(&chn->lock); -+ spin_unlock_irqrestore(&chn->lock, flags); - - write_atomic(&ld->xen_evtchns, ld->xen_evtchns + 1); - -@@ -1280,11 +1294,12 @@ void notify_via_xen_event_channel(struct - { - struct evtchn *lchn, *rchn; - struct domain *rd; -+ unsigned long flags; - - ASSERT(port_is_valid(ld, lport)); - lchn = evtchn_from_port(ld, lport); - -- spin_lock(&lchn->lock); -+ spin_lock_irqsave(&lchn->lock, flags); - - if ( likely(lchn->state == ECS_INTERDOMAIN) ) - { -@@ -1294,7 +1309,7 @@ void notify_via_xen_event_channel(struct - evtchn_port_set_pending(rd, rchn->notify_vcpu_id, rchn); - } - -- spin_unlock(&lchn->lock); -+ spin_unlock_irqrestore(&lchn->lock, flags); - } - - void evtchn_check_pollers(struct domain *d, unsigned int port) diff --git a/xsa343-3.patch b/xsa343-3.patch deleted file mode 100644 index e513e30..0000000 --- a/xsa343-3.patch +++ /dev/null @@ -1,392 +0,0 @@ -From: Jan Beulich -Subject: evtchn: address races with evtchn_reset() - -Neither d->evtchn_port_ops nor max_evtchns(d) may be used in an entirely -lock-less manner, as both may change by a racing evtchn_reset(). In the -common case, at least one of the domain's event lock or the per-channel -lock needs to be held. In the specific case of the inter-domain sending -by evtchn_send() and notify_via_xen_event_channel() holding the other -side's per-channel lock is sufficient, as the channel can't change state -without both per-channel locks held. Without such a channel changing -state, evtchn_reset() can't complete successfully. - -Lock-free accesses continue to be permitted for the shim (calling some -otherwise internal event channel functions), as this happens while the -domain is in effectively single-threaded mode. Special care also needs -taking for the shim's marking of in-use ports as ECS_RESERVED (allowing -use of such ports in the shim case is okay because switching into and -hence also out of FIFO mode is impossihble there). - -As a side effect, certain operations on Xen bound event channels which -were mistakenly permitted so far (e.g. unmask or poll) will be refused -now. - -This is part of XSA-343. - -Reported-by: Julien Grall -Signed-off-by: Jan Beulich -Acked-by: Julien Grall ---- -v9: Add arch_evtchn_is_special() to fix PV shim. -v8: Add BUILD_BUG_ON() in evtchn_usable(). -v7: Add locking related comment ahead of struct evtchn_port_ops. -v6: New. ---- -TBD: I've been considering to move some of the wrappers from xen/event.h - into event_channel.c (or even drop them altogether), when they - require external locking (e.g. evtchn_port_init() or - evtchn_port_set_priority()). Does anyone have a strong opinion - either way? 
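The dump_irqs() hunk just below shows the resulting discipline for debug
paths: sample channel state only with the per-channel lock held, and use a
trylock so the dump can never deadlock against a path that already owns the
lock. The same shape in a standalone pthread sketch (field names
hypothetical):

    #include <pthread.h>
    #include <stdbool.h>
    #include <stdio.h>

    struct evtchn {
        pthread_mutex_t lock;
        bool pending;
    };

    /* Returns 0/1, or -1 ("unknown") when the lock is contended,
     * mirroring the 'pending = 2' sentinel in the hunk below. */
    static int sample_pending(struct evtchn *chn)
    {
        int val = -1;

        if (pthread_mutex_trylock(&chn->lock) == 0) {
            val = chn->pending;
            pthread_mutex_unlock(&chn->lock);
        }
        return val;
    }

    int main(void)
    {
        struct evtchn chn = { PTHREAD_MUTEX_INITIALIZER, true };

        printf("pending: %d\n", sample_pending(&chn));
        return 0;
    }

(Build with -pthread.)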
- ---- a/xen/arch/x86/irq.c -+++ b/xen/arch/x86/irq.c -@@ -2488,14 +2488,24 @@ static void dump_irqs(unsigned char key) - - for ( i = 0; i < action->nr_guests; ) - { -+ struct evtchn *evtchn; -+ unsigned int pending = 2, masked = 2; -+ - d = action->guest[i++]; - pirq = domain_irq_to_pirq(d, irq); - info = pirq_info(d, pirq); -+ evtchn = evtchn_from_port(d, info->evtchn); -+ local_irq_disable(); -+ if ( spin_trylock(&evtchn->lock) ) -+ { -+ pending = evtchn_is_pending(d, evtchn); -+ masked = evtchn_is_masked(d, evtchn); -+ spin_unlock(&evtchn->lock); -+ } -+ local_irq_enable(); - printk("d%d:%3d(%c%c%c)%c", -- d->domain_id, pirq, -- evtchn_port_is_pending(d, info->evtchn) ? 'P' : '-', -- evtchn_port_is_masked(d, info->evtchn) ? 'M' : '-', -- info->masked ? 'M' : '-', -+ d->domain_id, pirq, "-P?"[pending], -+ "-M?"[masked], info->masked ? 'M' : '-', - i < action->nr_guests ? ',' : '\n'); - } - } ---- a/xen/arch/x86/pv/shim.c -+++ b/xen/arch/x86/pv/shim.c -@@ -660,8 +660,11 @@ void pv_shim_inject_evtchn(unsigned int - if ( port_is_valid(guest, port) ) - { - struct evtchn *chn = evtchn_from_port(guest, port); -+ unsigned long flags; - -+ spin_lock_irqsave(&chn->lock, flags); - evtchn_port_set_pending(guest, chn->notify_vcpu_id, chn); -+ spin_unlock_irqrestore(&chn->lock, flags); - } - } - ---- a/xen/common/event_2l.c -+++ b/xen/common/event_2l.c -@@ -63,8 +63,10 @@ static void evtchn_2l_unmask(struct doma - } - } - --static bool evtchn_2l_is_pending(const struct domain *d, evtchn_port_t port) -+static bool evtchn_2l_is_pending(const struct domain *d, -+ const struct evtchn *evtchn) - { -+ evtchn_port_t port = evtchn->port; - unsigned int max_ports = BITS_PER_EVTCHN_WORD(d) * BITS_PER_EVTCHN_WORD(d); - - ASSERT(port < max_ports); -@@ -72,8 +74,10 @@ static bool evtchn_2l_is_pending(const s - guest_test_bit(d, port, &shared_info(d, evtchn_pending))); - } - --static bool evtchn_2l_is_masked(const struct domain *d, evtchn_port_t port) -+static bool evtchn_2l_is_masked(const struct domain *d, -+ const struct evtchn *evtchn) - { -+ evtchn_port_t port = evtchn->port; - unsigned int max_ports = BITS_PER_EVTCHN_WORD(d) * BITS_PER_EVTCHN_WORD(d); - - ASSERT(port < max_ports); ---- a/xen/common/event_channel.c -+++ b/xen/common/event_channel.c -@@ -156,8 +156,9 @@ int evtchn_allocate_port(struct domain * - - if ( port_is_valid(d, port) ) - { -- if ( evtchn_from_port(d, port)->state != ECS_FREE || -- evtchn_port_is_busy(d, port) ) -+ const struct evtchn *chn = evtchn_from_port(d, port); -+ -+ if ( chn->state != ECS_FREE || evtchn_is_busy(d, chn) ) - return -EBUSY; - } - else -@@ -774,6 +775,7 @@ void send_guest_vcpu_virq(struct vcpu *v - unsigned long flags; - int port; - struct domain *d; -+ struct evtchn *chn; - - ASSERT(!virq_is_global(virq)); - -@@ -784,7 +786,10 @@ void send_guest_vcpu_virq(struct vcpu *v - goto out; - - d = v->domain; -- evtchn_port_set_pending(d, v->vcpu_id, evtchn_from_port(d, port)); -+ chn = evtchn_from_port(d, port); -+ spin_lock(&chn->lock); -+ evtchn_port_set_pending(d, v->vcpu_id, chn); -+ spin_unlock(&chn->lock); - - out: - spin_unlock_irqrestore(&v->virq_lock, flags); -@@ -813,7 +818,9 @@ void send_guest_global_virq(struct domai - goto out; - - chn = evtchn_from_port(d, port); -+ spin_lock(&chn->lock); - evtchn_port_set_pending(d, chn->notify_vcpu_id, chn); -+ spin_unlock(&chn->lock); - - out: - spin_unlock_irqrestore(&v->virq_lock, flags); -@@ -823,6 +830,7 @@ void send_guest_pirq(struct domain *d, c - { - int port; - struct evtchn *chn; -+ unsigned long flags; - - /* - 
* PV guests: It should not be possible to race with __evtchn_close(). The -@@ -837,7 +845,9 @@ void send_guest_pirq(struct domain *d, c - } - - chn = evtchn_from_port(d, port); -+ spin_lock_irqsave(&chn->lock, flags); - evtchn_port_set_pending(d, chn->notify_vcpu_id, chn); -+ spin_unlock_irqrestore(&chn->lock, flags); - } - - static struct domain *global_virq_handlers[NR_VIRQS] __read_mostly; -@@ -1034,12 +1044,15 @@ int evtchn_unmask(unsigned int port) - { - struct domain *d = current->domain; - struct evtchn *evtchn; -+ unsigned long flags; - - if ( unlikely(!port_is_valid(d, port)) ) - return -EINVAL; - - evtchn = evtchn_from_port(d, port); -+ spin_lock_irqsave(&evtchn->lock, flags); - evtchn_port_unmask(d, evtchn); -+ spin_unlock_irqrestore(&evtchn->lock, flags); - - return 0; - } -@@ -1449,8 +1462,8 @@ static void domain_dump_evtchn_info(stru - - printk(" %4u [%d/%d/", - port, -- evtchn_port_is_pending(d, port), -- evtchn_port_is_masked(d, port)); -+ evtchn_is_pending(d, chn), -+ evtchn_is_masked(d, chn)); - evtchn_port_print_state(d, chn); - printk("]: s=%d n=%d x=%d", - chn->state, chn->notify_vcpu_id, chn->xen_consumer); ---- a/xen/common/event_fifo.c -+++ b/xen/common/event_fifo.c -@@ -296,23 +296,26 @@ static void evtchn_fifo_unmask(struct do - evtchn_fifo_set_pending(v, evtchn); - } - --static bool evtchn_fifo_is_pending(const struct domain *d, evtchn_port_t port) -+static bool evtchn_fifo_is_pending(const struct domain *d, -+ const struct evtchn *evtchn) - { -- const event_word_t *word = evtchn_fifo_word_from_port(d, port); -+ const event_word_t *word = evtchn_fifo_word_from_port(d, evtchn->port); - - return word && guest_test_bit(d, EVTCHN_FIFO_PENDING, word); - } - --static bool_t evtchn_fifo_is_masked(const struct domain *d, evtchn_port_t port) -+static bool_t evtchn_fifo_is_masked(const struct domain *d, -+ const struct evtchn *evtchn) - { -- const event_word_t *word = evtchn_fifo_word_from_port(d, port); -+ const event_word_t *word = evtchn_fifo_word_from_port(d, evtchn->port); - - return !word || guest_test_bit(d, EVTCHN_FIFO_MASKED, word); - } - --static bool_t evtchn_fifo_is_busy(const struct domain *d, evtchn_port_t port) -+static bool_t evtchn_fifo_is_busy(const struct domain *d, -+ const struct evtchn *evtchn) - { -- const event_word_t *word = evtchn_fifo_word_from_port(d, port); -+ const event_word_t *word = evtchn_fifo_word_from_port(d, evtchn->port); - - return word && guest_test_bit(d, EVTCHN_FIFO_LINKED, word); - } ---- a/xen/include/asm-x86/event.h -+++ b/xen/include/asm-x86/event.h -@@ -47,4 +47,10 @@ static inline bool arch_virq_is_global(u - return true; - } - -+#ifdef CONFIG_PV_SHIM -+# include -+# define arch_evtchn_is_special(chn) \ -+ (pv_shim && (chn)->port && (chn)->state == ECS_RESERVED) -+#endif -+ - #endif ---- a/xen/include/xen/event.h -+++ b/xen/include/xen/event.h -@@ -133,6 +133,24 @@ static inline struct evtchn *evtchn_from - return bucket_from_port(d, p) + (p % EVTCHNS_PER_BUCKET); - } - -+/* -+ * "usable" as in "by a guest", i.e. Xen consumed channels are assumed to be -+ * taken care of separately where used for Xen's internal purposes. -+ */ -+static bool evtchn_usable(const struct evtchn *evtchn) -+{ -+ if ( evtchn->xen_consumer ) -+ return false; -+ -+#ifdef arch_evtchn_is_special -+ if ( arch_evtchn_is_special(evtchn) ) -+ return true; -+#endif -+ -+ BUILD_BUG_ON(ECS_FREE > ECS_RESERVED); -+ return evtchn->state > ECS_RESERVED; -+} -+ - /* Wait on a Xen-attached event channel. 
*/ - #define wait_on_xen_event_channel(port, condition) \ - do { \ -@@ -165,19 +183,24 @@ int evtchn_reset(struct domain *d); - - /* - * Low-level event channel port ops. -+ * -+ * All hooks have to be called with a lock held which prevents the channel -+ * from changing state. This may be the domain event lock, the per-channel -+ * lock, or in the case of sending interdomain events also the other side's -+ * per-channel lock. Exceptions apply in certain cases for the PV shim. - */ - struct evtchn_port_ops { - void (*init)(struct domain *d, struct evtchn *evtchn); - void (*set_pending)(struct vcpu *v, struct evtchn *evtchn); - void (*clear_pending)(struct domain *d, struct evtchn *evtchn); - void (*unmask)(struct domain *d, struct evtchn *evtchn); -- bool (*is_pending)(const struct domain *d, evtchn_port_t port); -- bool (*is_masked)(const struct domain *d, evtchn_port_t port); -+ bool (*is_pending)(const struct domain *d, const struct evtchn *evtchn); -+ bool (*is_masked)(const struct domain *d, const struct evtchn *evtchn); - /* - * Is the port unavailable because it's still being cleaned up - * after being closed? - */ -- bool (*is_busy)(const struct domain *d, evtchn_port_t port); -+ bool (*is_busy)(const struct domain *d, const struct evtchn *evtchn); - int (*set_priority)(struct domain *d, struct evtchn *evtchn, - unsigned int priority); - void (*print_state)(struct domain *d, const struct evtchn *evtchn); -@@ -193,38 +216,67 @@ static inline void evtchn_port_set_pendi - unsigned int vcpu_id, - struct evtchn *evtchn) - { -- d->evtchn_port_ops->set_pending(d->vcpu[vcpu_id], evtchn); -+ if ( evtchn_usable(evtchn) ) -+ d->evtchn_port_ops->set_pending(d->vcpu[vcpu_id], evtchn); - } - - static inline void evtchn_port_clear_pending(struct domain *d, - struct evtchn *evtchn) - { -- d->evtchn_port_ops->clear_pending(d, evtchn); -+ if ( evtchn_usable(evtchn) ) -+ d->evtchn_port_ops->clear_pending(d, evtchn); - } - - static inline void evtchn_port_unmask(struct domain *d, - struct evtchn *evtchn) - { -- d->evtchn_port_ops->unmask(d, evtchn); -+ if ( evtchn_usable(evtchn) ) -+ d->evtchn_port_ops->unmask(d, evtchn); - } - --static inline bool evtchn_port_is_pending(const struct domain *d, -- evtchn_port_t port) -+static inline bool evtchn_is_pending(const struct domain *d, -+ const struct evtchn *evtchn) - { -- return d->evtchn_port_ops->is_pending(d, port); -+ return evtchn_usable(evtchn) && d->evtchn_port_ops->is_pending(d, evtchn); - } - --static inline bool evtchn_port_is_masked(const struct domain *d, -- evtchn_port_t port) -+static inline bool evtchn_port_is_pending(struct domain *d, evtchn_port_t port) - { -- return d->evtchn_port_ops->is_masked(d, port); -+ struct evtchn *evtchn = evtchn_from_port(d, port); -+ bool rc; -+ unsigned long flags; -+ -+ spin_lock_irqsave(&evtchn->lock, flags); -+ rc = evtchn_is_pending(d, evtchn); -+ spin_unlock_irqrestore(&evtchn->lock, flags); -+ -+ return rc; -+} -+ -+static inline bool evtchn_is_masked(const struct domain *d, -+ const struct evtchn *evtchn) -+{ -+ return !evtchn_usable(evtchn) || d->evtchn_port_ops->is_masked(d, evtchn); -+} -+ -+static inline bool evtchn_port_is_masked(struct domain *d, evtchn_port_t port) -+{ -+ struct evtchn *evtchn = evtchn_from_port(d, port); -+ bool rc; -+ unsigned long flags; -+ -+ spin_lock_irqsave(&evtchn->lock, flags); -+ rc = evtchn_is_masked(d, evtchn); -+ spin_unlock_irqrestore(&evtchn->lock, flags); -+ -+ return rc; - } - --static inline bool evtchn_port_is_busy(const struct domain *d, -- evtchn_port_t port) 
-+static inline bool evtchn_is_busy(const struct domain *d, -+ const struct evtchn *evtchn) - { - return d->evtchn_port_ops->is_busy && -- d->evtchn_port_ops->is_busy(d, port); -+ d->evtchn_port_ops->is_busy(d, evtchn); - } - - static inline int evtchn_port_set_priority(struct domain *d, -@@ -233,6 +285,8 @@ static inline int evtchn_port_set_priori - { - if ( !d->evtchn_port_ops->set_priority ) - return -ENOSYS; -+ if ( !evtchn_usable(evtchn) ) -+ return -EACCES; - return d->evtchn_port_ops->set_priority(d, evtchn, priority); - } - diff --git a/xsa344-1.patch b/xsa344-1.patch deleted file mode 100644 index 3834075..0000000 --- a/xsa344-1.patch +++ /dev/null @@ -1,137 +0,0 @@ -From: Jan Beulich -Subject: evtchn: arrange for preemption in evtchn_destroy() - -Especially closing of fully established interdomain channels can take -quite some time, due to the locking involved. Therefore we shouldn't -assume we can clean up still active ports all in one go. Besides adding -the necessary preemption check, also avoid pointlessly starting from -(or now really ending at) 0; 1 is the lowest numbered port which may -need closing. - -Since we're now reducing ->valid_evtchns, free_xen_event_channel(), -and (at least to be on the safe side) notify_via_xen_event_channel() -need to cope with attempts to close / unbind from / send through already -closed (and no longer valid, as per port_is_valid()) ports. - -This is part of XSA-344. - -Signed-off-by: Jan Beulich -Acked-by: Julien Grall -Reviewed-by: Stefano Stabellini ---- -v8: Drop ->active->evtchns part of main loop condition. Drop BUG_ON() - conversion in evtchn_close(). -v7: Comment the barriers added in v6. -v6: Add barriers ahead of new ->is_dying checks. -v5: Also adjust BUG_ON() in evtchn_close(). -v4: New. - ---- a/xen/common/domain.c -+++ b/xen/common/domain.c -@@ -715,12 +715,14 @@ int domain_kill(struct domain *d) - return domain_kill(d); - d->is_dying = DOMDYING_dying; - argo_destroy(d); -- evtchn_destroy(d); - gnttab_release_mappings(d); - vnuma_destroy(d->vnuma); - domain_set_outstanding_pages(d, 0); - /* fallthrough */ - case DOMDYING_dying: -+ rc = evtchn_destroy(d); -+ if ( rc ) -+ break; - rc = domain_relinquish_resources(d); - if ( rc != 0 ) - break; ---- a/xen/common/event_channel.c -+++ b/xen/common/event_channel.c -@@ -1297,7 +1297,16 @@ int alloc_unbound_xen_event_channel( - - void free_xen_event_channel(struct domain *d, int port) - { -- BUG_ON(!port_is_valid(d, port)); -+ if ( !port_is_valid(d, port) ) -+ { -+ /* -+ * Make sure ->is_dying is read /after/ ->valid_evtchns, pairing -+ * with the spin_barrier() and BUG_ON() in evtchn_destroy(). -+ */ -+ smp_rmb(); -+ BUG_ON(!d->is_dying); -+ return; -+ } - - evtchn_close(d, port, 0); - } -@@ -1309,7 +1318,17 @@ void notify_via_xen_event_channel(struct - struct domain *rd; - unsigned long flags; - -- ASSERT(port_is_valid(ld, lport)); -+ if ( !port_is_valid(ld, lport) ) -+ { -+ /* -+ * Make sure ->is_dying is read /after/ ->valid_evtchns, pairing -+ * with the spin_barrier() and BUG_ON() in evtchn_destroy(). -+ */ -+ smp_rmb(); -+ ASSERT(ld->is_dying); -+ return; -+ } -+ - lchn = evtchn_from_port(ld, lport); - - spin_lock_irqsave(&lchn->lock, flags); -@@ -1380,8 +1399,7 @@ int evtchn_init(struct domain *d, unsign - return 0; - } - -- --void evtchn_destroy(struct domain *d) -+int evtchn_destroy(struct domain *d) - { - unsigned int i; - -@@ -1390,14 +1408,29 @@ void evtchn_destroy(struct domain *d) - spin_barrier(&d->event_lock); - - /* Close all existing event channels. 
*/ -- for ( i = 0; port_is_valid(d, i); i++ ) -+ for ( i = d->valid_evtchns; --i; ) -+ { - evtchn_close(d, i, 0); - -+ /* -+ * Avoid preempting when called from domain_create()'s error path, -+ * and don't check too often (choice of frequency is arbitrary). -+ */ -+ if ( i && !(i & 0x3f) && d->is_dying != DOMDYING_dead && -+ hypercall_preempt_check() ) -+ { -+ write_atomic(&d->valid_evtchns, i); -+ return -ERESTART; -+ } -+ } -+ - ASSERT(!d->active_evtchns); - - clear_global_virq_handlers(d); - - evtchn_fifo_destroy(d); -+ -+ return 0; - } - - ---- a/xen/include/xen/sched.h -+++ b/xen/include/xen/sched.h -@@ -138,7 +138,7 @@ struct evtchn - } __attribute__((aligned(64))); - - int evtchn_init(struct domain *d, unsigned int max_port); --void evtchn_destroy(struct domain *d); /* from domain_kill */ -+int evtchn_destroy(struct domain *d); /* from domain_kill */ - void evtchn_destroy_final(struct domain *d); /* from complete_domain_destroy */ - - struct waitqueue_vcpu; diff --git a/xsa344-2.patch b/xsa344-2.patch deleted file mode 100644 index c03344b..0000000 --- a/xsa344-2.patch +++ /dev/null @@ -1,210 +0,0 @@ -From: Jan Beulich -Subject: evtchn: arrange for preemption in evtchn_reset() - -Like for evtchn_destroy() looping over all possible event channels to -close them can take a significant amount of time. Unlike done there, we -can't alter domain properties (i.e. d->valid_evtchns) here. Borrow, in a -lightweight form, the paging domctl continuation concept, redirecting -the continuations to different sub-ops. Just like there this is to be -able to allow for predictable overall results of the involved sub-ops: -Racing requests should either complete or be refused. - -Note that a domain can't interfere with an already started (by a remote -domain) reset, due to being paused. It can prevent a remote reset from -happening by leaving a reset unfinished, but that's only going to affect -itself. - -This is part of XSA-344. - -Signed-off-by: Jan Beulich -Acked-by: Julien Grall -Reviewed-by: Stefano Stabellini ---- -v9: Undo v7 changes. -v7: Move extension of loop condition in evtchn_reset() here, to match - the earlier patch'es change to evtchn_destroy(). -v6: Also protect the last write of d->next_evtchn. Re-base over changes - to earlier patches. -v4: New. 
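
To make the continuation shape easier to follow than it is in diff form, here
is a standalone C model of the pattern the changelog above refers to (all
names are illustrative, none of this is Xen code): a long loop offers to
yield every 64 iterations, records how far it got, and is simply re-entered
by its caller until it completes.

#include <stdbool.h>
#include <stdio.h>

#define ERESTART 85                     /* illustrative errno-style value */

static unsigned int next_port;          /* models d->next_evtchn */

/* Models hypercall_preempt_check(): pretend work is pending now and then. */
static bool preempt_check(void)
{
    static unsigned int calls;
    return ++calls % 3 == 0;
}

/* Close ports 1..nr_ports-1, offering to yield every 64 iterations. */
static int reset_ports(unsigned int nr_ports, bool resuming)
{
    unsigned int i = resuming ? next_port : 1; /* port 0 never needs closing */

    for ( ; i < nr_ports; i++ )
    {
        /* close_port(i) would go here */
        if ( !(i & 0x3f) && preempt_check() )
        {
            next_port = i;              /* record progress for re-entry */
            return -ERESTART;
        }
    }

    next_port = 0;                      /* done; clear the resume marker */
    return 0;
}

int main(void)
{
    bool resuming = false;
    int rc;

    /* This while loop stands in for the hypercall continuation machinery. */
    while ( (rc = reset_ports(1024, resuming)) == -ERESTART )
        resuming = true;

    printf("rc=%d\n", rc);
    return 0;
}
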
- ---- a/xen/common/domain.c -+++ b/xen/common/domain.c -@@ -1145,7 +1145,7 @@ void domain_unpause_except_self(struct d - domain_unpause(d); - } - --int domain_soft_reset(struct domain *d) -+int domain_soft_reset(struct domain *d, bool resuming) - { - struct vcpu *v; - int rc; -@@ -1159,7 +1159,7 @@ int domain_soft_reset(struct domain *d) - } - spin_unlock(&d->shutdown_lock); - -- rc = evtchn_reset(d); -+ rc = evtchn_reset(d, resuming); - if ( rc ) - return rc; - ---- a/xen/common/domctl.c -+++ b/xen/common/domctl.c -@@ -477,12 +477,22 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xe - } - - case XEN_DOMCTL_soft_reset: -+ case XEN_DOMCTL_soft_reset_cont: - if ( d == current->domain ) /* no domain_pause() */ - { - ret = -EINVAL; - break; - } -- ret = domain_soft_reset(d); -+ ret = domain_soft_reset(d, op->cmd == XEN_DOMCTL_soft_reset_cont); -+ if ( ret == -ERESTART ) -+ { -+ op->cmd = XEN_DOMCTL_soft_reset_cont; -+ if ( !__copy_field_to_guest(u_domctl, op, cmd) ) -+ ret = hypercall_create_continuation(__HYPERVISOR_domctl, -+ "h", u_domctl); -+ else -+ ret = -EFAULT; -+ } - break; - - case XEN_DOMCTL_destroydomain: ---- a/xen/common/event_channel.c -+++ b/xen/common/event_channel.c -@@ -1057,7 +1057,7 @@ int evtchn_unmask(unsigned int port) - return 0; - } - --int evtchn_reset(struct domain *d) -+int evtchn_reset(struct domain *d, bool resuming) - { - unsigned int i; - int rc = 0; -@@ -1065,11 +1065,40 @@ int evtchn_reset(struct domain *d) - if ( d != current->domain && !d->controller_pause_count ) - return -EINVAL; - -- for ( i = 0; port_is_valid(d, i); i++ ) -+ spin_lock(&d->event_lock); -+ -+ /* -+ * If we are resuming, then start where we stopped. Otherwise, check -+ * that a reset operation is not already in progress, and if none is, -+ * record that this is now the case. -+ */ -+ i = resuming ? d->next_evtchn : !d->next_evtchn; -+ if ( i > d->next_evtchn ) -+ d->next_evtchn = i; -+ -+ spin_unlock(&d->event_lock); -+ -+ if ( !i ) -+ return -EBUSY; -+ -+ for ( ; port_is_valid(d, i); i++ ) -+ { - evtchn_close(d, i, 1); - -+ /* NB: Choice of frequency is arbitrary. 
*/ -+ if ( !(i & 0x3f) && hypercall_preempt_check() ) -+ { -+ spin_lock(&d->event_lock); -+ d->next_evtchn = i; -+ spin_unlock(&d->event_lock); -+ return -ERESTART; -+ } -+ } -+ - spin_lock(&d->event_lock); - -+ d->next_evtchn = 0; -+ - if ( d->active_evtchns > d->xen_evtchns ) - rc = -EAGAIN; - else if ( d->evtchn_fifo ) -@@ -1204,7 +1233,8 @@ long do_event_channel_op(int cmd, XEN_GU - break; - } - -- case EVTCHNOP_reset: { -+ case EVTCHNOP_reset: -+ case EVTCHNOP_reset_cont: { - struct evtchn_reset reset; - struct domain *d; - -@@ -1217,9 +1247,13 @@ long do_event_channel_op(int cmd, XEN_GU - - rc = xsm_evtchn_reset(XSM_TARGET, current->domain, d); - if ( !rc ) -- rc = evtchn_reset(d); -+ rc = evtchn_reset(d, cmd == EVTCHNOP_reset_cont); - - rcu_unlock_domain(d); -+ -+ if ( rc == -ERESTART ) -+ rc = hypercall_create_continuation(__HYPERVISOR_event_channel_op, -+ "ih", EVTCHNOP_reset_cont, arg); - break; - } - ---- a/xen/include/public/domctl.h -+++ b/xen/include/public/domctl.h -@@ -1159,7 +1159,10 @@ struct xen_domctl { - #define XEN_DOMCTL_iomem_permission 20 - #define XEN_DOMCTL_ioport_permission 21 - #define XEN_DOMCTL_hypercall_init 22 --#define XEN_DOMCTL_arch_setup 23 /* Obsolete IA64 only */ -+#ifdef __XEN__ -+/* #define XEN_DOMCTL_arch_setup 23 Obsolete IA64 only */ -+#define XEN_DOMCTL_soft_reset_cont 23 -+#endif - #define XEN_DOMCTL_settimeoffset 24 - #define XEN_DOMCTL_getvcpuaffinity 25 - #define XEN_DOMCTL_real_mode_area 26 /* Obsolete PPC only */ ---- a/xen/include/public/event_channel.h -+++ b/xen/include/public/event_channel.h -@@ -74,6 +74,9 @@ - #define EVTCHNOP_init_control 11 - #define EVTCHNOP_expand_array 12 - #define EVTCHNOP_set_priority 13 -+#ifdef __XEN__ -+#define EVTCHNOP_reset_cont 14 -+#endif - /* ` } */ - - typedef uint32_t evtchn_port_t; ---- a/xen/include/xen/event.h -+++ b/xen/include/xen/event.h -@@ -171,7 +171,7 @@ void evtchn_check_pollers(struct domain - void evtchn_2l_init(struct domain *d); - - /* Close all event channels and reset to 2-level ABI. */ --int evtchn_reset(struct domain *d); -+int evtchn_reset(struct domain *d, bool resuming); - - /* - * Low-level event channel port ops. ---- a/xen/include/xen/sched.h -+++ b/xen/include/xen/sched.h -@@ -371,6 +371,8 @@ struct domain - * EVTCHNOP_reset). Read/write access like for active_evtchns. - */ - unsigned int xen_evtchns; -+ /* Port to resume from in evtchn_reset(), when in a continuation. */ -+ unsigned int next_evtchn; - spinlock_t event_lock; - const struct evtchn_port_ops *evtchn_port_ops; - struct evtchn_fifo_domain *evtchn_fifo; -@@ -663,7 +665,7 @@ int domain_kill(struct domain *d); - int domain_shutdown(struct domain *d, u8 reason); - void domain_resume(struct domain *d); - --int domain_soft_reset(struct domain *d); -+int domain_soft_reset(struct domain *d, bool resuming); - - int vcpu_start_shutdown_deferral(struct vcpu *v); - void vcpu_end_shutdown_deferral(struct vcpu *v); diff --git a/xsa345-4.14-0001-x86-mm-Refactor-map_pages_to_xen-to-have-only-a-sing.patch b/xsa345-4.14-0001-x86-mm-Refactor-map_pages_to_xen-to-have-only-a-sing.patch deleted file mode 100644 index 847c34a..0000000 --- a/xsa345-4.14-0001-x86-mm-Refactor-map_pages_to_xen-to-have-only-a-sing.patch +++ /dev/null @@ -1,94 +0,0 @@ -From e9c5a9ee5e2e888f8bb05cf0a353ed635300abe3 Mon Sep 17 00:00:00 2001 -From: Wei Liu -Date: Sat, 11 Jan 2020 21:57:41 +0000 -Subject: [PATCH 1/3] x86/mm: Refactor map_pages_to_xen to have only a single - exit path - -We will soon need to perform clean-ups before returning. 
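
The refactor this message describes is the classic single-exit-path shape:
every failure funnels through one label, so that later patches can add
clean-ups in exactly one place. A minimal compilable sketch, with a
hypothetical alloc_pt() standing in for the real page-table allocator:

#include <errno.h>
#include <stdlib.h>

static void *alloc_pt(void)
{
    return malloc(64);
}

static int map_range(unsigned int nr)
{
    int rc = -ENOMEM;   /* pessimistic default, overwritten only on success */
    void *pt = NULL;

    for ( ; nr; nr-- )
    {
        pt = alloc_pt();
        if ( !pt )
            goto out;   /* was "return -ENOMEM"; now shares the one exit */
        free(pt);
    }

    rc = 0;

 out:
    /* clean-ups added by later patches (e.g. dropping a lock) live here */
    return rc;
}

int main(void)
{
    return map_range(4) ? EXIT_FAILURE : EXIT_SUCCESS;
}
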
- -No functional change. - -This is part of XSA-345. - -Reported-by: Hongyan Xia -Signed-off-by: Wei Liu -Signed-off-by: Hongyan Xia -Signed-off-by: George Dunlap -Acked-by: Jan Beulich ---- - xen/arch/x86/mm.c | 17 +++++++++++------ - 1 file changed, 11 insertions(+), 6 deletions(-) - -diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c -index 82bc676553..03f6e6aa62 100644 ---- a/xen/arch/x86/mm.c -+++ b/xen/arch/x86/mm.c -@@ -5088,6 +5088,7 @@ int map_pages_to_xen( - l2_pgentry_t *pl2e, ol2e; - l1_pgentry_t *pl1e, ol1e; - unsigned int i; -+ int rc = -ENOMEM; - - #define flush_flags(oldf) do { \ - unsigned int o_ = (oldf); \ -@@ -5108,7 +5109,8 @@ int map_pages_to_xen( - l3_pgentry_t ol3e, *pl3e = virt_to_xen_l3e(virt); - - if ( !pl3e ) -- return -ENOMEM; -+ goto out; -+ - ol3e = *pl3e; - - if ( cpu_has_page1gb && -@@ -5198,7 +5200,7 @@ int map_pages_to_xen( - - l2t = alloc_xen_pagetable(); - if ( l2t == NULL ) -- return -ENOMEM; -+ goto out; - - for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ ) - l2e_write(l2t + i, -@@ -5227,7 +5229,7 @@ int map_pages_to_xen( - - pl2e = virt_to_xen_l2e(virt); - if ( !pl2e ) -- return -ENOMEM; -+ goto out; - - if ( ((((virt >> PAGE_SHIFT) | mfn_x(mfn)) & - ((1u << PAGETABLE_ORDER) - 1)) == 0) && -@@ -5271,7 +5273,7 @@ int map_pages_to_xen( - { - pl1e = virt_to_xen_l1e(virt); - if ( pl1e == NULL ) -- return -ENOMEM; -+ goto out; - } - else if ( l2e_get_flags(*pl2e) & _PAGE_PSE ) - { -@@ -5299,7 +5301,7 @@ int map_pages_to_xen( - - l1t = alloc_xen_pagetable(); - if ( l1t == NULL ) -- return -ENOMEM; -+ goto out; - - for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ ) - l1e_write(&l1t[i], -@@ -5445,7 +5447,10 @@ int map_pages_to_xen( - - #undef flush_flags - -- return 0; -+ rc = 0; -+ -+ out: -+ return rc; - } - - int populate_pt_range(unsigned long virt, unsigned long nr_mfns) --- -2.25.1 - diff --git a/xsa345-4.14-0002-x86-mm-Refactor-modify_xen_mappings-to-have-one-exit.patch b/xsa345-4.14-0002-x86-mm-Refactor-modify_xen_mappings-to-have-one-exit.patch deleted file mode 100644 index d31af1c..0000000 --- a/xsa345-4.14-0002-x86-mm-Refactor-modify_xen_mappings-to-have-one-exit.patch +++ /dev/null @@ -1,68 +0,0 @@ -From 8645adb7ac679e5ddc5c39e0c5c918e4a2ba5391 Mon Sep 17 00:00:00 2001 -From: Wei Liu -Date: Sat, 11 Jan 2020 21:57:42 +0000 -Subject: [PATCH 2/3] x86/mm: Refactor modify_xen_mappings to have one exit - path - -We will soon need to perform clean-ups before returning. - -No functional change. - -This is part of XSA-345. - -Reported-by: Hongyan Xia -Signed-off-by: Wei Liu -Signed-off-by: Hongyan Xia -Signed-off-by: George Dunlap -Acked-by: Jan Beulich ---- - xen/arch/x86/mm.c | 12 +++++++++--- - 1 file changed, 9 insertions(+), 3 deletions(-) - -diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c -index 03f6e6aa62..2468347a45 100644 ---- a/xen/arch/x86/mm.c -+++ b/xen/arch/x86/mm.c -@@ -5477,6 +5477,7 @@ int modify_xen_mappings(unsigned long s, unsigned long e, unsigned int nf) - l1_pgentry_t *pl1e; - unsigned int i; - unsigned long v = s; -+ int rc = -ENOMEM; - - /* Set of valid PTE bits which may be altered. */ - #define FLAGS_MASK (_PAGE_NX|_PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_RW|_PAGE_PRESENT) -@@ -5520,7 +5521,8 @@ int modify_xen_mappings(unsigned long s, unsigned long e, unsigned int nf) - /* PAGE1GB: shatter the superpage and fall through. 
*/ - l2t = alloc_xen_pagetable(); - if ( !l2t ) -- return -ENOMEM; -+ goto out; -+ - for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ ) - l2e_write(l2t + i, - l2e_from_pfn(l3e_get_pfn(*pl3e) + -@@ -5577,7 +5579,8 @@ int modify_xen_mappings(unsigned long s, unsigned long e, unsigned int nf) - /* PSE: shatter the superpage and try again. */ - l1t = alloc_xen_pagetable(); - if ( !l1t ) -- return -ENOMEM; -+ goto out; -+ - for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ ) - l1e_write(&l1t[i], - l1e_from_pfn(l2e_get_pfn(*pl2e) + i, -@@ -5710,7 +5713,10 @@ int modify_xen_mappings(unsigned long s, unsigned long e, unsigned int nf) - flush_area(NULL, FLUSH_TLB_GLOBAL); - - #undef FLAGS_MASK -- return 0; -+ rc = 0; -+ -+ out: -+ return rc; - } - - #undef flush_area --- -2.25.1 - diff --git a/xsa345-4.14-0003-x86-mm-Prevent-some-races-in-hypervisor-mapping-upda.patch b/xsa345-4.14-0003-x86-mm-Prevent-some-races-in-hypervisor-mapping-upda.patch deleted file mode 100644 index 2e4dd6f..0000000 --- a/xsa345-4.14-0003-x86-mm-Prevent-some-races-in-hypervisor-mapping-upda.patch +++ /dev/null @@ -1,249 +0,0 @@ -From 6b020418d0554d9ec6eb201f50776a72db67739b Mon Sep 17 00:00:00 2001 -From: Hongyan Xia -Date: Sat, 11 Jan 2020 21:57:43 +0000 -Subject: [PATCH 3/3] x86/mm: Prevent some races in hypervisor mapping updates - -map_pages_to_xen will attempt to coalesce mappings into 2MiB and 1GiB -superpages if possible, to maximize TLB efficiency. This means both -replacing superpage entries with smaller entries, and replacing -smaller entries with superpages. - -Unfortunately, while some potential races are handled correctly, -others are not. These include: - -1. When one processor modifies a sub-superpage mapping while another -processor replaces the entire range with a superpage. - -Take the following example: - -Suppose L3[N] points to L2. And suppose we have two processors, A and -B. - -* A walks the pagetables, get a pointer to L2. -* B replaces L3[N] with a 1GiB mapping. -* B Frees L2 -* A writes L2[M] # - -This is race exacerbated by the fact that virt_to_xen_l[21]e doesn't -handle higher-level superpages properly: If you call virt_xen_to_l2e -on a virtual address within an L3 superpage, you'll either hit a BUG() -(most likely), or get a pointer into the middle of a data page; same -with virt_xen_to_l1 on a virtual address within either an L3 or L2 -superpage. - -So take the following example: - -* A reads pl3e and discovers it to point to an L2. -* B replaces L3[N] with a 1GiB mapping -* A calls virt_to_xen_l2e() and hits the BUG_ON() # - -2. When two processors simultaneously try to replace a sub-superpage -mapping with a superpage mapping. - -Take the following example: - -Suppose L3[N] points to L2. And suppose we have two processors, A and B, -both trying to replace L3[N] with a superpage. - -* A walks the pagetables, get a pointer to pl3e, and takes a copy ol3e pointing to L2. -* B walks the pagetables, gets a pointre to pl3e, and takes a copy ol3e pointing to L2. -* A writes the new value into L3[N] -* B writes the new value into L3[N] -* A recursively frees all the L1's under L2, then frees L2 -* B recursively double-frees all the L1's under L2, then double-frees L2 # - -Fix this by grabbing a lock for the entirety of the mapping update -operation. - -Rather than grabbing map_pgdir_lock for the entire operation, however, -repurpose the PGT_locked bit from L3's page->type_info as a lock. 
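
In isolation, the repurposed lock bit behaves like the sketch below, which
uses C11 atomics where the real code uses Xen's cmpxchg(); TYPE_LOCKED and
every other name here is illustrative, not the actual PGT_locked machinery.

#include <stdatomic.h>
#include <stdint.h>

#define TYPE_LOCKED (UINT64_C(1) << 63)   /* stand-in for a PGT_locked bit */

/* Spin until we manage to flip the lock bit on with a compare-and-swap. */
static void bit_lock(_Atomic uint64_t *type_info)
{
    uint64_t x;

    do {
        while ( (x = atomic_load(type_info)) & TYPE_LOCKED )
            ;                             /* someone else holds the bit */
        /* Try to set the bit; retry if the word changed under our feet. */
    } while ( !atomic_compare_exchange_weak(type_info, &x, x | TYPE_LOCKED) );
}

static void bit_unlock(_Atomic uint64_t *type_info)
{
    atomic_fetch_and(type_info, ~TYPE_LOCKED);
}

int main(void)
{
    static _Atomic uint64_t type_info;    /* the other 63 bits stay usable */

    bit_lock(&type_info);
    /* ... exclusive section, e.g. walking or modifying one L3 table ... */
    bit_unlock(&type_info);
    return 0;
}
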
-This means that rather than locking the entire address space, we -"only" lock a single 512GiB chunk of hypervisor address space at a -time. - -There was a proposal for a lock-and-reverify approach, where we walk -the pagetables to the point where we decide what to do; then grab the -map_pgdir_lock, re-verify the information we collected without the -lock, and finally make the change (starting over again if anything had -changed). Without being able to guarantee that the L2 table wasn't -freed, however, that means every read would need to be considered -potentially unsafe. Thinking carefully about that is probably -something that wants to be done on public, not under time pressure. - -This is part of XSA-345. - -Reported-by: Hongyan Xia -Signed-off-by: Hongyan Xia -Signed-off-by: George Dunlap -Reviewed-by: Jan Beulich ---- - xen/arch/x86/mm.c | 92 +++++++++++++++++++++++++++++++++++++++++++++-- - 1 file changed, 89 insertions(+), 3 deletions(-) - -diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c -index 2468347a45..9c55b2b9e3 100644 ---- a/xen/arch/x86/mm.c -+++ b/xen/arch/x86/mm.c -@@ -2088,6 +2088,50 @@ void page_unlock(struct page_info *page) - current_locked_page_set(NULL); - } - -+/* -+ * L3 table locks: -+ * -+ * Used for serialization in map_pages_to_xen() and modify_xen_mappings(). -+ * -+ * For Xen PT pages, the page->u.inuse.type_info is unused and it is safe to -+ * reuse the PGT_locked flag. This lock is taken only when we move down to L3 -+ * tables and below, since L4 (and above, for 5-level paging) is still globally -+ * protected by map_pgdir_lock. -+ * -+ * PV MMU update hypercalls call map_pages_to_xen while holding a page's page_lock(). -+ * This has two implications: -+ * - We cannot reuse reuse current_locked_page_* for debugging -+ * - To avoid the chance of deadlock, even for different pages, we -+ * must never grab page_lock() after grabbing l3t_lock(). This -+ * includes any page_lock()-based locks, such as -+ * mem_sharing_page_lock(). -+ * -+ * Also note that we grab the map_pgdir_lock while holding the -+ * l3t_lock(), so to avoid deadlock we must avoid grabbing them in -+ * reverse order. -+ */ -+static void l3t_lock(struct page_info *page) -+{ -+ unsigned long x, nx; -+ -+ do { -+ while ( (x = page->u.inuse.type_info) & PGT_locked ) -+ cpu_relax(); -+ nx = x | PGT_locked; -+ } while ( cmpxchg(&page->u.inuse.type_info, x, nx) != x ); -+} -+ -+static void l3t_unlock(struct page_info *page) -+{ -+ unsigned long x, nx, y = page->u.inuse.type_info; -+ -+ do { -+ x = y; -+ BUG_ON(!(x & PGT_locked)); -+ nx = x & ~PGT_locked; -+ } while ( (y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x ); -+} -+ - #ifdef CONFIG_PV - /* - * PTE flags that a guest may change without re-validating the PTE. 
-@@ -5078,6 +5122,23 @@ l1_pgentry_t *virt_to_xen_l1e(unsigned long v) - flush_area_local((const void *)v, f) : \ - flush_area_all((const void *)v, f)) - -+#define L3T_INIT(page) (page) = ZERO_BLOCK_PTR -+ -+#define L3T_LOCK(page) \ -+ do { \ -+ if ( locking ) \ -+ l3t_lock(page); \ -+ } while ( false ) -+ -+#define L3T_UNLOCK(page) \ -+ do { \ -+ if ( locking && (page) != ZERO_BLOCK_PTR ) \ -+ { \ -+ l3t_unlock(page); \ -+ (page) = ZERO_BLOCK_PTR; \ -+ } \ -+ } while ( false ) -+ - int map_pages_to_xen( - unsigned long virt, - mfn_t mfn, -@@ -5089,6 +5150,7 @@ int map_pages_to_xen( - l1_pgentry_t *pl1e, ol1e; - unsigned int i; - int rc = -ENOMEM; -+ struct page_info *current_l3page; - - #define flush_flags(oldf) do { \ - unsigned int o_ = (oldf); \ -@@ -5104,13 +5166,20 @@ int map_pages_to_xen( - } \ - } while (0) - -+ L3T_INIT(current_l3page); -+ - while ( nr_mfns != 0 ) - { -- l3_pgentry_t ol3e, *pl3e = virt_to_xen_l3e(virt); -+ l3_pgentry_t *pl3e, ol3e; - -+ L3T_UNLOCK(current_l3page); -+ -+ pl3e = virt_to_xen_l3e(virt); - if ( !pl3e ) - goto out; - -+ current_l3page = virt_to_page(pl3e); -+ L3T_LOCK(current_l3page); - ol3e = *pl3e; - - if ( cpu_has_page1gb && -@@ -5450,6 +5519,7 @@ int map_pages_to_xen( - rc = 0; - - out: -+ L3T_UNLOCK(current_l3page); - return rc; - } - -@@ -5478,6 +5548,7 @@ int modify_xen_mappings(unsigned long s, unsigned long e, unsigned int nf) - unsigned int i; - unsigned long v = s; - int rc = -ENOMEM; -+ struct page_info *current_l3page; - - /* Set of valid PTE bits which may be altered. */ - #define FLAGS_MASK (_PAGE_NX|_PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_RW|_PAGE_PRESENT) -@@ -5486,11 +5557,22 @@ int modify_xen_mappings(unsigned long s, unsigned long e, unsigned int nf) - ASSERT(IS_ALIGNED(s, PAGE_SIZE)); - ASSERT(IS_ALIGNED(e, PAGE_SIZE)); - -+ L3T_INIT(current_l3page); -+ - while ( v < e ) - { -- l3_pgentry_t *pl3e = virt_to_xen_l3e(v); -+ l3_pgentry_t *pl3e; -+ -+ L3T_UNLOCK(current_l3page); - -- if ( !pl3e || !(l3e_get_flags(*pl3e) & _PAGE_PRESENT) ) -+ pl3e = virt_to_xen_l3e(v); -+ if ( !pl3e ) -+ goto out; -+ -+ current_l3page = virt_to_page(pl3e); -+ L3T_LOCK(current_l3page); -+ -+ if ( !(l3e_get_flags(*pl3e) & _PAGE_PRESENT) ) - { - /* Confirm the caller isn't trying to create new mappings. */ - ASSERT(!(nf & _PAGE_PRESENT)); -@@ -5716,9 +5798,13 @@ int modify_xen_mappings(unsigned long s, unsigned long e, unsigned int nf) - rc = 0; - - out: -+ L3T_UNLOCK(current_l3page); - return rc; - } - -+#undef L3T_LOCK -+#undef L3T_UNLOCK -+ - #undef flush_area - - int destroy_xen_mappings(unsigned long s, unsigned long e) --- -2.25.1 - diff --git a/xsa346-1.patch b/xsa346-1.patch deleted file mode 100644 index 3ffeb65..0000000 --- a/xsa346-1.patch +++ /dev/null @@ -1,50 +0,0 @@ -From: Jan Beulich -Subject: IOMMU: suppress "iommu_dont_flush_iotlb" when about to free a page - -Deferring flushes to a single, wide range one - as is done when -handling XENMAPSPACE_gmfn_range - is okay only as long as -pages don't get freed ahead of the eventual flush. While the only -function setting the flag (xenmem_add_to_physmap()) suggests by its name -that it's only mapping new entries, in reality the way -xenmem_add_to_physmap_one() works means an unmap would happen not only -for the page being moved (but not freed) but, if the destination GFN is -populated, also for the page being displaced from that GFN. Collapsing -the two flushes for this GFN into just one (end even more so deferring -it to a batched invocation) is not correct. - -This is part of XSA-346. 
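
The heart of the change is a save/override/restore dance around the one
operation whose flush must not be deferred. A standalone model, with
_Thread_local standing in for Xen's this_cpu() and every name illustrative:

#include <stdbool.h>

static _Thread_local bool dont_flush_iotlb; /* models iommu_dont_flush_iotlb */

/* Flush immediately for this removal, whatever batching the caller set up. */
static void remove_page_flushing_now(void)
{
    bool *flag = &dont_flush_iotlb;
    bool saved = *flag;

    *flag = false;      /* the unmap below must flush right away */
    /* guest_physmap_remove_page(...) would run here */
    *flag = saved;      /* put the caller's batching back in place */
}

int main(void)
{
    dont_flush_iotlb = true;        /* caller is batching flushes */
    remove_page_flushing_now();     /* page can now be freed safely */
    return dont_flush_iotlb ? 0 : 1;
}
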
- -Fixes: cf95b2a9fd5a ("iommu: Introduce per cpu flag (iommu_dont_flush_iotlb) to avoid unnecessary iotlb... ") -Signed-off-by: Jan Beulich -Reviewed-by: Paul Durrant -Acked-by: Julien Grall - ---- a/xen/common/memory.c -+++ b/xen/common/memory.c -@@ -293,6 +293,7 @@ int guest_remove_page(struct domain *d, - p2m_type_t p2mt; - #endif - mfn_t mfn; -+ bool *dont_flush_p, dont_flush; - int rc; - - #ifdef CONFIG_X86 -@@ -379,8 +380,18 @@ int guest_remove_page(struct domain *d, - return -ENXIO; - } - -+ /* -+ * Since we're likely to free the page below, we need to suspend -+ * xenmem_add_to_physmap()'s suppressing of IOMMU TLB flushes. -+ */ -+ dont_flush_p = &this_cpu(iommu_dont_flush_iotlb); -+ dont_flush = *dont_flush_p; -+ *dont_flush_p = false; -+ - rc = guest_physmap_remove_page(d, _gfn(gmfn), mfn, 0); - -+ *dont_flush_p = dont_flush; -+ - /* - * With the lack of an IOMMU on some platforms, domains with DMA-capable - * device must retrieve the same pfn when the hypercall populate_physmap diff --git a/xsa346-2.patch b/xsa346-2.patch deleted file mode 100644 index 630496a..0000000 --- a/xsa346-2.patch +++ /dev/null @@ -1,204 +0,0 @@ -From: Jan Beulich -Subject: IOMMU: hold page ref until after deferred TLB flush - -When moving around a page via XENMAPSPACE_gmfn_range, deferring the TLB -flush for the "from" GFN range requires that the page remains allocated -to the guest until the TLB flush has actually occurred. Otherwise a -parallel hypercall to remove the page would only flush the TLB for the -GFN it has been moved to, but not the one is was mapped at originally. - -This is part of XSA-346. - -Fixes: cf95b2a9fd5a ("iommu: Introduce per cpu flag (iommu_dont_flush_iotlb) to avoid unnecessary iotlb... ") -Reported-by: Julien Grall -Signed-off-by: Jan Beulich -Acked-by: Julien Grall - ---- a/xen/arch/arm/mm.c -+++ b/xen/arch/arm/mm.c -@@ -1407,7 +1407,7 @@ void share_xen_page_with_guest(struct pa - int xenmem_add_to_physmap_one( - struct domain *d, - unsigned int space, -- union xen_add_to_physmap_batch_extra extra, -+ union add_to_physmap_extra extra, - unsigned long idx, - gfn_t gfn) - { -@@ -1480,10 +1480,6 @@ int xenmem_add_to_physmap_one( - break; - } - case XENMAPSPACE_dev_mmio: -- /* extra should be 0. Reserved for future use. */ -- if ( extra.res0 ) -- return -EOPNOTSUPP; -- - rc = map_dev_mmio_region(d, gfn, 1, _mfn(idx)); - return rc; - ---- a/xen/arch/x86/mm.c -+++ b/xen/arch/x86/mm.c -@@ -4497,7 +4497,7 @@ static int handle_iomem_range(unsigned l - int xenmem_add_to_physmap_one( - struct domain *d, - unsigned int space, -- union xen_add_to_physmap_batch_extra extra, -+ union add_to_physmap_extra extra, - unsigned long idx, - gfn_t gpfn) - { -@@ -4581,9 +4581,20 @@ int xenmem_add_to_physmap_one( - rc = guest_physmap_add_page(d, gpfn, mfn, PAGE_ORDER_4K); - - put_both: -- /* In the XENMAPSPACE_gmfn case, we took a ref of the gfn at the top. */ -+ /* -+ * In the XENMAPSPACE_gmfn case, we took a ref of the gfn at the top. -+ * We also may need to transfer ownership of the page reference to our -+ * caller. 
-+ */ - if ( space == XENMAPSPACE_gmfn ) -+ { - put_gfn(d, gfn); -+ if ( !rc && extra.ppage ) -+ { -+ *extra.ppage = page; -+ page = NULL; -+ } -+ } - - if ( page ) - put_page(page); ---- a/xen/common/memory.c -+++ b/xen/common/memory.c -@@ -815,13 +815,12 @@ int xenmem_add_to_physmap(struct domain - { - unsigned int done = 0; - long rc = 0; -- union xen_add_to_physmap_batch_extra extra; -+ union add_to_physmap_extra extra = {}; -+ struct page_info *pages[16]; - - ASSERT(paging_mode_translate(d)); - -- if ( xatp->space != XENMAPSPACE_gmfn_foreign ) -- extra.res0 = 0; -- else -+ if ( xatp->space == XENMAPSPACE_gmfn_foreign ) - extra.foreign_domid = DOMID_INVALID; - - if ( xatp->space != XENMAPSPACE_gmfn_range ) -@@ -836,7 +835,10 @@ int xenmem_add_to_physmap(struct domain - xatp->size -= start; - - if ( is_iommu_enabled(d) ) -+ { - this_cpu(iommu_dont_flush_iotlb) = 1; -+ extra.ppage = &pages[0]; -+ } - - while ( xatp->size > done ) - { -@@ -848,8 +850,12 @@ int xenmem_add_to_physmap(struct domain - xatp->idx++; - xatp->gpfn++; - -+ if ( extra.ppage ) -+ ++extra.ppage; -+ - /* Check for continuation if it's not the last iteration. */ -- if ( xatp->size > ++done && hypercall_preempt_check() ) -+ if ( (++done > ARRAY_SIZE(pages) && extra.ppage) || -+ (xatp->size > done && hypercall_preempt_check()) ) - { - rc = start + done; - break; -@@ -859,6 +865,7 @@ int xenmem_add_to_physmap(struct domain - if ( is_iommu_enabled(d) ) - { - int ret; -+ unsigned int i; - - this_cpu(iommu_dont_flush_iotlb) = 0; - -@@ -867,6 +874,15 @@ int xenmem_add_to_physmap(struct domain - if ( unlikely(ret) && rc >= 0 ) - rc = ret; - -+ /* -+ * Now that the IOMMU TLB flush was done for the original GFN, drop -+ * the page references. The 2nd flush below is fine to make later, as -+ * whoever removes the page again from its new GFN will have to do -+ * another flush anyway. -+ */ -+ for ( i = 0; i < done; ++i ) -+ put_page(pages[i]); -+ - ret = iommu_iotlb_flush(d, _dfn(xatp->gpfn - done), done, - IOMMU_FLUSHF_added | IOMMU_FLUSHF_modified); - if ( unlikely(ret) && rc >= 0 ) -@@ -880,6 +896,8 @@ static int xenmem_add_to_physmap_batch(s - struct xen_add_to_physmap_batch *xatpb, - unsigned int extent) - { -+ union add_to_physmap_extra extra = {}; -+ - if ( unlikely(xatpb->size < extent) ) - return -EILSEQ; - -@@ -891,6 +909,19 @@ static int xenmem_add_to_physmap_batch(s - !guest_handle_subrange_okay(xatpb->errs, extent, xatpb->size - 1) ) - return -EFAULT; - -+ switch ( xatpb->space ) -+ { -+ case XENMAPSPACE_dev_mmio: -+ /* res0 is reserved for future use. */ -+ if ( xatpb->u.res0 ) -+ return -EOPNOTSUPP; -+ break; -+ -+ case XENMAPSPACE_gmfn_foreign: -+ extra.foreign_domid = xatpb->u.foreign_domid; -+ break; -+ } -+ - while ( xatpb->size > extent ) - { - xen_ulong_t idx; -@@ -903,8 +934,7 @@ static int xenmem_add_to_physmap_batch(s - extent, 1)) ) - return -EFAULT; - -- rc = xenmem_add_to_physmap_one(d, xatpb->space, -- xatpb->u, -+ rc = xenmem_add_to_physmap_one(d, xatpb->space, extra, - idx, _gfn(gpfn)); - - if ( unlikely(__copy_to_guest_offset(xatpb->errs, extent, &rc, 1)) ) ---- a/xen/include/xen/mm.h -+++ b/xen/include/xen/mm.h -@@ -592,8 +592,22 @@ void scrub_one_page(struct page_info *); - page_list_del(pg, page_to_list(d, pg)) - #endif - -+union add_to_physmap_extra { -+ /* -+ * XENMAPSPACE_gmfn: When deferring TLB flushes, a page reference needs -+ * to be kept until after the flush, so the page can't get removed from -+ * the domain (and re-used for another purpose) beforehand. 
By passing -+ * non-NULL, the caller of xenmem_add_to_physmap_one() indicates it wants -+ * to have ownership of such a reference transferred in the success case. -+ */ -+ struct page_info **ppage; -+ -+ /* XENMAPSPACE_gmfn_foreign */ -+ domid_t foreign_domid; -+}; -+ - int xenmem_add_to_physmap_one(struct domain *d, unsigned int space, -- union xen_add_to_physmap_batch_extra extra, -+ union add_to_physmap_extra extra, - unsigned long idx, gfn_t gfn); - - int xenmem_add_to_physmap(struct domain *d, struct xen_add_to_physmap *xatp, diff --git a/xsa347-4.14-1.patch b/xsa347-4.14-1.patch deleted file mode 100644 index c110ee6..0000000 --- a/xsa347-4.14-1.patch +++ /dev/null @@ -1,149 +0,0 @@ -From: Jan Beulich -Subject: AMD/IOMMU: convert amd_iommu_pte from struct to union - -This is to add a "raw" counterpart to the bitfield equivalent. Take the -opportunity and - - convert fields to bool / unsigned int, - - drop the naming of the reserved field, - - shorten the names of the ignored ones. - -This is part of XSA-347. - -Signed-off-by: Jan Beulich -Reviewed-by: Andrew Cooper -Reviewed-by: Paul Durrant - ---- a/xen/drivers/passthrough/amd/iommu-defs.h -+++ b/xen/drivers/passthrough/amd/iommu-defs.h -@@ -451,20 +451,23 @@ union amd_iommu_x2apic_control { - #define IOMMU_PAGE_TABLE_U32_PER_ENTRY (IOMMU_PAGE_TABLE_ENTRY_SIZE / 4) - #define IOMMU_PAGE_TABLE_ALIGNMENT 4096 - --struct amd_iommu_pte { -- uint64_t pr:1; -- uint64_t ignored0:4; -- uint64_t a:1; -- uint64_t d:1; -- uint64_t ignored1:2; -- uint64_t next_level:3; -- uint64_t mfn:40; -- uint64_t reserved:7; -- uint64_t u:1; -- uint64_t fc:1; -- uint64_t ir:1; -- uint64_t iw:1; -- uint64_t ignored2:1; -+union amd_iommu_pte { -+ uint64_t raw; -+ struct { -+ bool pr:1; -+ unsigned int ign0:4; -+ bool a:1; -+ bool d:1; -+ unsigned int ign1:2; -+ unsigned int next_level:3; -+ uint64_t mfn:40; -+ unsigned int :7; -+ bool u:1; -+ bool fc:1; -+ bool ir:1; -+ bool iw:1; -+ unsigned int ign2:1; -+ }; - }; - - /* Paging modes */ ---- a/xen/drivers/passthrough/amd/iommu_map.c -+++ b/xen/drivers/passthrough/amd/iommu_map.c -@@ -34,7 +34,7 @@ static unsigned int pfn_to_pde_idx(unsig - static unsigned int clear_iommu_pte_present(unsigned long l1_mfn, - unsigned long dfn) - { -- struct amd_iommu_pte *table, *pte; -+ union amd_iommu_pte *table, *pte; - unsigned int flush_flags; - - table = map_domain_page(_mfn(l1_mfn)); -@@ -48,7 +48,7 @@ static unsigned int clear_iommu_pte_pres - return flush_flags; - } - --static unsigned int set_iommu_pde_present(struct amd_iommu_pte *pte, -+static unsigned int set_iommu_pde_present(union amd_iommu_pte *pte, - unsigned long next_mfn, - unsigned int next_level, bool iw, - bool ir) -@@ -83,7 +83,7 @@ static unsigned int set_iommu_pte_presen - int pde_level, - bool iw, bool ir) - { -- struct amd_iommu_pte *table, *pde; -+ union amd_iommu_pte *table, *pde; - unsigned int flush_flags; - - table = map_domain_page(_mfn(pt_mfn)); -@@ -174,7 +174,7 @@ void iommu_dte_set_guest_cr3(struct amd_ - static int iommu_pde_from_dfn(struct domain *d, unsigned long dfn, - unsigned long pt_mfn[], bool map) - { -- struct amd_iommu_pte *pde, *next_table_vaddr; -+ union amd_iommu_pte *pde, *next_table_vaddr; - unsigned long next_table_mfn; - unsigned int level; - struct page_info *table; -@@ -448,7 +448,7 @@ int __init amd_iommu_quarantine_init(str - unsigned long end_gfn = - 1ul << (DEFAULT_DOMAIN_ADDRESS_WIDTH - PAGE_SHIFT); - unsigned int level = amd_iommu_get_paging_mode(end_gfn); -- struct amd_iommu_pte *table; -+ union amd_iommu_pte 
*table; - - if ( hd->arch.root_table ) - { -@@ -479,7 +479,7 @@ int __init amd_iommu_quarantine_init(str - - for ( i = 0; i < PTE_PER_TABLE_SIZE; i++ ) - { -- struct amd_iommu_pte *pde = &table[i]; -+ union amd_iommu_pte *pde = &table[i]; - - /* - * PDEs are essentially a subset of PTEs, so this function ---- a/xen/drivers/passthrough/amd/pci_amd_iommu.c -+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c -@@ -387,7 +387,7 @@ static void deallocate_next_page_table(s - - static void deallocate_page_table(struct page_info *pg) - { -- struct amd_iommu_pte *table_vaddr; -+ union amd_iommu_pte *table_vaddr; - unsigned int index, level = PFN_ORDER(pg); - - PFN_ORDER(pg) = 0; -@@ -402,7 +402,7 @@ static void deallocate_page_table(struct - - for ( index = 0; index < PTE_PER_TABLE_SIZE; index++ ) - { -- struct amd_iommu_pte *pde = &table_vaddr[index]; -+ union amd_iommu_pte *pde = &table_vaddr[index]; - - if ( pde->mfn && pde->next_level && pde->pr ) - { -@@ -554,7 +554,7 @@ static void amd_dump_p2m_table_level(str - paddr_t gpa, int indent) - { - paddr_t address; -- struct amd_iommu_pte *table_vaddr; -+ const union amd_iommu_pte *table_vaddr; - int index; - - if ( level < 1 ) -@@ -570,7 +570,7 @@ static void amd_dump_p2m_table_level(str - - for ( index = 0; index < PTE_PER_TABLE_SIZE; index++ ) - { -- struct amd_iommu_pte *pde = &table_vaddr[index]; -+ const union amd_iommu_pte *pde = &table_vaddr[index]; - - if ( !(index % 2) ) - process_pending_softirqs(); diff --git a/xsa347-4.14-2.patch b/xsa347-4.14-2.patch deleted file mode 100644 index a8f4776..0000000 --- a/xsa347-4.14-2.patch +++ /dev/null @@ -1,72 +0,0 @@ -From: Jan Beulich -Subject: AMD/IOMMU: update live PTEs atomically - -Updating a live PTE bitfield by bitfield risks the compiler re-ordering -the individual updates as well as splitting individual updates into -multiple memory writes. Construct the new entry fully in a local -variable, do the check to determine the flushing needs on the thus -established new entry, and then write the new entry by a single insn. - -Similarly using memset() to clear a PTE is unsafe, as the order of -writes the function does is, at least in principle, undefined. - -This is part of XSA-347. - -Signed-off-by: Jan Beulich -Reviewed-by: Paul Durrant - ---- a/xen/drivers/passthrough/amd/iommu_map.c -+++ b/xen/drivers/passthrough/amd/iommu_map.c -@@ -41,7 +41,7 @@ static unsigned int clear_iommu_pte_pres - pte = &table[pfn_to_pde_idx(dfn, 1)]; - - flush_flags = pte->pr ? 
IOMMU_FLUSHF_modified : 0; -- memset(pte, 0, sizeof(*pte)); -+ write_atomic(&pte->raw, 0); - - unmap_domain_page(table); - -@@ -53,26 +53,30 @@ static unsigned int set_iommu_pde_presen - unsigned int next_level, bool iw, - bool ir) - { -+ union amd_iommu_pte new = {}, old; - unsigned int flush_flags = IOMMU_FLUSHF_added; - -- if ( pte->pr && -- (pte->mfn != next_mfn || -- pte->iw != iw || -- pte->ir != ir || -- pte->next_level != next_level) ) -- flush_flags |= IOMMU_FLUSHF_modified; -- - /* - * FC bit should be enabled in PTE, this helps to solve potential - * issues with ATS devices - */ -- pte->fc = !next_level; -+ new.fc = !next_level; -+ -+ new.mfn = next_mfn; -+ new.iw = iw; -+ new.ir = ir; -+ new.next_level = next_level; -+ new.pr = true; -+ -+ old.raw = read_atomic(&pte->raw); -+ old.ign0 = 0; -+ old.ign1 = 0; -+ old.ign2 = 0; -+ -+ if ( old.pr && old.raw != new.raw ) -+ flush_flags |= IOMMU_FLUSHF_modified; - -- pte->mfn = next_mfn; -- pte->iw = iw; -- pte->ir = ir; -- pte->next_level = next_level; -- pte->pr = 1; -+ write_atomic(&pte->raw, new.raw); - - return flush_flags; - } diff --git a/xsa347-4.14-3.patch b/xsa347-4.14-3.patch deleted file mode 100644 index 1fb8c60..0000000 --- a/xsa347-4.14-3.patch +++ /dev/null @@ -1,59 +0,0 @@ -From: Jan Beulich -Subject: AMD/IOMMU: ensure suitable ordering of DTE modifications - -DMA and interrupt translation should be enabled only after other -applicable DTE fields have been written. Similarly when disabling -translation or when moving a device between domains, translation should -first be disabled, before other entry fields get modified. Note however -that the "moving" aspect doesn't apply to the interrupt remapping side, -as domain specifics are maintained in the IRTEs here, not the DTE. We -also never disable interrupt remapping once it got enabled for a device -(the respective argument passed is always the immutable iommu_intremap). - -This is part of XSA-347. - -Signed-off-by: Jan Beulich -Reviewed-by: Paul Durrant - ---- a/xen/drivers/passthrough/amd/iommu_map.c -+++ b/xen/drivers/passthrough/amd/iommu_map.c -@@ -103,11 +103,18 @@ void amd_iommu_set_root_page_table(struc - uint64_t root_ptr, uint16_t domain_id, - uint8_t paging_mode, bool valid) - { -+ if ( valid || dte->v ) -+ { -+ dte->tv = false; -+ dte->v = true; -+ smp_wmb(); -+ } - dte->domain_id = domain_id; - dte->pt_root = paddr_to_pfn(root_ptr); - dte->iw = true; - dte->ir = true; - dte->paging_mode = paging_mode; -+ smp_wmb(); - dte->tv = true; - dte->v = valid; - } -@@ -130,6 +137,7 @@ void amd_iommu_set_intremap_table( - } - - dte->ig = false; /* unmapped interrupts result in i/o page faults */ -+ smp_wmb(); - dte->iv = valid; - } - ---- a/xen/drivers/passthrough/amd/pci_amd_iommu.c -+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c -@@ -117,7 +117,10 @@ static void amd_iommu_setup_domain_devic - /* Undo what amd_iommu_disable_domain_device() may have done. 
*/ - ivrs_dev = &get_ivrs_mappings(iommu->seg)[req_id]; - if ( dte->it_root ) -+ { - dte->int_ctl = IOMMU_DEV_TABLE_INT_CONTROL_TRANSLATED; -+ smp_wmb(); -+ } - dte->iv = iommu_intremap; - dte->ex = ivrs_dev->dte_allow_exclusion; - dte->sys_mgt = MASK_EXTR(ivrs_dev->device_flags, ACPI_IVHD_SYSTEM_MGMT); diff --git a/xsa348-1.patch b/xsa348-1.patch deleted file mode 100644 index 721c047..0000000 --- a/xsa348-1.patch +++ /dev/null @@ -1,113 +0,0 @@ -From: Jan Beulich -Subject: x86: replace reset_stack_and_jump_nolp() - -Move the necessary check into check_for_livepatch_work(), rather than -mostly duplicating reset_stack_and_jump() for this purpose. This is to -prevent an inflation of reset_stack_and_jump() flavors. - -Signed-off-by: Jan Beulich -Reviewed-by: Juergen Gross ---- -Of course instead of adding the check right into -check_for_livepatch_work(), a wrapper could be introduced. - ---- a/xen/arch/x86/domain.c -+++ b/xen/arch/x86/domain.c -@@ -192,7 +192,7 @@ static void noreturn continue_idle_domai - { - /* Idle vcpus might be attached to non-idle units! */ - if ( !is_idle_domain(v->sched_unit->domain) ) -- reset_stack_and_jump_nolp(guest_idle_loop); -+ reset_stack_and_jump(guest_idle_loop); - - reset_stack_and_jump(idle_loop); - } ---- a/xen/arch/x86/hvm/svm/svm.c -+++ b/xen/arch/x86/hvm/svm/svm.c -@@ -1036,7 +1036,7 @@ static void noreturn svm_do_resume(struc - - hvm_do_resume(v); - -- reset_stack_and_jump_nolp(svm_asm_do_resume); -+ reset_stack_and_jump(svm_asm_do_resume); - } - - void svm_vmenter_helper(const struct cpu_user_regs *regs) ---- a/xen/arch/x86/hvm/vmx/vmcs.c -+++ b/xen/arch/x86/hvm/vmx/vmcs.c -@@ -1909,7 +1909,7 @@ void vmx_do_resume(struct vcpu *v) - if ( host_cr4 != read_cr4() ) - __vmwrite(HOST_CR4, read_cr4()); - -- reset_stack_and_jump_nolp(vmx_asm_do_vmentry); -+ reset_stack_and_jump(vmx_asm_do_vmentry); - } - - static inline unsigned long vmr(unsigned long field) ---- a/xen/arch/x86/pv/domain.c -+++ b/xen/arch/x86/pv/domain.c -@@ -113,7 +113,7 @@ static int parse_pcid(const char *s) - static void noreturn continue_nonidle_domain(struct vcpu *v) - { - check_wakeup_from_wait(); -- reset_stack_and_jump_nolp(ret_from_intr); -+ reset_stack_and_jump(ret_from_intr); - } - - static int setup_compat_l4(struct vcpu *v) ---- a/xen/arch/x86/setup.c -+++ b/xen/arch/x86/setup.c -@@ -676,7 +676,7 @@ static void __init noreturn reinit_bsp_s - asm volatile ("setssbsy" ::: "memory"); - } - -- reset_stack_and_jump_nolp(init_done); -+ reset_stack_and_jump(init_done); - } - - /* ---- a/xen/common/livepatch.c -+++ b/xen/common/livepatch.c -@@ -1635,6 +1635,11 @@ void check_for_livepatch_work(void) - s_time_t timeout; - unsigned long flags; - -+ /* Only do any work when invoked in truly idle state. */ -+ if ( system_state != SYS_STATE_active || -+ !is_idle_domain(current->sched_unit->domain) ) -+ return; -+ - /* Fast path: no work to do. 
*/ - if ( !per_cpu(work_to_do, cpu ) ) - return; ---- a/xen/include/asm-x86/current.h -+++ b/xen/include/asm-x86/current.h -@@ -155,13 +155,13 @@ unsigned long get_stack_dump_bottom (uns - # define SHADOW_STACK_WORK "" - #endif - --#define switch_stack_and_jump(fn, instr) \ -+#define reset_stack_and_jump(fn) \ - ({ \ - unsigned int tmp; \ - __asm__ __volatile__ ( \ - SHADOW_STACK_WORK \ - "mov %[stk], %%rsp;" \ -- instr \ -+ CHECK_FOR_LIVEPATCH_WORK \ - "jmp %c[fun];" \ - : [val] "=&r" (tmp), \ - [ssp] "=&r" (tmp) \ -@@ -176,12 +176,6 @@ unsigned long get_stack_dump_bottom (uns - unreachable(); \ - }) - --#define reset_stack_and_jump(fn) \ -- switch_stack_and_jump(fn, CHECK_FOR_LIVEPATCH_WORK) -- --#define reset_stack_and_jump_nolp(fn) \ -- switch_stack_and_jump(fn, "") -- - /* - * Which VCPU's state is currently running on each CPU? - * This is not necesasrily the same as 'current' as a CPU may be diff --git a/xsa348-2.patch b/xsa348-2.patch deleted file mode 100644 index a8106bf..0000000 --- a/xsa348-2.patch +++ /dev/null @@ -1,85 +0,0 @@ -From: Jan Beulich -Subject: x86: fold guest_idle_loop() into idle_loop() - -The latter can easily be made cover both cases. This is in preparation -of using idle_loop directly for populating idle_csw.tail. - -Take the liberty and also adjust indentation / spacing in involved code. - -Signed-off-by: Jan Beulich -Reviewed-by: Juergen Gross - ---- a/xen/arch/x86/domain.c -+++ b/xen/arch/x86/domain.c -@@ -133,14 +133,22 @@ void play_dead(void) - static void idle_loop(void) - { - unsigned int cpu = smp_processor_id(); -+ /* -+ * Idle vcpus might be attached to non-idle units! We don't do any -+ * standard idle work like tasklets or livepatching in this case. -+ */ -+ bool guest = !is_idle_domain(current->sched_unit->domain); - - for ( ; ; ) - { - if ( cpu_is_offline(cpu) ) -+ { -+ ASSERT(!guest); - play_dead(); -+ } - - /* Are we here for running vcpu context tasklets, or for idling? */ -- if ( unlikely(tasklet_work_to_do(cpu)) ) -+ if ( !guest && unlikely(tasklet_work_to_do(cpu)) ) - { - do_tasklet(); - /* Livepatch work is always kicked off via a tasklet. */ -@@ -151,28 +159,14 @@ static void idle_loop(void) - * and then, after it is done, whether softirqs became pending - * while we were scrubbing. - */ -- else if ( !softirq_pending(cpu) && !scrub_free_pages() && -- !softirq_pending(cpu) ) -- pm_idle(); -- do_softirq(); -- } --} -- --/* -- * Idle loop for siblings in active schedule units. -- * We don't do any standard idle work like tasklets or livepatching. -- */ --static void guest_idle_loop(void) --{ -- unsigned int cpu = smp_processor_id(); -- -- for ( ; ; ) -- { -- ASSERT(!cpu_is_offline(cpu)); -- -- if ( !softirq_pending(cpu) && !scrub_free_pages() && -- !softirq_pending(cpu)) -- sched_guest_idle(pm_idle, cpu); -+ else if ( !softirq_pending(cpu) && !scrub_free_pages() && -+ !softirq_pending(cpu) ) -+ { -+ if ( guest ) -+ sched_guest_idle(pm_idle, cpu); -+ else -+ pm_idle(); -+ } - do_softirq(); - } - } -@@ -190,10 +184,6 @@ void startup_cpu_idle_loop(void) - - static void noreturn continue_idle_domain(struct vcpu *v) - { -- /* Idle vcpus might be attached to non-idle units! 
*/ -- if ( !is_idle_domain(v->sched_unit->domain) ) -- reset_stack_and_jump(guest_idle_loop); -- - reset_stack_and_jump(idle_loop); - } - diff --git a/xsa348-3.patch b/xsa348-3.patch deleted file mode 100644 index 43b8bbc..0000000 --- a/xsa348-3.patch +++ /dev/null @@ -1,174 +0,0 @@ -From: Jan Beulich -Subject: x86: avoid calling {svm,vmx}_do_resume() - -These functions follow the following path: hvm_do_resume() -> -handle_hvm_io_completion() -> hvm_wait_for_io() -> -wait_on_xen_event_channel() -> do_softirq() -> schedule() -> -sched_context_switch() -> continue_running() and hence may -recursively invoke themselves. If this ends up happening a couple of -times, a stack overflow would result. - -Prevent this by also resetting the stack at the -->arch.ctxt_switch->tail() invocations (in both places for consistency) -and thus jumping to the functions instead of calling them. - -This is XSA-348 / CVE-2020-29566. - -Reported-by: Julien Grall -Signed-off-by: Jan Beulich -Reviewed-by: Juergen Gross ---- -v2: Fix LIVEPATCH builds crashing. - ---- a/xen/arch/x86/domain.c -+++ b/xen/arch/x86/domain.c -@@ -130,7 +130,7 @@ void play_dead(void) - dead_idle(); - } - --static void idle_loop(void) -+static void noreturn idle_loop(void) - { - unsigned int cpu = smp_processor_id(); - /* -@@ -182,11 +182,6 @@ void startup_cpu_idle_loop(void) - reset_stack_and_jump(idle_loop); - } - --static void noreturn continue_idle_domain(struct vcpu *v) --{ -- reset_stack_and_jump(idle_loop); --} -- - void init_hypercall_page(struct domain *d, void *ptr) - { - memset(ptr, 0xcc, PAGE_SIZE); -@@ -710,7 +705,7 @@ int arch_domain_create(struct domain *d, - static const struct arch_csw idle_csw = { - .from = paravirt_ctxt_switch_from, - .to = paravirt_ctxt_switch_to, -- .tail = continue_idle_domain, -+ .tail = idle_loop, - }; - - d->arch.ctxt_switch = &idle_csw; -@@ -2047,20 +2042,12 @@ void context_switch(struct vcpu *prev, s - /* Ensure that the vcpu has an up-to-date time base. */ - update_vcpu_system_time(next); - -- /* -- * Schedule tail *should* be a terminal function pointer, but leave a -- * bug frame around just in case it returns, to save going back into the -- * context switching code and leaving a far more subtle crash to diagnose. -- */ -- nextd->arch.ctxt_switch->tail(next); -- BUG(); -+ reset_stack_and_jump_ind(nextd->arch.ctxt_switch->tail); - } - - void continue_running(struct vcpu *same) - { -- /* See the comment above. 
*/ -- same->domain->arch.ctxt_switch->tail(same); -- BUG(); -+ reset_stack_and_jump_ind(same->domain->arch.ctxt_switch->tail); - } - - int __sync_local_execstate(void) ---- a/xen/arch/x86/hvm/svm/svm.c -+++ b/xen/arch/x86/hvm/svm/svm.c -@@ -991,8 +991,9 @@ static void svm_ctxt_switch_to(struct vc - wrmsr_tsc_aux(v->arch.msrs->tsc_aux); - } - --static void noreturn svm_do_resume(struct vcpu *v) -+static void noreturn svm_do_resume(void) - { -+ struct vcpu *v = current; - struct vmcb_struct *vmcb = v->arch.hvm.svm.vmcb; - bool debug_state = (v->domain->debugger_attached || - v->domain->arch.monitor.software_breakpoint_enabled || ---- a/xen/arch/x86/hvm/vmx/vmcs.c -+++ b/xen/arch/x86/hvm/vmx/vmcs.c -@@ -1850,8 +1850,9 @@ void vmx_vmentry_failure(void) - domain_crash(curr->domain); - } - --void vmx_do_resume(struct vcpu *v) -+void vmx_do_resume(void) - { -+ struct vcpu *v = current; - bool_t debug_state; - unsigned long host_cr4; - ---- a/xen/arch/x86/pv/domain.c -+++ b/xen/arch/x86/pv/domain.c -@@ -110,7 +110,7 @@ static int parse_pcid(const char *s) - return rc; - } - --static void noreturn continue_nonidle_domain(struct vcpu *v) -+static void noreturn continue_nonidle_domain(void) - { - check_wakeup_from_wait(); - reset_stack_and_jump(ret_from_intr); ---- a/xen/include/asm-x86/current.h -+++ b/xen/include/asm-x86/current.h -@@ -155,18 +155,18 @@ unsigned long get_stack_dump_bottom (uns - # define SHADOW_STACK_WORK "" - #endif - --#define reset_stack_and_jump(fn) \ -+#define switch_stack_and_jump(fn, instr, constr) \ - ({ \ - unsigned int tmp; \ - __asm__ __volatile__ ( \ - SHADOW_STACK_WORK \ - "mov %[stk], %%rsp;" \ - CHECK_FOR_LIVEPATCH_WORK \ -- "jmp %c[fun];" \ -+ instr "[fun]" \ - : [val] "=&r" (tmp), \ - [ssp] "=&r" (tmp) \ - : [stk] "r" (guest_cpu_user_regs()), \ -- [fun] "i" (fn), \ -+ [fun] constr (fn), \ - [skstk_base] "i" \ - ((PRIMARY_SHSTK_SLOT + 1) * PAGE_SIZE - 8), \ - [stack_mask] "i" (STACK_SIZE - 1), \ -@@ -176,6 +176,13 @@ unsigned long get_stack_dump_bottom (uns - unreachable(); \ - }) - -+#define reset_stack_and_jump(fn) \ -+ switch_stack_and_jump(fn, "jmp %c", "i") -+ -+/* The constraint may only specify non-call-clobbered registers. */ -+#define reset_stack_and_jump_ind(fn) \ -+ switch_stack_and_jump(fn, "INDIRECT_JMP %", "b") -+ - /* - * Which VCPU's state is currently running on each CPU? - * This is not necesasrily the same as 'current' as a CPU may be ---- a/xen/include/asm-x86/domain.h -+++ b/xen/include/asm-x86/domain.h -@@ -337,7 +337,7 @@ struct arch_domain - const struct arch_csw { - void (*from)(struct vcpu *); - void (*to)(struct vcpu *); -- void (*tail)(struct vcpu *); -+ void noreturn (*tail)(void); - } *ctxt_switch; - - #ifdef CONFIG_HVM ---- a/xen/include/asm-x86/hvm/vmx/vmx.h -+++ b/xen/include/asm-x86/hvm/vmx/vmx.h -@@ -95,7 +95,7 @@ typedef enum { - void vmx_asm_vmexit_handler(struct cpu_user_regs); - void vmx_asm_do_vmentry(void); - void vmx_intr_assist(void); --void noreturn vmx_do_resume(struct vcpu *); -+void noreturn vmx_do_resume(void); - void vmx_vlapic_msr_changed(struct vcpu *v); - struct hvm_emulate_ctxt; - void vmx_realmode_emulate_one(struct hvm_emulate_ctxt *hvmemul_ctxt); diff --git a/xsa351-arm.patch b/xsa351-arm.patch deleted file mode 100644 index d0d1941..0000000 --- a/xsa351-arm.patch +++ /dev/null @@ -1,58 +0,0 @@ -From: Julien Grall -Subject: xen/arm: Always trap AMU system registers - -The Activity Monitors Unit (AMU) has been introduced by ARMv8.4. 
-considered unsafe to be exposed to guests, as it might reveal
-information about code executed by other guests or the host.
-
-Arm provided a way to trap all the AMU system registers by setting
-CPTR_EL2.TAM to 1.
-
-Unfortunately, in older revisions of the specification, bit 30 (now
-CPTR_EL2.TAM) was RES0. Because of that, Xen is setting it to 0 and
-therefore the system registers would be exposed to the guest when it is
-run on processors with AMU.
-
-As the bit is marked as UNKNOWN at boot in Armv8.4, the only safe
-solution for us is to always set CPTR_EL2.TAM to 1.
-
-Guests trying to access the AMU system registers will now receive an
-undefined instruction. Unfortunately, this means that even well-behaved
-guests may fail to boot because we don't sanitize the ID registers.
-
-This is a known issue with other Armv8.0+ features (e.g. SVE, Pointer
-Auth). This will be taken care of separately.
-
-This is part of XSA-351 (or XSA-93 re-born).
-
-Signed-off-by: Julien Grall
-Reviewed-by: Andre Przywara
-Reviewed-by: Stefano Stabellini
-Reviewed-by: Bertrand Marquis
-
-diff --git a/xen/arch/arm/traps.c b/xen/arch/arm/traps.c
-index a36f145e67..22bd1bd4c6 100644
---- a/xen/arch/arm/traps.c
-+++ b/xen/arch/arm/traps.c
-@@ -151,7 +151,8 @@ void init_traps(void)
-      * On ARM64 the TCPx bits which we set here (0..9,12,13) are all
-      * RES1, i.e. they would trap whether we did this write or not.
-      */
--    WRITE_SYSREG((HCPTR_CP_MASK & ~(HCPTR_CP(10) | HCPTR_CP(11))) | HCPTR_TTA,
-+    WRITE_SYSREG((HCPTR_CP_MASK & ~(HCPTR_CP(10) | HCPTR_CP(11))) |
-+                 HCPTR_TTA | HCPTR_TAM,
-                  CPTR_EL2);
-
-     /*
-diff --git a/xen/include/asm-arm/processor.h b/xen/include/asm-arm/processor.h
-index 3ca67f8157..d3d12a9d19 100644
---- a/xen/include/asm-arm/processor.h
-+++ b/xen/include/asm-arm/processor.h
-@@ -351,6 +351,7 @@
- #define VTCR_RES1       (_AC(1,UL)<<31)
-
- /* HCPTR Hyp. Coprocessor Trap Register */
-+#define HCPTR_TAM       ((_AC(1,U)<<30))
- #define HCPTR_TTA       ((_AC(1,U)<<20))    /* Trap trace registers */
- #define HCPTR_CP(x)     ((_AC(1,U)<<(x)))   /* Trap Coprocessor x */
- #define HCPTR_CP_MASK   ((_AC(1,U)<<14)-1)
diff --git a/xsa351-x86-4.14-1.patch b/xsa351-x86-4.14-1.patch
deleted file mode 100644
index 1be14fb..0000000
--- a/xsa351-x86-4.14-1.patch
+++ /dev/null
@@ -1,155 +0,0 @@
-From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?=
-Subject: x86/msr: fix handling of MSR_IA32_PERF_{STATUS/CTL}
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-Currently a PV hardware domain can also be given control over the CPU
-frequency, and such a guest is allowed to write to MSR_IA32_PERF_CTL.
-However, since commit 322ec7c89f6 the default behavior has been changed
-to reject accesses to not explicitly handled MSRs, preventing PV
-guests that manage CPU frequency from reading
-MSR_IA32_PERF_{STATUS/CTL}.
-
-Additionally some HVM guests (Windows at least) will attempt to read
-MSR_IA32_PERF_CTL and will panic if given back a #GP fault:
-
-  vmx.c:3035:d8v0 RDMSR 0x00000199 unimplemented
-  d8v0 VIRIDIAN CRASH: 3b c0000096 fffff806871c1651 ffffda0253683720 0
-
-Move the handling of MSR_IA32_PERF_{STATUS/CTL} to the common MSR
-handling shared between HVM and PV guests, and add an explicit case
-for reads to MSR_IA32_PERF_{STATUS/CTL}.
-
-Restore previous behavior and allow PV guests with the required
-permissions to read the contents of the mentioned MSRs. Non-privileged
-guests will get 0 when trying to read those registers, as writes to
-MSR_IA32_PERF_CTL by such guests will already be silently dropped.
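
[Editor's note: a minimal, self-contained C model of the read semantics
described above -- read-as-zero for ordinary guests, hardware pass-through
only for a dom0 nominated as cpufreq controller. This is illustrative only,
not Xen code; perf_msr_read(), hw_read_ok and hw_val are invented names.]

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    static bool hw_read_ok = true;    /* models rdmsr_safe() succeeding */
    static uint64_t hw_val = 0x1234;  /* value the hardware would return */

    /* Returns true on success (value in *val); false means #GP fault. */
    static bool perf_msr_read(bool is_cpufreq_controller, uint64_t *val)
    {
        *val = 0;                     /* RAZ default for every domain */
        if (!is_cpufreq_controller)   /* ordinary guests stop here */
            return true;
        if (hw_read_ok) {             /* controller dom0: real value */
            *val = hw_val;
            return true;
        }
        return false;                 /* hardware read faulted -> #GP */
    }

    int main(void)
    {
        uint64_t v;
        perf_msr_read(false, &v);
        printf("ordinary guest reads %#llx\n", (unsigned long long)v);
        perf_msr_read(true, &v);
        printf("cpufreq dom0 reads   %#llx\n", (unsigned long long)v);
        return 0;
    }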
-
-Fixes: 322ec7c89f6 ('x86/pv: disallow access to unknown MSRs')
-Fixes: 84e848fd7a1 ('x86/hvm: disallow access to unknown MSRs')
-Signed-off-by: Roger Pau Monné
-Signed-off-by: Andrew Cooper
-Reviewed-by: Roger Pau Monné
-Reviewed-by: Jan Beulich
-(cherry picked from commit 3059178798a23ba870ff86ff54d442a07e6651fc)
-
-diff --git a/xen/arch/x86/msr.c b/xen/arch/x86/msr.c
-index d72ab0fa1f..3db26faf08 100644
---- a/xen/arch/x86/msr.c
-+++ b/xen/arch/x86/msr.c
-@@ -245,6 +245,25 @@ int guest_rdmsr(struct vcpu *v, uint32_t msr, uint64_t *val)
-         *val = msrs->misc_features_enables.raw;
-         break;
-
-+        /*
-+         * These MSRs are not enumerated in CPUID. They have been around
-+         * since the Pentium 4, and implemented by other vendors.
-+         *
-+         * Some versions of Windows try reading these before setting up a #GP
-+         * handler, and Linux has several unguarded reads as well. Provide
-+         * RAZ semantics, in general, but permit a cpufreq controller dom0 to
-+         * have full access.
-+         */
-+    case MSR_IA32_PERF_STATUS:
-+    case MSR_IA32_PERF_CTL:
-+        if ( !(cp->x86_vendor & (X86_VENDOR_INTEL | X86_VENDOR_CENTAUR)) )
-+            goto gp_fault;
-+
-+        *val = 0;
-+        if ( likely(!is_cpufreq_controller(d)) || rdmsr_safe(msr, *val) == 0 )
-+            break;
-+        goto gp_fault;
-+
-     case MSR_X2APIC_FIRST ... MSR_X2APIC_LAST:
-         if ( !is_hvm_domain(d) || v != curr )
-             goto gp_fault;
-@@ -343,6 +362,7 @@ int guest_wrmsr(struct vcpu *v, uint32_t msr, uint64_t val)
-     case MSR_INTEL_CORE_THREAD_COUNT:
-     case MSR_INTEL_PLATFORM_INFO:
-     case MSR_ARCH_CAPABILITIES:
-+    case MSR_IA32_PERF_STATUS:
-         /* Read-only */
-     case MSR_TEST_CTRL:
-     case MSR_TSX_FORCE_ABORT:
-@@ -454,6 +474,21 @@ int guest_wrmsr(struct vcpu *v, uint32_t msr, uint64_t val)
-         break;
-     }
-
-+        /*
-+         * This MSR is not enumerated in CPUID. It has been around since the
-+         * Pentium 4, and implemented by other vendors.
-+         *
-+         * To match the RAZ semantics, implement as write-discard, except for
-+         * a cpufreq controller dom0 which has full access.
-+         */
-+    case MSR_IA32_PERF_CTL:
-+        if ( !(cp->x86_vendor & (X86_VENDOR_INTEL | X86_VENDOR_CENTAUR)) )
-+            goto gp_fault;
-+
-+        if ( likely(!is_cpufreq_controller(d)) || wrmsr_safe(msr, val) == 0 )
-+            break;
-+        goto gp_fault;
-+
-     case MSR_X2APIC_FIRST ... MSR_X2APIC_LAST:
-         if ( !is_hvm_domain(d) || v != curr )
-             goto gp_fault;
-diff --git a/xen/arch/x86/pv/emul-priv-op.c b/xen/arch/x86/pv/emul-priv-op.c
-index 85a9fd4767..5c7b9117ae 100644
---- a/xen/arch/x86/pv/emul-priv-op.c
-+++ b/xen/arch/x86/pv/emul-priv-op.c
-@@ -820,12 +820,6 @@ static inline uint64_t guest_misc_enable(uint64_t val)
-     return val;
- }
-
--static inline bool is_cpufreq_controller(const struct domain *d)
--{
--    return ((cpufreq_controller == FREQCTL_dom0_kernel) &&
--            is_hardware_domain(d));
--}
--
- static int read_msr(unsigned int reg, uint64_t *val,
-                     struct x86_emulate_ctxt *ctxt)
- {
-@@ -1070,14 +1064,6 @@ static int write_msr(unsigned int reg, uint64_t val,
-             return X86EMUL_OKAY;
-         break;
-
--    case MSR_IA32_PERF_CTL:
--        if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL )
--            break;
--        if ( likely(!is_cpufreq_controller(currd)) ||
--             wrmsr_safe(reg, val) == 0 )
--            return X86EMUL_OKAY;
--        break;
--
-     case MSR_IA32_THERM_CONTROL:
-     case MSR_IA32_ENERGY_PERF_BIAS:
-         if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL )
-diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
-index a0d87ef9d0..97ba8e0795 100644
---- a/xen/include/xen/sched.h
-+++ b/xen/include/xen/sched.h
-@@ -1071,6 +1071,22 @@ extern enum cpufreq_controller {
-     FREQCTL_none, FREQCTL_dom0_kernel, FREQCTL_xen
- } cpufreq_controller;
-
-+static always_inline bool is_cpufreq_controller(const struct domain *d)
-+{
-+    /*
-+     * A PV dom0 can be nominated as the cpufreq controller, instead of using
-+     * Xen's cpufreq driver, at which point dom0 gets direct access to certain
-+     * MSRs.
-+     *
-+     * This interface only works when dom0 is identity pinned and has the same
-+     * number of vCPUs as pCPUs on the system.
-+     *
-+     * It would be far better to paravirtualise the interface.
-+     */
-+    return (is_pv_domain(d) && is_hardware_domain(d) &&
-+            cpufreq_controller == FREQCTL_dom0_kernel);
-+}
-+
- int cpupool_move_domain(struct domain *d, struct cpupool *c);
- int cpupool_do_sysctl(struct xen_sysctl_cpupool_op *op);
- int cpupool_get_id(const struct domain *d);
diff --git a/xsa351-x86-4.14-2.patch b/xsa351-x86-4.14-2.patch
deleted file mode 100644
index 2c0f4e7..0000000
--- a/xsa351-x86-4.14-2.patch
+++ /dev/null
@@ -1,126 +0,0 @@
-From: Andrew Cooper
-Subject: x86/msr: Disallow guest access to the RAPL MSRs
-
-Researchers have demonstrated using the RAPL interface to perform a
-differential power analysis attack to recover AES keys used by other cores in
-the system.
-
-Furthermore, even privileged guests cannot use this interface correctly, due
-to MSR scope and vcpu scheduling issues. The interface would want to be
-paravirtualised to be used sensibly.
-
-Disallow access to the RAPL MSRs completely, as well as other MSRs which
-potentially access fine grain power information.
-
-This is part of XSA-351.
-
-Signed-off-by: Andrew Cooper
-Reviewed-by: Jan Beulich
-
-diff --git a/xen/arch/x86/msr.c b/xen/arch/x86/msr.c
-index 3db26faf08..aa107823ac 100644
---- a/xen/arch/x86/msr.c
-+++ b/xen/arch/x86/msr.c
-@@ -185,6 +185,13 @@ int guest_rdmsr(struct vcpu *v, uint32_t msr, uint64_t *val)
-     case MSR_TSX_CTRL:
-     case MSR_MCU_OPT_CTRL:
-     case MSR_RTIT_OUTPUT_BASE ... MSR_RTIT_ADDR_B(7):
-+    case MSR_RAPL_POWER_UNIT:
-+    case MSR_PKG_POWER_LIMIT ... MSR_PKG_POWER_INFO:
-+    case MSR_DRAM_POWER_LIMIT ... MSR_DRAM_POWER_INFO:
-+    case MSR_PP0_POWER_LIMIT ... MSR_PP0_POLICY:
-+    case MSR_PP1_POWER_LIMIT ... MSR_PP1_POLICY:
-+    case MSR_PLATFORM_ENERGY_COUNTER:
-+    case MSR_PLATFORM_POWER_LIMIT:
-     case MSR_U_CET:
-     case MSR_S_CET:
-     case MSR_PL0_SSP ... MSR_INTERRUPT_SSP_TABLE:
-@@ -192,6 +199,8 @@ int guest_rdmsr(struct vcpu *v, uint32_t msr, uint64_t *val)
-     case MSR_AMD64_LWP_CBADDR:
-     case MSR_PPIN_CTL:
-     case MSR_PPIN:
-+    case MSR_F15H_CU_POWER ... MSR_F15H_CU_MAX_POWER:
-+    case MSR_AMD_RAPL_POWER_UNIT ... MSR_AMD_PKG_ENERGY_STATUS:
-     case MSR_AMD_PPIN_CTL:
-     case MSR_AMD_PPIN:
-         /* Not offered to guests. */
-@@ -369,6 +378,13 @@ int guest_wrmsr(struct vcpu *v, uint32_t msr, uint64_t val)
-     case MSR_TSX_CTRL:
-     case MSR_MCU_OPT_CTRL:
-     case MSR_RTIT_OUTPUT_BASE ... MSR_RTIT_ADDR_B(7):
-+    case MSR_RAPL_POWER_UNIT:
-+    case MSR_PKG_POWER_LIMIT ... MSR_PKG_POWER_INFO:
-+    case MSR_DRAM_POWER_LIMIT ... MSR_DRAM_POWER_INFO:
-+    case MSR_PP0_POWER_LIMIT ... MSR_PP0_POLICY:
-+    case MSR_PP1_POWER_LIMIT ... MSR_PP1_POLICY:
-+    case MSR_PLATFORM_ENERGY_COUNTER:
-+    case MSR_PLATFORM_POWER_LIMIT:
-     case MSR_U_CET:
-     case MSR_S_CET:
-     case MSR_PL0_SSP ... MSR_INTERRUPT_SSP_TABLE:
-@@ -376,6 +392,8 @@ int guest_wrmsr(struct vcpu *v, uint32_t msr, uint64_t val)
-     case MSR_AMD64_LWP_CBADDR:
-     case MSR_PPIN_CTL:
-     case MSR_PPIN:
-+    case MSR_F15H_CU_POWER ... MSR_F15H_CU_MAX_POWER:
-+    case MSR_AMD_RAPL_POWER_UNIT ... MSR_AMD_PKG_ENERGY_STATUS:
-     case MSR_AMD_PPIN_CTL:
-     case MSR_AMD_PPIN:
-         /* Not offered to guests. */
-diff --git a/xen/include/asm-x86/msr-index.h b/xen/include/asm-x86/msr-index.h
-index 0fe98af923..5e64ecff91 100644
---- a/xen/include/asm-x86/msr-index.h
-+++ b/xen/include/asm-x86/msr-index.h
-@@ -77,6 +77,38 @@
- #define MSR_RTIT_ADDR_A(n)              (0x00000580 + (n) * 2)
- #define MSR_RTIT_ADDR_B(n)              (0x00000581 + (n) * 2)
-
-+/*
-+ * Intel Runtime Average Power Limiting (RAPL) interface. Power plane base
-+ * addresses (MSR_*_POWER_LIMIT) are model specific, but have so-far been
-+ * consistent since their introduction in SandyBridge.
-+ *
-+ * Offsets of functionality from the power plane base is architectural, but
-+ * not all power planes support all functionality.
-+ */
-+#define MSR_RAPL_POWER_UNIT             0x00000606
-+
-+#define MSR_PKG_POWER_LIMIT             0x00000610
-+#define MSR_PKG_ENERGY_STATUS           0x00000611
-+#define MSR_PKG_PERF_STATUS             0x00000613
-+#define MSR_PKG_POWER_INFO              0x00000614
-+
-+#define MSR_DRAM_POWER_LIMIT            0x00000618
-+#define MSR_DRAM_ENERGY_STATUS          0x00000619
-+#define MSR_DRAM_PERF_STATUS            0x0000061b
-+#define MSR_DRAM_POWER_INFO             0x0000061c
-+
-+#define MSR_PP0_POWER_LIMIT             0x00000638
-+#define MSR_PP0_ENERGY_STATUS           0x00000639
-+#define MSR_PP0_POLICY                  0x0000063a
-+
-+#define MSR_PP1_POWER_LIMIT             0x00000640
-+#define MSR_PP1_ENERGY_STATUS           0x00000641
-+#define MSR_PP1_POLICY                  0x00000642
-+
-+/* Intel Platform-wide power interface. */
-+#define MSR_PLATFORM_ENERGY_COUNTER     0x0000064d
-+#define MSR_PLATFORM_POWER_LIMIT        0x0000065c
-+
- #define MSR_U_CET                       0x000006a0
- #define MSR_S_CET                       0x000006a2
- #define CET_SHSTK_EN                    (_AC(1, ULL) << 0)
-@@ -92,6 +124,13 @@
- #define PASID_PASID_MASK                0x000fffff
- #define PASID_VALID                     (_AC(1, ULL) << 31)
-
-+#define MSR_F15H_CU_POWER               0xc001007a
-+#define MSR_F15H_CU_MAX_POWER           0xc001007b
-+
-+#define MSR_AMD_RAPL_POWER_UNIT         0xc0010299
-+#define MSR_AMD_CORE_ENERGY_STATUS      0xc001029a
-+#define MSR_AMD_PKG_ENERGY_STATUS       0xc001029b
-+
- /*
-  * Legacy MSR constants in need of cleanup.  No new MSRs below this comment.
- */ diff --git a/xsa352.patch b/xsa352.patch deleted file mode 100644 index e21d21a..0000000 --- a/xsa352.patch +++ /dev/null @@ -1,42 +0,0 @@ -From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= -Subject: tools/ocaml/xenstored: only Dom0 can change node owner -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Otherwise we can give quota away to another domain, either causing it to run -out of quota, or in case of Dom0 use unbounded amounts of memory and bypass -the quota system entirely. - -This was fixed in the C version of xenstored in 2006 (c/s db34d2aaa5f5, -predating the XSA process by 5 years). - -It was also fixed in the mirage version of xenstore in 2012, with a unit test -demonstrating the vulnerability: - - https://github.com/mirage/ocaml-xenstore/commit/6b91f3ac46b885d0530a51d57a9b3a57d64923a7 - https://github.com/mirage/ocaml-xenstore/commit/22ee5417c90b8fda905c38de0d534506152eace6 - -but possibly without realising that the vulnerability still affected the -in-tree oxenstored (added c/s f44af660412 in 2010). - -This is XSA-352. - -Signed-off-by: Edwin Török -Acked-by: Christian Lindig -Reviewed-by: Andrew Cooper - -diff --git a/tools/ocaml/xenstored/store.ml b/tools/ocaml/xenstored/store.ml -index 3b05128f1b..5f915f2bbe 100644 ---- a/tools/ocaml/xenstored/store.ml -+++ b/tools/ocaml/xenstored/store.ml -@@ -407,7 +407,8 @@ let setperms store perm path nperms = - | Some node -> - let old_owner = Node.get_owner node in - let new_owner = Perms.Node.get_owner nperms in -- if not ((old_owner = new_owner) || (Perms.Connection.is_dom0 perm)) then Quota.check store.quota new_owner 0; -+ if not ((old_owner = new_owner) || (Perms.Connection.is_dom0 perm)) then -+ raise Define.Permission_denied; - store.root <- path_setperms store perm path nperms; - Quota.del_entry store.quota old_owner; - Quota.add_entry store.quota new_owner diff --git a/xsa353.patch b/xsa353.patch deleted file mode 100644 index 764f93c..0000000 --- a/xsa353.patch +++ /dev/null @@ -1,89 +0,0 @@ -From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= -Subject: tools/ocaml/xenstored: do permission checks on xenstore root -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -This was lacking in a disappointing number of places. - -The xenstore root node is treated differently from all other nodes, because it -doesn't have a parent, and mutation requires changing the parent. - -Unfortunately this lead to open-coding the special case for root into every -single xenstore operation, and out of all the xenstore operations only read -did a permission check when handling the root node. - -This means that an unprivileged guest can: - - * xenstore-chmod / to its liking and subsequently write new arbitrary nodes - there (subject to quota) - * xenstore-rm -r / deletes almost the entire xenstore tree (xenopsd quickly - refills some, but you are left with a broken system) - * DIRECTORY on / lists all children when called through python - bindings (xenstore-ls stops at /local because it tries to list recursively) - * get-perms on / works too, but that is just a minor information leak - -Add the missing permission checks, but this should really be refactored to do -the root handling and permission checks on the node only once from a single -function, instead of getting it wrong nearly everywhere. - -This is XSA-353. 
-
-Signed-off-by: Edwin Török
-Acked-by: Christian Lindig
-Reviewed-by: Andrew Cooper
-
-diff --git a/tools/ocaml/xenstored/store.ml b/tools/ocaml/xenstored/store.ml
-index f299ec6461..92b6289b5e 100644
---- a/tools/ocaml/xenstored/store.ml
-+++ b/tools/ocaml/xenstored/store.ml
-@@ -273,15 +273,17 @@ let path_rm store perm path =
-             Node.del_childname node name
-         with Not_found ->
-             raise Define.Doesnt_exist in
--    if path = [] then
-+    if path = [] then (
-+        Node.check_perm store.root perm Perms.WRITE;
-         Node.del_all_children store.root
--    else
-+    ) else
-         Path.apply_modify store.root path do_rm
-
- let path_setperms store perm path perms =
--    if path = [] then
-+    if path = [] then (
-+        Node.check_perm store.root perm Perms.WRITE;
-         Node.set_perms store.root perms
--    else
-+    ) else
-         let do_setperms node name =
-             let c = Node.find node name in
-             Node.check_owner c perm;
-@@ -313,9 +315,10 @@ let read store perm path =
-
- let ls store perm path =
-     let children =
--        if path = [] then
--            (Node.get_children store.root)
--        else
-+        if path = [] then (
-+            Node.check_perm store.root perm Perms.READ;
-+            Node.get_children store.root
-+        ) else
-             let do_ls node name =
-                 let cnode = Node.find node name in
-                 Node.check_perm cnode perm Perms.READ;
-@@ -324,9 +327,10 @@ let ls store perm path =
-     List.rev (List.map (fun n -> Symbol.to_string n.Node.name) children)
-
- let getperms store perm path =
--    if path = [] then
--        (Node.get_perms store.root)
--    else
-+    if path = [] then (
-+        Node.check_perm store.root perm Perms.READ;
-+        Node.get_perms store.root
-+    ) else
-         let fct n name =
-             let c = Node.find n name in
-             Node.check_perm c perm Perms.READ;
diff --git a/xsa355.patch b/xsa355.patch
deleted file mode 100644
index 491dd05..0000000
--- a/xsa355.patch
+++ /dev/null
@@ -1,23 +0,0 @@
-From: Jan Beulich
-Subject: memory: fix off-by-one in XSA-346 change
-
-The comparison against ARRAY_SIZE() needs to be >= in order to avoid
-overrunning the pages[] array.
-
-This is XSA-355.
-
-Fixes: 5777a3742d88 ("IOMMU: hold page ref until after deferred TLB flush")
-Signed-off-by: Jan Beulich
-Reviewed-by: Julien Grall
-
---- a/xen/common/memory.c
-+++ b/xen/common/memory.c
-@@ -854,7 +854,7 @@ int xenmem_add_to_physmap(struct domain
-             ++extra.ppage;
-
-         /* Check for continuation if it's not the last iteration. */
--        if ( (++done > ARRAY_SIZE(pages) && extra.ppage) ||
-+        if ( (++done >= ARRAY_SIZE(pages) && extra.ppage) ||
-              (xatp->size > done && hypercall_preempt_check()) )
-         {
-             rc = start + done;
diff --git a/xsa356.patch b/xsa356.patch
deleted file mode 100644
index f18d220..0000000
--- a/xsa356.patch
+++ /dev/null
@@ -1,65 +0,0 @@
-From: Roger Pau Monné
-Subject: x86/irq: fix infinite loop in irq_move_cleanup_interrupt
-
-If Xen enters irq_move_cleanup_interrupt with a dynamic vector below
-IRQ_MOVE_CLEANUP_VECTOR pending in IRR (0x20 or 0x21) that's also
-designated for a cleanup, it will enter a loop where
-irq_move_cleanup_interrupt continuously sends a cleanup IPI (vector
-0x22) to itself while waiting for the vector with lower priority to be
-injected - which will never happen because IRQ_MOVE_CLEANUP_VECTOR
-takes precedence and it's always injected first.
-
-Fix this by making sure vectors below IRQ_MOVE_CLEANUP_VECTOR are
-marked as used and thus not available for APs. Also add some logic to
-assert and prevent irq_move_cleanup_interrupt from entering such an
-infinite loop, albeit that should never happen given the current code.
-
-This is XSA-356 / CVE-2020-29567.
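
[Editor's note: the loop hinges on x86 APIC priority classes -- vectors
sharing bits 7:4 form one class, and a pending lower vector in a class is
never delivered while a higher one in the same class keeps firing. A tiny
standalone C illustration follows; APIC_PRIO_CLASS() here is a plausible
definition modelled on the BUILD_BUG_ON() this patch removes, and the
0x20-0x22 values come from the commit message, not from any one build:]

    #include <stdio.h>

    #define FIRST_DYNAMIC_VECTOR    0x20
    #define IRQ_MOVE_CLEANUP_VECTOR 0x22
    #define APIC_PRIO_CLASS(vec)    ((vec) & 0xF0) /* assumed definition */

    int main(void)
    {
        /* 0x20, 0x21 and 0x22 all print class 0x20: a pending 0x20/0x21
         * cannot preempt the self-IPI on 0x22, hence the infinite loop,
         * and hence the fix reserving every vector up to 0x22. */
        for (unsigned int v = FIRST_DYNAMIC_VECTOR;
             v <= IRQ_MOVE_CLEANUP_VECTOR; v++)
            printf("vector %#x -> priority class %#x\n",
                   v, APIC_PRIO_CLASS(v));
        return 0;
    }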
-
-Fixes: 3fba06ba9f8 ('x86/IRQ: re-use legacy vector ranges on APs')
-Signed-off-by: Roger Pau Monné
-Reviewed-by: Jan Beulich
-
---- a/xen/arch/x86/irq.c
-+++ b/xen/arch/x86/irq.c
-@@ -441,8 +441,15 @@ int __init init_irq_data(void)
-     set_bit(HYPERCALL_VECTOR, used_vectors);
- #endif
-
--    /* IRQ_MOVE_CLEANUP_VECTOR used for clean up vectors */
--    set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors);
-+    /*
-+     * Mark vectors up to the cleanup one as used, to prevent an infinite loop
-+     * invoking irq_move_cleanup_interrupt.
-+     */
-+    BUILD_BUG_ON(IRQ_MOVE_CLEANUP_VECTOR < FIRST_DYNAMIC_VECTOR);
-+    for ( vector = FIRST_DYNAMIC_VECTOR;
-+          vector <= IRQ_MOVE_CLEANUP_VECTOR;
-+          vector++ )
-+        __set_bit(vector, used_vectors);
-
-     return 0;
- }
-@@ -727,10 +734,6 @@ void irq_move_cleanup_interrupt(struct cpu_user_regs *regs)
- {
-     unsigned vector, me;
-
--    /* This interrupt should not nest inside others. */
--    BUILD_BUG_ON(APIC_PRIO_CLASS(IRQ_MOVE_CLEANUP_VECTOR) !=
--                 APIC_PRIO_CLASS(FIRST_DYNAMIC_VECTOR));
--
-     ack_APIC_irq();
-
-     me = smp_processor_id();
-@@ -774,6 +777,11 @@ void irq_move_cleanup_interrupt(struct cpu_user_regs *regs)
-          */
-         if ( irr & (1u << (vector % 32)) )
-         {
-+            if ( vector < IRQ_MOVE_CLEANUP_VECTOR )
-+            {
-+                ASSERT_UNREACHABLE();
-+                goto unlock;
-+            }
-             send_IPI_self(IRQ_MOVE_CLEANUP_VECTOR);
-             TRACE_3D(TRC_HW_IRQ_MOVE_CLEANUP_DELAY,
-                      irq, vector, smp_processor_id());
diff --git a/xsa358.patch b/xsa358.patch
deleted file mode 100644
index a9ff89a..0000000
--- a/xsa358.patch
+++ /dev/null
@@ -1,57 +0,0 @@
-From: Jan Beulich
-Subject: evtchn/FIFO: re-order and synchronize (with) map_control_block()
-
-For evtchn_fifo_set_pending()'s check of the control block having been
-set to be effective, ordering of respective reads and writes needs to be
-ensured: The control block pointer needs to be recorded strictly after
-the setting of all the queue heads, and it needs checking strictly
-before any uses of them (this latter aspect was already guaranteed).
-
-This is XSA-358 / CVE-2020-29570.
-
-Reported-by: Julien Grall
-Signed-off-by: Jan Beulich
-Acked-by: Julien Grall
----
-v3: Drop read-side barrier again, leveraging guest_test_and_set_bit().
-v2: Re-base over queue locking re-work.
-
---- a/xen/common/event_fifo.c
-+++ b/xen/common/event_fifo.c
-@@ -249,6 +249,10 @@ static void evtchn_fifo_set_pending(stru
-         goto unlock;
-     }
-
-+    /*
-+     * This also acts as the read counterpart of the smp_wmb() in
-+     * map_control_block().
-+     */
-     if ( guest_test_and_set_bit(d, EVTCHN_FIFO_LINKED, word) )
-         goto unlock;
-
-@@ -474,6 +478,7 @@ static int setup_control_block(struct vc
- static int map_control_block(struct vcpu *v, uint64_t gfn, uint32_t offset)
- {
-     void *virt;
-+    struct evtchn_fifo_control_block *control_block;
-     unsigned int i;
-     int rc;
-
-@@ -484,10 +489,15 @@ static int map_control_block(struct vcpu
-     if ( rc < 0 )
-         return rc;
-
--    v->evtchn_fifo->control_block = virt + offset;
-+    control_block = virt + offset;
-
-     for ( i = 0; i <= EVTCHN_FIFO_PRIORITY_MIN; i++ )
--        v->evtchn_fifo->queue[i].head = &v->evtchn_fifo->control_block->head[i];
-+        v->evtchn_fifo->queue[i].head = &control_block->head[i];
-+
-+    /* All queue heads must have been set before setting the control block. */
-+    smp_wmb();
-+
-+    v->evtchn_fifo->control_block = control_block;
-
-     return 0;
- }
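
[Editor's note: the write side above (smp_wmb() before publishing the
control block pointer) pairs with the full barrier implied by
guest_test_and_set_bit() on the read side. The same publish/consume
pattern in portable C11, as a hypothetical sketch with invented names:]

    #include <stdatomic.h>
    #include <stddef.h>

    struct control_block { int head[4]; };

    static int *queue_heads[4];                    /* written first */
    static _Atomic(struct control_block *) cb_ptr; /* published last */

    void publish(struct control_block *cb)
    {
        for (int i = 0; i < 4; i++)
            queue_heads[i] = &cb->head[i];  /* set every queue head */
        /* release store = the patch's smp_wmb() + pointer assignment */
        atomic_store_explicit(&cb_ptr, cb, memory_order_release);
    }

    struct control_block *consume(void)
    {
        /* acquire load = the read-side counterpart; a NULL result means
         * "not mapped yet" and the caller must bail out, exactly as
         * evtchn_fifo_set_pending() does */
        return atomic_load_explicit(&cb_ptr, memory_order_acquire);
    }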
diff --git a/xsa359.patch b/xsa359.patch
deleted file mode 100644
index 231810b..0000000
--- a/xsa359.patch
+++ /dev/null
@@ -1,40 +0,0 @@
-From: Jan Beulich
-Subject: evtchn/FIFO: add 2nd smp_rmb() to evtchn_fifo_word_from_port()
-
-Besides synchronizing with add_page_to_event_array(), the function also
-needs to synchronize with evtchn_fifo_init_control() setting both
-d->evtchn_fifo and (subsequently) d->evtchn_port_ops.
-
-This is XSA-359 / CVE-2020-29571.
-
-Reported-by: Julien Grall
-Signed-off-by: Jan Beulich
-Reviewed-by: Julien Grall
-
---- a/xen/common/event_fifo.c
-+++ b/xen/common/event_fifo.c
-@@ -55,6 +55,13 @@ static inline event_word_t *evtchn_fifo_
- {
-     unsigned int p, w;
-
-+    /*
-+     * Callers aren't required to hold d->event_lock, so we need to synchronize
-+     * with evtchn_fifo_init_control() setting d->evtchn_port_ops /after/
-+     * d->evtchn_fifo.
-+     */
-+    smp_rmb();
-+
-     if ( unlikely(port >= d->evtchn_fifo->num_evtchns) )
-         return NULL;
-
-@@ -606,6 +613,10 @@ int evtchn_fifo_init_control(struct evtc
-     if ( rc < 0 )
-         goto error;
-
-+    /*
-+     * This call, as a side effect, synchronizes with
-+     * evtchn_fifo_word_from_port().
-+     */
-     rc = map_control_block(v, gfn, offset);
-     if ( rc < 0 )
-         goto error;