From 960733fb60b6ac190cc407b13bd16707f72d9897 Mon Sep 17 00:00:00 2001 From: Michael Young Date: Dec 15 2020 23:46:04 +0000 Subject: multiple security updates xenstore watch notifications lacking permission checks [XSA-115, CVE-2020-29480] (#1908091) Xenstore: new domains inheriting existing node permissions [XSA-322, CVE-2020-29481] (#1908095) Xenstore: wrong path length check [XSA-323, CVE-2020-29482] (#1908096) Xenstore: guests can crash xenstored via watchs [XSA-324, CVE-2020-29484] (#1908088) Xenstore: guests can disturb domain cleanup [XSA-325, CVE-2020-29483] (#1905648) oxenstored memory leak in reset_watches [XSA-330, CVE-2020-29485] (#1908000) undue recursion in x86 HVM context switch code [XSA-348, CVE-2020-29566] (#1908085) oxenstored: node ownership can be changed by unprivileged clients [XSA-352, CVE-2020-29486] (#1908003) oxenstored: permissions not checked on root node [XSA-353, CVE-2020-29479] (#1908003) infinite loop when cleaning up IRQ vectors [XSA-356, CVE-2020-29567] (#1907932) FIFO event channels control block related ordering [XSA-358, CVE-2020-29570] (#1907931) FIFO event channels control structure ordering [XSA-359, CVE-2020-29571] (#1908089) --- diff --git a/xen.git-1ad177370df2db9129c97c7305962fc5ad298728.patch b/xen.git-1ad177370df2db9129c97c7305962fc5ad298728.patch new file mode 100644 index 0000000..a713b25 --- /dev/null +++ b/xen.git-1ad177370df2db9129c97c7305962fc5ad298728.patch @@ -0,0 +1,592 @@ +From 1ad177370df2db9129c97c7305962fc5ad298728 Mon Sep 17 00:00:00 2001 +From: Juergen Gross +Date: Tue, 1 Dec 2020 15:31:01 +0100 +Subject: [PATCH] xen/evtchn: rework per event channel lock + +Currently the lock for a single event channel needs to be taken with +interrupts off, which causes deadlocks in some cases. + +Rework the per event channel lock to be non-blocking for the case of +sending an event and removing the need for disabling interrupts for +taking the lock. + +The lock is needed for avoiding races between event channel state +changes (creation, closing, binding) against normal operations (set +pending, [un]masking, priority changes). + +Use a rwlock, but with some restrictions: + +- Changing the state of an event channel (creation, closing, binding) + needs to use write_lock(), with ASSERT()ing that the lock is taken as + writer only when the state of the event channel is either before or + after the locked region appropriate (either free or unbound). + +- Sending an event needs to use read_trylock() mostly, in case of not + obtaining the lock the operation is omitted. This is needed as + sending an event can happen with interrupts off (at least in some + cases). + +- Dumping the event channel state for debug purposes is using + read_trylock(), too, in order to avoid blocking in case the lock is + taken as writer for a long time. + +- All other cases can use read_lock(). + +Fixes: e045199c7c9c54 ("evtchn: address races with evtchn_reset()") +Signed-off-by: Juergen Gross +Reviewed-by: Jan Beulich +Acked-by: Julien Grall + +xen/events: fix build + +Commit 5f2df45ead7c1195 ("xen/evtchn: rework per event channel lock") +introduced a build failure for NDEBUG builds. 
+ +Fixes: 5f2df45ead7c1195 ("xen/evtchn: rework per event channel lock") +Signed-off-by: Juergen Gross +Signed-off-by: Jan Beulich +master commit: 5f2df45ead7c1195142f68b7923047a1e9479d54 +master date: 2020-11-10 14:36:15 +0100 +master commit: 53bacb86f496fdb11560d9e3b361bca7de60d268 +master date: 2020-11-11 08:56:21 +0100 +--- + xen/arch/x86/irq.c | 6 +- + xen/arch/x86/pv/shim.c | 9 +-- + xen/common/event_channel.c | 141 ++++++++++++++++++++++--------------- + xen/include/xen/event.h | 27 +++++-- + xen/include/xen/sched.h | 5 +- + 5 files changed, 116 insertions(+), 72 deletions(-) + +diff --git a/xen/arch/x86/irq.c b/xen/arch/x86/irq.c +index 93c4fb9a79..8d1f9a9fc6 100644 +--- a/xen/arch/x86/irq.c ++++ b/xen/arch/x86/irq.c +@@ -2495,14 +2495,12 @@ static void dump_irqs(unsigned char key) + pirq = domain_irq_to_pirq(d, irq); + info = pirq_info(d, pirq); + evtchn = evtchn_from_port(d, info->evtchn); +- local_irq_disable(); +- if ( spin_trylock(&evtchn->lock) ) ++ if ( evtchn_read_trylock(evtchn) ) + { + pending = evtchn_is_pending(d, evtchn); + masked = evtchn_is_masked(d, evtchn); +- spin_unlock(&evtchn->lock); ++ evtchn_read_unlock(evtchn); + } +- local_irq_enable(); + printk("d%d:%3d(%c%c%c)%c", + d->domain_id, pirq, "-P?"[pending], + "-M?"[masked], info->masked ? 'M' : '-', +diff --git a/xen/arch/x86/pv/shim.c b/xen/arch/x86/pv/shim.c +index 9aef7a860a..b4e83e0778 100644 +--- a/xen/arch/x86/pv/shim.c ++++ b/xen/arch/x86/pv/shim.c +@@ -660,11 +660,12 @@ void pv_shim_inject_evtchn(unsigned int port) + if ( port_is_valid(guest, port) ) + { + struct evtchn *chn = evtchn_from_port(guest, port); +- unsigned long flags; + +- spin_lock_irqsave(&chn->lock, flags); +- evtchn_port_set_pending(guest, chn->notify_vcpu_id, chn); +- spin_unlock_irqrestore(&chn->lock, flags); ++ if ( evtchn_read_trylock(chn) ) ++ { ++ evtchn_port_set_pending(guest, chn->notify_vcpu_id, chn); ++ evtchn_read_unlock(chn); ++ } + } + } + +diff --git a/xen/common/event_channel.c b/xen/common/event_channel.c +index 12f666cb79..181e5abaa6 100644 +--- a/xen/common/event_channel.c ++++ b/xen/common/event_channel.c +@@ -50,6 +50,40 @@ + + #define consumer_is_xen(e) (!!(e)->xen_consumer) + ++/* ++ * Lock an event channel exclusively. This is allowed only when the channel is ++ * free or unbound either when taking or when releasing the lock, as any ++ * concurrent operation on the event channel using evtchn_read_trylock() will ++ * just assume the event channel is free or unbound at the moment when the ++ * evtchn_read_trylock() returns false. ++ */ ++static inline void evtchn_write_lock(struct evtchn *evtchn) ++{ ++ write_lock(&evtchn->lock); ++ ++#ifndef NDEBUG ++ evtchn->old_state = evtchn->state; ++#endif ++} ++ ++static inline unsigned int old_state(const struct evtchn *evtchn) ++{ ++#ifndef NDEBUG ++ return evtchn->old_state; ++#else ++ return ECS_RESERVED; /* Just to allow things to build. */ ++#endif ++} ++ ++static inline void evtchn_write_unlock(struct evtchn *evtchn) ++{ ++ /* Enforce lock discipline. */ ++ ASSERT(old_state(evtchn) == ECS_FREE || old_state(evtchn) == ECS_UNBOUND || ++ evtchn->state == ECS_FREE || evtchn->state == ECS_UNBOUND); ++ ++ write_unlock(&evtchn->lock); ++} ++ + /* + * The function alloc_unbound_xen_event_channel() allows an arbitrary + * notifier function to be specified. 
However, very few unique functions +@@ -131,7 +165,7 @@ static struct evtchn *alloc_evtchn_bucket(struct domain *d, unsigned int port) + return NULL; + } + chn[i].port = port + i; +- spin_lock_init(&chn[i].lock); ++ rwlock_init(&chn[i].lock); + } + return chn; + } +@@ -249,7 +283,6 @@ static long evtchn_alloc_unbound(evtchn_alloc_unbound_t *alloc) + int port; + domid_t dom = alloc->dom; + long rc; +- unsigned long flags; + + d = rcu_lock_domain_by_any_id(dom); + if ( d == NULL ) +@@ -265,14 +298,14 @@ static long evtchn_alloc_unbound(evtchn_alloc_unbound_t *alloc) + if ( rc ) + goto out; + +- spin_lock_irqsave(&chn->lock, flags); ++ evtchn_write_lock(chn); + + chn->state = ECS_UNBOUND; + if ( (chn->u.unbound.remote_domid = alloc->remote_dom) == DOMID_SELF ) + chn->u.unbound.remote_domid = current->domain->domain_id; + evtchn_port_init(d, chn); + +- spin_unlock_irqrestore(&chn->lock, flags); ++ evtchn_write_unlock(chn); + + alloc->port = port; + +@@ -285,32 +318,26 @@ static long evtchn_alloc_unbound(evtchn_alloc_unbound_t *alloc) + } + + +-static unsigned long double_evtchn_lock(struct evtchn *lchn, +- struct evtchn *rchn) ++static void double_evtchn_lock(struct evtchn *lchn, struct evtchn *rchn) + { +- unsigned long flags; +- + if ( lchn <= rchn ) + { +- spin_lock_irqsave(&lchn->lock, flags); ++ evtchn_write_lock(lchn); + if ( lchn != rchn ) +- spin_lock(&rchn->lock); ++ evtchn_write_lock(rchn); + } + else + { +- spin_lock_irqsave(&rchn->lock, flags); +- spin_lock(&lchn->lock); ++ evtchn_write_lock(rchn); ++ evtchn_write_lock(lchn); + } +- +- return flags; + } + +-static void double_evtchn_unlock(struct evtchn *lchn, struct evtchn *rchn, +- unsigned long flags) ++static void double_evtchn_unlock(struct evtchn *lchn, struct evtchn *rchn) + { + if ( lchn != rchn ) +- spin_unlock(&lchn->lock); +- spin_unlock_irqrestore(&rchn->lock, flags); ++ evtchn_write_unlock(lchn); ++ evtchn_write_unlock(rchn); + } + + static long evtchn_bind_interdomain(evtchn_bind_interdomain_t *bind) +@@ -320,7 +347,6 @@ static long evtchn_bind_interdomain(evtchn_bind_interdomain_t *bind) + int lport, rport = bind->remote_port; + domid_t rdom = bind->remote_dom; + long rc; +- unsigned long flags; + + if ( rdom == DOMID_SELF ) + rdom = current->domain->domain_id; +@@ -356,7 +382,7 @@ static long evtchn_bind_interdomain(evtchn_bind_interdomain_t *bind) + if ( rc ) + goto out; + +- flags = double_evtchn_lock(lchn, rchn); ++ double_evtchn_lock(lchn, rchn); + + lchn->u.interdomain.remote_dom = rd; + lchn->u.interdomain.remote_port = rport; +@@ -373,7 +399,7 @@ static long evtchn_bind_interdomain(evtchn_bind_interdomain_t *bind) + */ + evtchn_port_set_pending(ld, lchn->notify_vcpu_id, lchn); + +- double_evtchn_unlock(lchn, rchn, flags); ++ double_evtchn_unlock(lchn, rchn); + + bind->local_port = lport; + +@@ -396,7 +422,6 @@ int evtchn_bind_virq(evtchn_bind_virq_t *bind, evtchn_port_t port) + struct domain *d = current->domain; + int virq = bind->virq, vcpu = bind->vcpu; + int rc = 0; +- unsigned long flags; + + if ( (virq < 0) || (virq >= ARRAY_SIZE(v->virq_to_evtchn)) ) + return -EINVAL; +@@ -434,14 +459,14 @@ int evtchn_bind_virq(evtchn_bind_virq_t *bind, evtchn_port_t port) + + chn = evtchn_from_port(d, port); + +- spin_lock_irqsave(&chn->lock, flags); ++ evtchn_write_lock(chn); + + chn->state = ECS_VIRQ; + chn->notify_vcpu_id = vcpu; + chn->u.virq = virq; + evtchn_port_init(d, chn); + +- spin_unlock_irqrestore(&chn->lock, flags); ++ evtchn_write_unlock(chn); + + v->virq_to_evtchn[virq] = bind->port = port; + +@@ -458,7 
+483,6 @@ static long evtchn_bind_ipi(evtchn_bind_ipi_t *bind) + struct domain *d = current->domain; + int port, vcpu = bind->vcpu; + long rc = 0; +- unsigned long flags; + + if ( domain_vcpu(d, vcpu) == NULL ) + return -ENOENT; +@@ -470,13 +494,13 @@ static long evtchn_bind_ipi(evtchn_bind_ipi_t *bind) + + chn = evtchn_from_port(d, port); + +- spin_lock_irqsave(&chn->lock, flags); ++ evtchn_write_lock(chn); + + chn->state = ECS_IPI; + chn->notify_vcpu_id = vcpu; + evtchn_port_init(d, chn); + +- spin_unlock_irqrestore(&chn->lock, flags); ++ evtchn_write_unlock(chn); + + bind->port = port; + +@@ -520,7 +544,6 @@ static long evtchn_bind_pirq(evtchn_bind_pirq_t *bind) + struct pirq *info; + int port = 0, pirq = bind->pirq; + long rc; +- unsigned long flags; + + if ( (pirq < 0) || (pirq >= d->nr_pirqs) ) + return -EINVAL; +@@ -553,14 +576,14 @@ static long evtchn_bind_pirq(evtchn_bind_pirq_t *bind) + goto out; + } + +- spin_lock_irqsave(&chn->lock, flags); ++ evtchn_write_lock(chn); + + chn->state = ECS_PIRQ; + chn->u.pirq.irq = pirq; + link_pirq_port(port, chn, v); + evtchn_port_init(d, chn); + +- spin_unlock_irqrestore(&chn->lock, flags); ++ evtchn_write_unlock(chn); + + bind->port = port; + +@@ -581,7 +604,6 @@ int evtchn_close(struct domain *d1, int port1, bool guest) + struct evtchn *chn1, *chn2; + int port2; + long rc = 0; +- unsigned long flags; + + again: + spin_lock(&d1->event_lock); +@@ -681,14 +703,14 @@ int evtchn_close(struct domain *d1, int port1, bool guest) + BUG_ON(chn2->state != ECS_INTERDOMAIN); + BUG_ON(chn2->u.interdomain.remote_dom != d1); + +- flags = double_evtchn_lock(chn1, chn2); ++ double_evtchn_lock(chn1, chn2); + + evtchn_free(d1, chn1); + + chn2->state = ECS_UNBOUND; + chn2->u.unbound.remote_domid = d1->domain_id; + +- double_evtchn_unlock(chn1, chn2, flags); ++ double_evtchn_unlock(chn1, chn2); + + goto out; + +@@ -696,9 +718,9 @@ int evtchn_close(struct domain *d1, int port1, bool guest) + BUG(); + } + +- spin_lock_irqsave(&chn1->lock, flags); ++ evtchn_write_lock(chn1); + evtchn_free(d1, chn1); +- spin_unlock_irqrestore(&chn1->lock, flags); ++ evtchn_write_unlock(chn1); + + out: + if ( d2 != NULL ) +@@ -718,7 +740,6 @@ int evtchn_send(struct domain *ld, unsigned int lport) + struct evtchn *lchn, *rchn; + struct domain *rd; + int rport, ret = 0; +- unsigned long flags; + + if ( !port_is_valid(ld, lport) ) + return -EINVAL; +@@ -731,7 +752,7 @@ int evtchn_send(struct domain *ld, unsigned int lport) + + lchn = evtchn_from_port(ld, lport); + +- spin_lock_irqsave(&lchn->lock, flags); ++ evtchn_read_lock(lchn); + + /* Guest cannot send via a Xen-attached event channel. 
*/ + if ( unlikely(consumer_is_xen(lchn)) ) +@@ -766,7 +787,7 @@ int evtchn_send(struct domain *ld, unsigned int lport) + } + + out: +- spin_unlock_irqrestore(&lchn->lock, flags); ++ evtchn_read_unlock(lchn); + + return ret; + } +@@ -793,9 +814,11 @@ void send_guest_vcpu_virq(struct vcpu *v, uint32_t virq) + + d = v->domain; + chn = evtchn_from_port(d, port); +- spin_lock(&chn->lock); +- evtchn_port_set_pending(d, v->vcpu_id, chn); +- spin_unlock(&chn->lock); ++ if ( evtchn_read_trylock(chn) ) ++ { ++ evtchn_port_set_pending(d, v->vcpu_id, chn); ++ evtchn_read_unlock(chn); ++ } + + out: + spin_unlock_irqrestore(&v->virq_lock, flags); +@@ -824,9 +847,11 @@ void send_guest_global_virq(struct domain *d, uint32_t virq) + goto out; + + chn = evtchn_from_port(d, port); +- spin_lock(&chn->lock); +- evtchn_port_set_pending(d, chn->notify_vcpu_id, chn); +- spin_unlock(&chn->lock); ++ if ( evtchn_read_trylock(chn) ) ++ { ++ evtchn_port_set_pending(d, chn->notify_vcpu_id, chn); ++ evtchn_read_unlock(chn); ++ } + + out: + spin_unlock_irqrestore(&v->virq_lock, flags); +@@ -836,7 +861,6 @@ void send_guest_pirq(struct domain *d, const struct pirq *pirq) + { + int port; + struct evtchn *chn; +- unsigned long flags; + + /* + * PV guests: It should not be possible to race with __evtchn_close(). The +@@ -851,9 +875,11 @@ void send_guest_pirq(struct domain *d, const struct pirq *pirq) + } + + chn = evtchn_from_port(d, port); +- spin_lock_irqsave(&chn->lock, flags); +- evtchn_port_set_pending(d, chn->notify_vcpu_id, chn); +- spin_unlock_irqrestore(&chn->lock, flags); ++ if ( evtchn_read_trylock(chn) ) ++ { ++ evtchn_port_set_pending(d, chn->notify_vcpu_id, chn); ++ evtchn_read_unlock(chn); ++ } + } + + static struct domain *global_virq_handlers[NR_VIRQS] __read_mostly; +@@ -1050,15 +1076,17 @@ int evtchn_unmask(unsigned int port) + { + struct domain *d = current->domain; + struct evtchn *evtchn; +- unsigned long flags; + + if ( unlikely(!port_is_valid(d, port)) ) + return -EINVAL; + + evtchn = evtchn_from_port(d, port); +- spin_lock_irqsave(&evtchn->lock, flags); ++ ++ evtchn_read_lock(evtchn); ++ + evtchn_port_unmask(d, evtchn); +- spin_unlock_irqrestore(&evtchn->lock, flags); ++ ++ evtchn_read_unlock(evtchn); + + return 0; + } +@@ -1304,7 +1332,6 @@ int alloc_unbound_xen_event_channel( + { + struct evtchn *chn; + int port, rc; +- unsigned long flags; + + spin_lock(&ld->event_lock); + +@@ -1317,14 +1344,14 @@ int alloc_unbound_xen_event_channel( + if ( rc ) + goto out; + +- spin_lock_irqsave(&chn->lock, flags); ++ evtchn_write_lock(chn); + + chn->state = ECS_UNBOUND; + chn->xen_consumer = get_xen_consumer(notification_fn); + chn->notify_vcpu_id = lvcpu; + chn->u.unbound.remote_domid = remote_domid; + +- spin_unlock_irqrestore(&chn->lock, flags); ++ evtchn_write_unlock(chn); + + write_atomic(&ld->xen_evtchns, ld->xen_evtchns + 1); + +@@ -1356,7 +1383,6 @@ void notify_via_xen_event_channel(struct domain *ld, int lport) + { + struct evtchn *lchn, *rchn; + struct domain *rd; +- unsigned long flags; + + if ( !port_is_valid(ld, lport) ) + { +@@ -1371,7 +1397,8 @@ void notify_via_xen_event_channel(struct domain *ld, int lport) + + lchn = evtchn_from_port(ld, lport); + +- spin_lock_irqsave(&lchn->lock, flags); ++ if ( !evtchn_read_trylock(lchn) ) ++ return; + + if ( likely(lchn->state == ECS_INTERDOMAIN) ) + { +@@ -1381,7 +1408,7 @@ void notify_via_xen_event_channel(struct domain *ld, int lport) + evtchn_port_set_pending(rd, rchn->notify_vcpu_id, rchn); + } + +- spin_unlock_irqrestore(&lchn->lock, flags); ++ 
evtchn_read_unlock(lchn); + } + + void evtchn_check_pollers(struct domain *d, unsigned int port) +diff --git a/xen/include/xen/event.h b/xen/include/xen/event.h +index fa93a3684a..6588333f42 100644 +--- a/xen/include/xen/event.h ++++ b/xen/include/xen/event.h +@@ -111,6 +111,21 @@ static inline unsigned int max_evtchns(const struct domain *d) + : BITS_PER_EVTCHN_WORD(d) * BITS_PER_EVTCHN_WORD(d); + } + ++static inline void evtchn_read_lock(struct evtchn *evtchn) ++{ ++ read_lock(&evtchn->lock); ++} ++ ++static inline bool evtchn_read_trylock(struct evtchn *evtchn) ++{ ++ return read_trylock(&evtchn->lock); ++} ++ ++static inline void evtchn_read_unlock(struct evtchn *evtchn) ++{ ++ read_unlock(&evtchn->lock); ++} ++ + static inline bool_t port_is_valid(struct domain *d, unsigned int p) + { + if ( p >= read_atomic(&d->valid_evtchns) ) +@@ -244,11 +259,10 @@ static inline bool evtchn_port_is_pending(struct domain *d, evtchn_port_t port) + { + struct evtchn *evtchn = evtchn_from_port(d, port); + bool rc; +- unsigned long flags; + +- spin_lock_irqsave(&evtchn->lock, flags); ++ evtchn_read_lock(evtchn); + rc = evtchn_is_pending(d, evtchn); +- spin_unlock_irqrestore(&evtchn->lock, flags); ++ evtchn_read_unlock(evtchn); + + return rc; + } +@@ -263,11 +277,12 @@ static inline bool evtchn_port_is_masked(struct domain *d, evtchn_port_t port) + { + struct evtchn *evtchn = evtchn_from_port(d, port); + bool rc; +- unsigned long flags; + +- spin_lock_irqsave(&evtchn->lock, flags); ++ evtchn_read_lock(evtchn); ++ + rc = evtchn_is_masked(d, evtchn); +- spin_unlock_irqrestore(&evtchn->lock, flags); ++ ++ evtchn_read_unlock(evtchn); + + return rc; + } +diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h +index 97ba8e0795..f782ffeb82 100644 +--- a/xen/include/xen/sched.h ++++ b/xen/include/xen/sched.h +@@ -85,7 +85,7 @@ extern domid_t hardware_domid; + + struct evtchn + { +- spinlock_t lock; ++ rwlock_t lock; + #define ECS_FREE 0 /* Channel is available for use. */ + #define ECS_RESERVED 1 /* Channel is reserved. */ + #define ECS_UNBOUND 2 /* Channel is waiting to bind to a remote domain. */ +@@ -114,6 +114,9 @@ struct evtchn + u16 virq; /* state == ECS_VIRQ */ + } u; + u8 priority; ++#ifndef NDEBUG ++ u8 old_state; /* State when taking lock in write mode. */ ++#endif + u8 last_priority; + u16 last_vcpu_id; + #ifdef CONFIG_XSM +-- +2.20.1 + diff --git a/xen.git-1cfb9b1c5b9e4c024f5f139d7a3d0357d2417b13.patch b/xen.git-1cfb9b1c5b9e4c024f5f139d7a3d0357d2417b13.patch new file mode 100644 index 0000000..12c64ca --- /dev/null +++ b/xen.git-1cfb9b1c5b9e4c024f5f139d7a3d0357d2417b13.patch @@ -0,0 +1,97 @@ +From 1cfb9b1c5b9e4c024f5f139d7a3d0357d2417b13 Mon Sep 17 00:00:00 2001 +From: Juergen Gross +Date: Tue, 1 Dec 2020 15:33:19 +0100 +Subject: [PATCH] xen/events: access last_priority and last_vcpu_id together + +The queue for a fifo event is depending on the vcpu_id and the +priority of the event. When sending an event it might happen the +event needs to change queues and the old queue needs to be kept for +keeping the links between queue elements intact. For this purpose +the event channel contains last_priority and last_vcpu_id values +elements for being able to identify the old queue. + +In order to avoid races always access last_priority and last_vcpu_id +with a single atomic operation avoiding any inconsistencies. 
+ +Signed-off-by: Juergen Gross +Reviewed-by: Julien Grall +master commit: 1277cb9dc5e966f1faf665bcded02b7533e38078 +master date: 2020-11-24 11:23:42 +0100 +--- + xen/common/event_fifo.c | 25 +++++++++++++++++++------ + xen/include/xen/sched.h | 3 +-- + 2 files changed, 20 insertions(+), 8 deletions(-) + +diff --git a/xen/common/event_fifo.c b/xen/common/event_fifo.c +index 27ab3a1c3f..2037b24196 100644 +--- a/xen/common/event_fifo.c ++++ b/xen/common/event_fifo.c +@@ -21,6 +21,14 @@ + + #include + ++union evtchn_fifo_lastq { ++ uint32_t raw; ++ struct { ++ uint8_t last_priority; ++ uint16_t last_vcpu_id; ++ }; ++}; ++ + static inline event_word_t *evtchn_fifo_word_from_port(const struct domain *d, + unsigned int port) + { +@@ -65,16 +73,18 @@ static struct evtchn_fifo_queue *lock_old_queue(const struct domain *d, + struct vcpu *v; + struct evtchn_fifo_queue *q, *old_q; + unsigned int try; ++ union evtchn_fifo_lastq lastq; + + for ( try = 0; try < 3; try++ ) + { +- v = d->vcpu[evtchn->last_vcpu_id]; +- old_q = &v->evtchn_fifo->queue[evtchn->last_priority]; ++ lastq.raw = read_atomic(&evtchn->fifo_lastq); ++ v = d->vcpu[lastq.last_vcpu_id]; ++ old_q = &v->evtchn_fifo->queue[lastq.last_priority]; + + spin_lock_irqsave(&old_q->lock, *flags); + +- v = d->vcpu[evtchn->last_vcpu_id]; +- q = &v->evtchn_fifo->queue[evtchn->last_priority]; ++ v = d->vcpu[lastq.last_vcpu_id]; ++ q = &v->evtchn_fifo->queue[lastq.last_priority]; + + if ( old_q == q ) + return old_q; +@@ -225,8 +235,11 @@ static void evtchn_fifo_set_pending(struct vcpu *v, struct evtchn *evtchn) + /* Moved to a different queue? */ + if ( old_q != q ) + { +- evtchn->last_vcpu_id = v->vcpu_id; +- evtchn->last_priority = q->priority; ++ union evtchn_fifo_lastq lastq = { }; ++ ++ lastq.last_vcpu_id = v->vcpu_id; ++ lastq.last_priority = q->priority; ++ write_atomic(&evtchn->fifo_lastq, lastq.raw); + + spin_unlock_irqrestore(&old_q->lock, flags); + spin_lock_irqsave(&q->lock, flags); +diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h +index f782ffeb82..99e2f1aac5 100644 +--- a/xen/include/xen/sched.h ++++ b/xen/include/xen/sched.h +@@ -117,8 +117,7 @@ struct evtchn + #ifndef NDEBUG + u8 old_state; /* State when taking lock in write mode. */ + #endif +- u8 last_priority; +- u16 last_vcpu_id; ++ u32 fifo_lastq; /* Data for fifo events identifying last queue. */ + #ifdef CONFIG_XSM + union { + #ifdef XSM_NEED_GENERIC_EVTCHN_SSID +-- +2.20.1 + diff --git a/xen.git-72bd989f51878bc9ba61e930b0c29b921a30dc0d.patch b/xen.git-72bd989f51878bc9ba61e930b0c29b921a30dc0d.patch new file mode 100644 index 0000000..21c5ddd --- /dev/null +++ b/xen.git-72bd989f51878bc9ba61e930b0c29b921a30dc0d.patch @@ -0,0 +1,240 @@ +From 72bd989f51878bc9ba61e930b0c29b921a30dc0d Mon Sep 17 00:00:00 2001 +From: Juergen Gross +Date: Tue, 1 Dec 2020 15:34:31 +0100 +Subject: [PATCH] xen/events: rework fifo queue locking + +Two cpus entering evtchn_fifo_set_pending() for the same event channel +can race in case the first one gets interrupted after setting +EVTCHN_FIFO_PENDING and when the other one manages to set +EVTCHN_FIFO_LINKED before the first one is testing that bit. This can +lead to evtchn_check_pollers() being called before the event is put +properly into the queue, resulting eventually in the guest not seeing +the event pending and thus blocking forever afterwards. 
+ +Note that commit 5f2df45ead7c1195 ("xen/evtchn: rework per event channel +lock") made the race just more obvious, while the fifo event channel +implementation had this race forever since the introduction and use of +per-channel locks, when an unmask operation was running in parallel with +an event channel send operation. + +Using a spinlock for the per event channel lock had turned out +problematic due to some paths needing to take the lock are called with +interrupts off, so the lock would need to disable interrupts, which in +turn broke some use cases related to vm events. + +For avoiding this race the queue locking in evtchn_fifo_set_pending() +needs to be reworked to cover the test of EVTCHN_FIFO_PENDING, +EVTCHN_FIFO_MASKED and EVTCHN_FIFO_LINKED, too. Additionally when an +event channel needs to change queues both queues need to be locked +initially, in order to avoid having a window with no lock held at all. + +Reported-by: Jan Beulich +Fixes: 5f2df45ead7c1195 ("xen/evtchn: rework per event channel lock") +Fixes: de6acb78bf0e137c ("evtchn: use a per-event channel lock for sending events") +Signed-off-by: Juergen Gross +Reviewed-by: Jan Beulich +master commit: 71ac522909e9302350a88bc378be99affa87067c +master date: 2020-11-30 14:05:39 +0100 +--- + xen/common/event_fifo.c | 128 ++++++++++++++++++++++------------------ + 1 file changed, 70 insertions(+), 58 deletions(-) + +diff --git a/xen/common/event_fifo.c b/xen/common/event_fifo.c +index 2037b24196..2f5e868b7a 100644 +--- a/xen/common/event_fifo.c ++++ b/xen/common/event_fifo.c +@@ -66,38 +66,6 @@ static void evtchn_fifo_init(struct domain *d, struct evtchn *evtchn) + d->domain_id, evtchn->port); + } + +-static struct evtchn_fifo_queue *lock_old_queue(const struct domain *d, +- struct evtchn *evtchn, +- unsigned long *flags) +-{ +- struct vcpu *v; +- struct evtchn_fifo_queue *q, *old_q; +- unsigned int try; +- union evtchn_fifo_lastq lastq; +- +- for ( try = 0; try < 3; try++ ) +- { +- lastq.raw = read_atomic(&evtchn->fifo_lastq); +- v = d->vcpu[lastq.last_vcpu_id]; +- old_q = &v->evtchn_fifo->queue[lastq.last_priority]; +- +- spin_lock_irqsave(&old_q->lock, *flags); +- +- v = d->vcpu[lastq.last_vcpu_id]; +- q = &v->evtchn_fifo->queue[lastq.last_priority]; +- +- if ( old_q == q ) +- return old_q; +- +- spin_unlock_irqrestore(&old_q->lock, *flags); +- } +- +- gprintk(XENLOG_WARNING, +- "dom%d port %d lost event (too many queue changes)\n", +- d->domain_id, evtchn->port); +- return NULL; +-} +- + static int try_set_link(event_word_t *word, event_word_t *w, uint32_t link) + { + event_word_t new, old; +@@ -169,6 +137,9 @@ static void evtchn_fifo_set_pending(struct vcpu *v, struct evtchn *evtchn) + event_word_t *word; + unsigned long flags; + bool_t was_pending; ++ struct evtchn_fifo_queue *q, *old_q; ++ unsigned int try; ++ bool linked = true; + + port = evtchn->port; + word = evtchn_fifo_word_from_port(d, port); +@@ -183,17 +154,67 @@ static void evtchn_fifo_set_pending(struct vcpu *v, struct evtchn *evtchn) + return; + } + ++ /* ++ * Lock all queues related to the event channel (in case of a queue change ++ * this might be two). ++ * It is mandatory to do that before setting and testing the PENDING bit ++ * and to hold the current queue lock until the event has been put into the ++ * list of pending events in order to avoid waking up a guest without the ++ * event being visibly pending in the guest. 
++ */ ++ for ( try = 0; try < 3; try++ ) ++ { ++ union evtchn_fifo_lastq lastq; ++ const struct vcpu *old_v; ++ ++ lastq.raw = read_atomic(&evtchn->fifo_lastq); ++ old_v = d->vcpu[lastq.last_vcpu_id]; ++ ++ q = &v->evtchn_fifo->queue[evtchn->priority]; ++ old_q = &old_v->evtchn_fifo->queue[lastq.last_priority]; ++ ++ if ( q == old_q ) ++ spin_lock_irqsave(&q->lock, flags); ++ else if ( q < old_q ) ++ { ++ spin_lock_irqsave(&q->lock, flags); ++ spin_lock(&old_q->lock); ++ } ++ else ++ { ++ spin_lock_irqsave(&old_q->lock, flags); ++ spin_lock(&q->lock); ++ } ++ ++ lastq.raw = read_atomic(&evtchn->fifo_lastq); ++ old_v = d->vcpu[lastq.last_vcpu_id]; ++ if ( q == &v->evtchn_fifo->queue[evtchn->priority] && ++ old_q == &old_v->evtchn_fifo->queue[lastq.last_priority] ) ++ break; ++ ++ if ( q != old_q ) ++ spin_unlock(&old_q->lock); ++ spin_unlock_irqrestore(&q->lock, flags); ++ } ++ + was_pending = guest_test_and_set_bit(d, EVTCHN_FIFO_PENDING, word); + ++ /* If we didn't get the lock bail out. */ ++ if ( try == 3 ) ++ { ++ gprintk(XENLOG_WARNING, ++ "%pd port %u lost event (too many queue changes)\n", ++ d, evtchn->port); ++ goto done; ++ } ++ + /* + * Link the event if it unmasked and not already linked. + */ + if ( !guest_test_bit(d, EVTCHN_FIFO_MASKED, word) && + !guest_test_bit(d, EVTCHN_FIFO_LINKED, word) ) + { +- struct evtchn_fifo_queue *q, *old_q; + event_word_t *tail_word; +- bool_t linked = 0; + + /* + * Control block not mapped. The guest must not unmask an +@@ -204,25 +225,11 @@ static void evtchn_fifo_set_pending(struct vcpu *v, struct evtchn *evtchn) + { + printk(XENLOG_G_WARNING + "%pv has no FIFO event channel control block\n", v); +- goto done; ++ goto unlock; + } + +- /* +- * No locking around getting the queue. This may race with +- * changing the priority but we are allowed to signal the +- * event once on the old priority. +- */ +- q = &v->evtchn_fifo->queue[evtchn->priority]; +- +- old_q = lock_old_queue(d, evtchn, &flags); +- if ( !old_q ) +- goto done; +- + if ( guest_test_and_set_bit(d, EVTCHN_FIFO_LINKED, word) ) +- { +- spin_unlock_irqrestore(&old_q->lock, flags); +- goto done; +- } ++ goto unlock; + + /* + * If this event was a tail, the old queue is now empty and +@@ -241,8 +248,8 @@ static void evtchn_fifo_set_pending(struct vcpu *v, struct evtchn *evtchn) + lastq.last_priority = q->priority; + write_atomic(&evtchn->fifo_lastq, lastq.raw); + +- spin_unlock_irqrestore(&old_q->lock, flags); +- spin_lock_irqsave(&q->lock, flags); ++ spin_unlock(&old_q->lock); ++ old_q = q; + } + + /* +@@ -255,6 +262,7 @@ static void evtchn_fifo_set_pending(struct vcpu *v, struct evtchn *evtchn) + * If the queue is empty (i.e., we haven't linked to the new + * event), head must be updated. 
+ */ ++ linked = false; + if ( q->tail ) + { + tail_word = evtchn_fifo_word_from_port(d, q->tail); +@@ -263,15 +271,19 @@ static void evtchn_fifo_set_pending(struct vcpu *v, struct evtchn *evtchn) + if ( !linked ) + write_atomic(q->head, port); + q->tail = port; ++ } + +- spin_unlock_irqrestore(&q->lock, flags); ++ unlock: ++ if ( q != old_q ) ++ spin_unlock(&old_q->lock); ++ spin_unlock_irqrestore(&q->lock, flags); + +- if ( !linked +- && !guest_test_and_set_bit(d, q->priority, +- &v->evtchn_fifo->control_block->ready) ) +- vcpu_mark_events_pending(v); +- } + done: ++ if ( !linked && ++ !guest_test_and_set_bit(d, q->priority, ++ &v->evtchn_fifo->control_block->ready) ) ++ vcpu_mark_events_pending(v); ++ + if ( !was_pending ) + evtchn_check_pollers(d, port); + } +-- +2.20.1 + diff --git a/xen.git-8d148003fdf7bd9e28137e6683ef46902af39146.patch b/xen.git-8d148003fdf7bd9e28137e6683ef46902af39146.patch new file mode 100644 index 0000000..dfff098 --- /dev/null +++ b/xen.git-8d148003fdf7bd9e28137e6683ef46902af39146.patch @@ -0,0 +1,41 @@ +From 8d148003fdf7bd9e28137e6683ef46902af39146 Mon Sep 17 00:00:00 2001 +From: Jan Beulich +Date: Tue, 20 Oct 2020 14:42:52 +0200 +Subject: [PATCH] evtchn/fifo: use stable fields when recording "last queue" + information + +Both evtchn->priority and evtchn->notify_vcpu_id could change behind the +back of evtchn_fifo_set_pending(), as for it - in the case of +interdomain channels - only the remote side's per-channel lock is held. +Neither the queue's priority nor the vCPU's vcpu_id fields have similar +properties, so they seem better suited for the purpose. In particular +they reflect the respective evtchn fields' values at the time they were +used to determine queue and vCPU. + +Signed-off-by: Jan Beulich +Reviewed-by: Julien Grall +Reviewed-by: Paul Durrant +master commit: 6f6f07b64cbe90e54f8e62b4d6f2404cf5306536 +master date: 2020-10-02 08:37:35 +0200 +--- + xen/common/event_fifo.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/xen/common/event_fifo.c b/xen/common/event_fifo.c +index 68d0c7a632..27ab3a1c3f 100644 +--- a/xen/common/event_fifo.c ++++ b/xen/common/event_fifo.c +@@ -225,8 +225,8 @@ static void evtchn_fifo_set_pending(struct vcpu *v, struct evtchn *evtchn) + /* Moved to a different queue? 
*/ + if ( old_q != q ) + { +- evtchn->last_vcpu_id = evtchn->notify_vcpu_id; +- evtchn->last_priority = evtchn->priority; ++ evtchn->last_vcpu_id = v->vcpu_id; ++ evtchn->last_priority = q->priority; + + spin_unlock_irqrestore(&old_q->lock, flags); + spin_lock_irqsave(&q->lock, flags); +-- +2.20.1 + diff --git a/xen.spec b/xen.spec index 2c7ddc0..4325a81 100644 --- a/xen.spec +++ b/xen.spec @@ -58,7 +58,7 @@ Summary: Xen is a virtual machine monitor Name: xen Version: 4.14.0 -Release: 13%{?dist} +Release: 14%{?dist} License: GPLv2+ and LGPLv2+ and BSD URL: http://xen.org/ Source0: https://downloads.xenproject.org/release/xen/%{version}/xen-%{version}.tar.gz @@ -143,6 +143,40 @@ Patch74: xsa351-x86-4.14-1.patch Patch75: xsa351-x86-4.14-2.patch Patch76: zstd-dom0.patch Patch77: xsa355.patch +Patch78: xsa115-4.14-c-0001-tools-xenstore-allow-removing-child-of-a-node-exceed.patch +Patch79: xsa115-4.14-c-0002-tools-xenstore-ignore-transaction-id-for-un-watch.patch +Patch80: xsa115-4.14-c-0003-tools-xenstore-fix-node-accounting-after-failed-node.patch +Patch81: xsa115-4.14-c-0004-tools-xenstore-simplify-and-rename-check_event_node.patch +Patch82: xsa115-4.14-c-0005-tools-xenstore-check-privilege-for-XS_IS_DOMAIN_INTR.patch +Patch83: xsa115-4.14-c-0006-tools-xenstore-rework-node-removal.patch +Patch84: xsa115-4.14-c-0007-tools-xenstore-fire-watches-only-when-removing-a-spe.patch +Patch85: xsa115-4.14-c-0008-tools-xenstore-introduce-node_perms-structure.patch +Patch86: xsa115-4.14-c-0009-tools-xenstore-allow-special-watches-for-privileged-.patch +Patch87: xsa115-4.14-c-0010-tools-xenstore-avoid-watch-events-for-nodes-without-.patch +Patch88: xsa115-o-0001-tools-ocaml-xenstored-ignore-transaction-id-for-un-w.patch +Patch89: xsa115-o-0002-tools-ocaml-xenstored-check-privilege-for-XS_IS_DOMA.patch +Patch90: xsa115-o-0003-tools-ocaml-xenstored-unify-watch-firing.patch +Patch91: xsa115-o-0004-tools-ocaml-xenstored-introduce-permissions-for-spec.patch +Patch92: xsa115-o-0005-tools-ocaml-xenstored-avoid-watch-events-for-nodes-w.patch +Patch93: xsa115-o-0006-tools-ocaml-xenstored-add-xenstored.conf-flag-to-tur.patch +Patch94: xsa322-4.14-c.patch +Patch95: xsa322-o.patch +Patch96: xsa323.patch +Patch97: xsa324.patch +Patch98: xsa325-4.14.patch +Patch99: xsa330.patch +Patch100: xsa348-1.patch +Patch101: xsa348-2.patch +Patch102: xsa348-3.patch +Patch103: xsa352.patch +Patch104: xsa353.patch +Patch105: xsa356.patch +Patch106: xen.git-8d148003fdf7bd9e28137e6683ef46902af39146.patch +Patch107: xen.git-1ad177370df2db9129c97c7305962fc5ad298728.patch +Patch108: xen.git-1cfb9b1c5b9e4c024f5f139d7a3d0357d2417b13.patch +Patch109: xen.git-72bd989f51878bc9ba61e930b0c29b921a30dc0d.patch +Patch110: xsa358.patch +Patch111: xsa359.patch %if %build_qemutrad @@ -375,6 +409,40 @@ manage Xen virtual machines. 
%patch75 -p1 %patch76 -p1 %patch77 -p1 +%patch78 -p1 +%patch79 -p1 +%patch80 -p1 +%patch81 -p1 +%patch82 -p1 +%patch83 -p1 +%patch84 -p1 +%patch85 -p1 +%patch86 -p1 +%patch87 -p1 +%patch88 -p1 +%patch89 -p1 +%patch90 -p1 +%patch91 -p1 +%patch92 -p1 +%patch93 -p1 +%patch94 -p1 +%patch95 -p1 +%patch96 -p1 +%patch97 -p1 +%patch98 -p1 +%patch99 -p1 +%patch100 -p1 +%patch101 -p1 +%patch102 -p1 +%patch103 -p1 +%patch104 -p1 +%patch105 -p1 +%patch106 -p1 +%patch107 -p1 +%patch108 -p1 +%patch109 -p1 +%patch110 -p1 +%patch111 -p1 # qemu-xen-traditional patches pushd tools/qemu-xen-traditional @@ -968,6 +1036,31 @@ fi %endif %changelog +* Tue Dec 15 2020 Michael Young - 4.14.0-14 +- xenstore watch notifications lacking permission checks [XSA-115, + CVE-2020-29480] (#1908091) +- Xenstore: new domains inheriting existing node permissions [XSA-322, + CVE-2020-29481] (#1908095) +- Xenstore: wrong path length check [XSA-323, CVE-2020-29482] (#1908096) +- Xenstore: guests can crash xenstored via watchs [XSA-324, CVE-2020-29484] + (#1908088) +- Xenstore: guests can disturb domain cleanup [XSA-325, CVE-2020-29483] + (#1905648) +- oxenstored memory leak in reset_watches [XSA-330, CVE-2020-29485] + (#1908000) +- undue recursion in x86 HVM context switch code [XSA-348, CVE-2020-29566] + (#1908085) +- oxenstored: node ownership can be changed by unprivileged clients + [XSA-352, CVE-2020-29486] (#1908003) +- oxenstored: permissions not checked on root node [XSA-353, CVE-2020-29479] + (#1908003) +- infinite loop when cleaning up IRQ vectors [XSA-356, CVE-2020-29567] + (#1907932) +- FIFO event channels control block related ordering [XSA-358, + CVE-2020-29570] (#1907931) +- FIFO event channels control structure ordering [XSA-359, CVE-2020-29571] + (#1908089) + * Sat Dec 05 2020 Jeff Law - 4.14.0-13 - Work around another gcc-11 stringop-overflow diagnostic diff --git a/xsa115-4.14-c-0001-tools-xenstore-allow-removing-child-of-a-node-exceed.patch b/xsa115-4.14-c-0001-tools-xenstore-allow-removing-child-of-a-node-exceed.patch new file mode 100644 index 0000000..fb29db7 --- /dev/null +++ b/xsa115-4.14-c-0001-tools-xenstore-allow-removing-child-of-a-node-exceed.patch @@ -0,0 +1,157 @@ +From 71623492f7b1b6d63ed76e2bf970c113b88ffa0b Mon Sep 17 00:00:00 2001 +From: Juergen Gross +Date: Thu, 11 Jun 2020 16:12:37 +0200 +Subject: [PATCH 01/10] tools/xenstore: allow removing child of a node + exceeding quota + +An unprivileged user of Xenstore is not allowed to write nodes with a +size exceeding a global quota, while privileged users like dom0 are +allowed to write such nodes. The size of a node is the needed space +to store all node specific data, this includes the names of all +children of the node. + +When deleting a node its parent has to be modified by removing the +name of the to be deleted child from it. + +This results in the strange situation that an unprivileged owner of a +node might not succeed in deleting that node in case its parent is +exceeding the quota of that unprivileged user (it might have been +written by dom0), as the user is not allowed to write the updated +parent node. + +Fix that by not checking the quota when writing a node for the +purpose of removing a child's name only. + +The same applies to transaction handling: a node being read during a +transaction is written to the transaction specific area and it should +not be tested for exceeding the quota, as it might not be owned by +the reader and presumably the original write would have failed if the +node is owned by the reader. 
+ +This is part of XSA-115. + +Signed-off-by: Juergen Gross +Reviewed-by: Julien Grall +Reviewed-by: Paul Durrant +--- + tools/xenstore/xenstored_core.c | 20 +++++++++++--------- + tools/xenstore/xenstored_core.h | 3 ++- + tools/xenstore/xenstored_transaction.c | 2 +- + 3 files changed, 14 insertions(+), 11 deletions(-) + +diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c +index 7bd959f28b39..62a17a686edc 100644 +--- a/tools/xenstore/xenstored_core.c ++++ b/tools/xenstore/xenstored_core.c +@@ -419,7 +419,8 @@ static struct node *read_node(struct connection *conn, const void *ctx, + return node; + } + +-int write_node_raw(struct connection *conn, TDB_DATA *key, struct node *node) ++int write_node_raw(struct connection *conn, TDB_DATA *key, struct node *node, ++ bool no_quota_check) + { + TDB_DATA data; + void *p; +@@ -429,7 +430,7 @@ int write_node_raw(struct connection *conn, TDB_DATA *key, struct node *node) + + node->num_perms*sizeof(node->perms[0]) + + node->datalen + node->childlen; + +- if (domain_is_unprivileged(conn) && ++ if (!no_quota_check && domain_is_unprivileged(conn) && + data.dsize >= quota_max_entry_size) { + errno = ENOSPC; + return errno; +@@ -457,14 +458,15 @@ int write_node_raw(struct connection *conn, TDB_DATA *key, struct node *node) + return 0; + } + +-static int write_node(struct connection *conn, struct node *node) ++static int write_node(struct connection *conn, struct node *node, ++ bool no_quota_check) + { + TDB_DATA key; + + if (access_node(conn, node, NODE_ACCESS_WRITE, &key)) + return errno; + +- return write_node_raw(conn, &key, node); ++ return write_node_raw(conn, &key, node, no_quota_check); + } + + static enum xs_perm_type perm_for_conn(struct connection *conn, +@@ -1001,7 +1003,7 @@ static struct node *create_node(struct connection *conn, const void *ctx, + /* We write out the nodes down, setting destructor in case + * something goes wrong. 
*/ + for (i = node; i; i = i->parent) { +- if (write_node(conn, i)) { ++ if (write_node(conn, i, false)) { + domain_entry_dec(conn, i); + return NULL; + } +@@ -1041,7 +1043,7 @@ static int do_write(struct connection *conn, struct buffered_data *in) + } else { + node->data = in->buffer + offset; + node->datalen = datalen; +- if (write_node(conn, node)) ++ if (write_node(conn, node, false)) + return errno; + } + +@@ -1117,7 +1119,7 @@ static int remove_child_entry(struct connection *conn, struct node *node, + size_t childlen = strlen(node->children + offset); + memdel(node->children, offset, childlen + 1, node->childlen); + node->childlen -= childlen + 1; +- return write_node(conn, node); ++ return write_node(conn, node, true); + } + + +@@ -1256,7 +1258,7 @@ static int do_set_perms(struct connection *conn, struct buffered_data *in) + node->num_perms = num; + domain_entry_inc(conn, node); + +- if (write_node(conn, node)) ++ if (write_node(conn, node, false)) + return errno; + + fire_watches(conn, in, name, false); +@@ -1516,7 +1518,7 @@ static void manual_node(const char *name, const char *child) + if (child) + node->childlen = strlen(child) + 1; + +- if (write_node(NULL, node)) ++ if (write_node(NULL, node, false)) + barf_perror("Could not create initial node %s", name); + talloc_free(node); + } +diff --git a/tools/xenstore/xenstored_core.h b/tools/xenstore/xenstored_core.h +index c4c32bc88f0c..29d638fbc5a0 100644 +--- a/tools/xenstore/xenstored_core.h ++++ b/tools/xenstore/xenstored_core.h +@@ -149,7 +149,8 @@ void send_ack(struct connection *conn, enum xsd_sockmsg_type type); + char *xenstore_canonicalize(struct connection *conn, const void *ctx, const char *node); + + /* Write a node to the tdb data base. */ +-int write_node_raw(struct connection *conn, TDB_DATA *key, struct node *node); ++int write_node_raw(struct connection *conn, TDB_DATA *key, struct node *node, ++ bool no_quota_check); + + /* Get this node, checking we have permissions. */ + struct node *get_node(struct connection *conn, +diff --git a/tools/xenstore/xenstored_transaction.c b/tools/xenstore/xenstored_transaction.c +index 2824f7b359b8..e87897573469 100644 +--- a/tools/xenstore/xenstored_transaction.c ++++ b/tools/xenstore/xenstored_transaction.c +@@ -276,7 +276,7 @@ int access_node(struct connection *conn, struct node *node, + i->check_gen = true; + if (node->generation != NO_GENERATION) { + set_tdb_key(trans_name, &local_key); +- ret = write_node_raw(conn, &local_key, node); ++ ret = write_node_raw(conn, &local_key, node, true); + if (ret) + goto err; + i->ta_node = true; +-- +2.17.1 + diff --git a/xsa115-4.14-c-0002-tools-xenstore-ignore-transaction-id-for-un-watch.patch b/xsa115-4.14-c-0002-tools-xenstore-ignore-transaction-id-for-un-watch.patch new file mode 100644 index 0000000..42ccd5a --- /dev/null +++ b/xsa115-4.14-c-0002-tools-xenstore-ignore-transaction-id-for-un-watch.patch @@ -0,0 +1,86 @@ +From 072c729cfe90b4b09cacb12d912ba088db8274fe Mon Sep 17 00:00:00 2001 +From: Juergen Gross +Date: Thu, 11 Jun 2020 16:12:38 +0200 +Subject: [PATCH 02/10] tools/xenstore: ignore transaction id for [un]watch + +Instead of ignoring the transaction id for XS_WATCH and XS_UNWATCH +commands as it is documented in docs/misc/xenstore.txt, it is tested +for validity today. + +Really ignore the transaction id for XS_WATCH and XS_UNWATCH. + +This is part of XSA-115. 
+ +Signed-off-by: Juergen Gross +Reviewed-by: Julien Grall +Reviewed-by: Paul Durrant +--- + tools/xenstore/xenstored_core.c | 26 ++++++++++++++++---------- + 1 file changed, 16 insertions(+), 10 deletions(-) + +diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c +index 62a17a686edc..2f989524b497 100644 +--- a/tools/xenstore/xenstored_core.c ++++ b/tools/xenstore/xenstored_core.c +@@ -1270,13 +1270,17 @@ static int do_set_perms(struct connection *conn, struct buffered_data *in) + static struct { + const char *str; + int (*func)(struct connection *conn, struct buffered_data *in); ++ unsigned int flags; ++#define XS_FLAG_NOTID (1U << 0) /* Ignore transaction id. */ + } const wire_funcs[XS_TYPE_COUNT] = { + [XS_CONTROL] = { "CONTROL", do_control }, + [XS_DIRECTORY] = { "DIRECTORY", send_directory }, + [XS_READ] = { "READ", do_read }, + [XS_GET_PERMS] = { "GET_PERMS", do_get_perms }, +- [XS_WATCH] = { "WATCH", do_watch }, +- [XS_UNWATCH] = { "UNWATCH", do_unwatch }, ++ [XS_WATCH] = ++ { "WATCH", do_watch, XS_FLAG_NOTID }, ++ [XS_UNWATCH] = ++ { "UNWATCH", do_unwatch, XS_FLAG_NOTID }, + [XS_TRANSACTION_START] = { "TRANSACTION_START", do_transaction_start }, + [XS_TRANSACTION_END] = { "TRANSACTION_END", do_transaction_end }, + [XS_INTRODUCE] = { "INTRODUCE", do_introduce }, +@@ -1298,7 +1302,7 @@ static struct { + + static const char *sockmsg_string(enum xsd_sockmsg_type type) + { +- if ((unsigned)type < XS_TYPE_COUNT && wire_funcs[type].str) ++ if ((unsigned int)type < ARRAY_SIZE(wire_funcs) && wire_funcs[type].str) + return wire_funcs[type].str; + + return "**UNKNOWN**"; +@@ -1313,7 +1317,14 @@ static void process_message(struct connection *conn, struct buffered_data *in) + enum xsd_sockmsg_type type = in->hdr.msg.type; + int ret; + +- trans = transaction_lookup(conn, in->hdr.msg.tx_id); ++ if ((unsigned int)type >= XS_TYPE_COUNT || !wire_funcs[type].func) { ++ eprintf("Client unknown operation %i", type); ++ send_error(conn, ENOSYS); ++ return; ++ } ++ ++ trans = (wire_funcs[type].flags & XS_FLAG_NOTID) ++ ? NULL : transaction_lookup(conn, in->hdr.msg.tx_id); + if (IS_ERR(trans)) { + send_error(conn, -PTR_ERR(trans)); + return; +@@ -1322,12 +1333,7 @@ static void process_message(struct connection *conn, struct buffered_data *in) + assert(conn->transaction == NULL); + conn->transaction = trans; + +- if ((unsigned)type < XS_TYPE_COUNT && wire_funcs[type].func) +- ret = wire_funcs[type].func(conn, in); +- else { +- eprintf("Client unknown operation %i", type); +- ret = ENOSYS; +- } ++ ret = wire_funcs[type].func(conn, in); + if (ret) + send_error(conn, ret); + +-- +2.17.1 + diff --git a/xsa115-4.14-c-0003-tools-xenstore-fix-node-accounting-after-failed-node.patch b/xsa115-4.14-c-0003-tools-xenstore-fix-node-accounting-after-failed-node.patch new file mode 100644 index 0000000..94c3f1f --- /dev/null +++ b/xsa115-4.14-c-0003-tools-xenstore-fix-node-accounting-after-failed-node.patch @@ -0,0 +1,104 @@ +From a133627453898759ca73dd5c1c185c3830fed754 Mon Sep 17 00:00:00 2001 +From: Juergen Gross +Date: Thu, 11 Jun 2020 16:12:39 +0200 +Subject: [PATCH 03/10] tools/xenstore: fix node accounting after failed node + creation + +When a node creation fails the number of nodes of the domain should be +the same as before the failed node creation. In case of failure when +trying to create a node requiring to create one or more intermediate +nodes as well (e.g. 
when /a/b/c/d is to be created, but /a/b isn't +existing yet) it might happen that the number of nodes of the creating +domain is not reset to the value it had before. + +So move the quota accounting out of construct_node() and into the node +write loop in create_node() in order to be able to undo the accounting +in case of an error in the intermediate node destructor. + +This is part of XSA-115. + +Signed-off-by: Juergen Gross +Reviewed-by: Paul Durrant +Acked-by: Julien Grall +--- + tools/xenstore/xenstored_core.c | 37 ++++++++++++++++++++++----------- + 1 file changed, 25 insertions(+), 12 deletions(-) + +diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c +index 2f989524b497..c971519e542a 100644 +--- a/tools/xenstore/xenstored_core.c ++++ b/tools/xenstore/xenstored_core.c +@@ -927,11 +927,6 @@ static struct node *construct_node(struct connection *conn, const void *ctx, + if (!parent) + return NULL; + +- if (domain_entry(conn) >= quota_nb_entry_per_domain) { +- errno = ENOSPC; +- return NULL; +- } +- + /* Add child to parent. */ + base = basename(name); + baselen = strlen(base) + 1; +@@ -964,7 +959,6 @@ static struct node *construct_node(struct connection *conn, const void *ctx, + node->children = node->data = NULL; + node->childlen = node->datalen = 0; + node->parent = parent; +- domain_entry_inc(conn, node); + return node; + + nomem: +@@ -984,6 +978,9 @@ static int destroy_node(void *_node) + key.dsize = strlen(node->name); + + tdb_delete(tdb_ctx, key); ++ ++ domain_entry_dec(talloc_parent(node), node); ++ + return 0; + } + +@@ -1000,18 +997,34 @@ static struct node *create_node(struct connection *conn, const void *ctx, + node->data = data; + node->datalen = datalen; + +- /* We write out the nodes down, setting destructor in case +- * something goes wrong. */ ++ /* ++ * We write out the nodes bottom up. ++ * All new created nodes will have i->parent set, while the final ++ * node will be already existing and won't have i->parent set. ++ * New nodes are subject to quota handling. ++ * Initially set a destructor for all new nodes removing them from ++ * TDB again and undoing quota accounting for the case of an error ++ * during the write loop. ++ */ + for (i = node; i; i = i->parent) { +- if (write_node(conn, i, false)) { +- domain_entry_dec(conn, i); ++ /* i->parent is set for each new node, so check quota. */ ++ if (i->parent && ++ domain_entry(conn) >= quota_nb_entry_per_domain) { ++ errno = ENOSPC; + return NULL; + } +- talloc_set_destructor(i, destroy_node); ++ if (write_node(conn, i, false)) ++ return NULL; ++ ++ /* Account for new node, set destructor for error case. 
*/ ++ if (i->parent) { ++ domain_entry_inc(conn, i); ++ talloc_set_destructor(i, destroy_node); ++ } + } + + /* OK, now remove destructors so they stay around */ +- for (i = node; i; i = i->parent) ++ for (i = node; i->parent; i = i->parent) + talloc_set_destructor(i, NULL); + return node; + } +-- +2.17.1 + diff --git a/xsa115-4.14-c-0004-tools-xenstore-simplify-and-rename-check_event_node.patch b/xsa115-4.14-c-0004-tools-xenstore-simplify-and-rename-check_event_node.patch new file mode 100644 index 0000000..5a7d705 --- /dev/null +++ b/xsa115-4.14-c-0004-tools-xenstore-simplify-and-rename-check_event_node.patch @@ -0,0 +1,55 @@ +From dc6cf381bdeca4013b6bfe25c27e57f010e7ca84 Mon Sep 17 00:00:00 2001 +From: Juergen Gross +Date: Thu, 11 Jun 2020 16:12:40 +0200 +Subject: [PATCH 04/10] tools/xenstore: simplify and rename check_event_node() + +There is no path which allows to call check_event_node() without a +event name. So don't let the result depend on the name being NULL and +add an assert() covering that case. + +Rename the function to check_special_event() to better match the +semantics. + +This is part of XSA-115. + +Signed-off-by: Juergen Gross +Reviewed-by: Julien Grall +Reviewed-by: Paul Durrant +--- + tools/xenstore/xenstored_watch.c | 12 +++++------- + 1 file changed, 5 insertions(+), 7 deletions(-) + +diff --git a/tools/xenstore/xenstored_watch.c b/tools/xenstore/xenstored_watch.c +index 7dedca60dfd6..f2f1bed47cc6 100644 +--- a/tools/xenstore/xenstored_watch.c ++++ b/tools/xenstore/xenstored_watch.c +@@ -47,13 +47,11 @@ struct watch + char *node; + }; + +-static bool check_event_node(const char *node) ++static bool check_special_event(const char *name) + { +- if (!node || !strstarts(node, "@")) { +- errno = EINVAL; +- return false; +- } +- return true; ++ assert(name); ++ ++ return strstarts(name, "@"); + } + + /* Is child a subnode of parent, or equal? */ +@@ -87,7 +85,7 @@ static void add_event(struct connection *conn, + unsigned int len; + char *data; + +- if (!check_event_node(name)) { ++ if (!check_special_event(name)) { + /* Can this conn load node, or see that it doesn't exist? */ + struct node *node = get_node(conn, ctx, name, XS_PERM_READ); + /* +-- +2.17.1 + diff --git a/xsa115-4.14-c-0005-tools-xenstore-check-privilege-for-XS_IS_DOMAIN_INTR.patch b/xsa115-4.14-c-0005-tools-xenstore-check-privilege-for-XS_IS_DOMAIN_INTR.patch new file mode 100644 index 0000000..8025401 --- /dev/null +++ b/xsa115-4.14-c-0005-tools-xenstore-check-privilege-for-XS_IS_DOMAIN_INTR.patch @@ -0,0 +1,115 @@ +From cd456dd7e3c4bbe229a0307a469c2fc3b8e7b590 Mon Sep 17 00:00:00 2001 +From: Juergen Gross +Date: Thu, 11 Jun 2020 16:12:41 +0200 +Subject: [PATCH 05/10] tools/xenstore: check privilege for + XS_IS_DOMAIN_INTRODUCED + +The Xenstore command XS_IS_DOMAIN_INTRODUCED should be possible for +privileged domains only (the only user in the tree is the xenpaging +daemon). + +Instead of having the privilege test for each command introduce a +per-command flag for that purpose. + +This is part of XSA-115. 
+ +Signed-off-by: Juergen Gross +Reviewed-by: Julien Grall +Reviewed-by: Paul Durrant +--- + tools/xenstore/xenstored_core.c | 24 ++++++++++++++++++------ + tools/xenstore/xenstored_domain.c | 7 ++----- + 2 files changed, 20 insertions(+), 11 deletions(-) + +diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c +index c971519e542a..f38196ae2825 100644 +--- a/tools/xenstore/xenstored_core.c ++++ b/tools/xenstore/xenstored_core.c +@@ -1285,8 +1285,10 @@ static struct { + int (*func)(struct connection *conn, struct buffered_data *in); + unsigned int flags; + #define XS_FLAG_NOTID (1U << 0) /* Ignore transaction id. */ ++#define XS_FLAG_PRIV (1U << 1) /* Privileged domain only. */ + } const wire_funcs[XS_TYPE_COUNT] = { +- [XS_CONTROL] = { "CONTROL", do_control }, ++ [XS_CONTROL] = ++ { "CONTROL", do_control, XS_FLAG_PRIV }, + [XS_DIRECTORY] = { "DIRECTORY", send_directory }, + [XS_READ] = { "READ", do_read }, + [XS_GET_PERMS] = { "GET_PERMS", do_get_perms }, +@@ -1296,8 +1298,10 @@ static struct { + { "UNWATCH", do_unwatch, XS_FLAG_NOTID }, + [XS_TRANSACTION_START] = { "TRANSACTION_START", do_transaction_start }, + [XS_TRANSACTION_END] = { "TRANSACTION_END", do_transaction_end }, +- [XS_INTRODUCE] = { "INTRODUCE", do_introduce }, +- [XS_RELEASE] = { "RELEASE", do_release }, ++ [XS_INTRODUCE] = ++ { "INTRODUCE", do_introduce, XS_FLAG_PRIV }, ++ [XS_RELEASE] = ++ { "RELEASE", do_release, XS_FLAG_PRIV }, + [XS_GET_DOMAIN_PATH] = { "GET_DOMAIN_PATH", do_get_domain_path }, + [XS_WRITE] = { "WRITE", do_write }, + [XS_MKDIR] = { "MKDIR", do_mkdir }, +@@ -1306,9 +1310,11 @@ static struct { + [XS_WATCH_EVENT] = { "WATCH_EVENT", NULL }, + [XS_ERROR] = { "ERROR", NULL }, + [XS_IS_DOMAIN_INTRODUCED] = +- { "IS_DOMAIN_INTRODUCED", do_is_domain_introduced }, +- [XS_RESUME] = { "RESUME", do_resume }, +- [XS_SET_TARGET] = { "SET_TARGET", do_set_target }, ++ { "IS_DOMAIN_INTRODUCED", do_is_domain_introduced, XS_FLAG_PRIV }, ++ [XS_RESUME] = ++ { "RESUME", do_resume, XS_FLAG_PRIV }, ++ [XS_SET_TARGET] = ++ { "SET_TARGET", do_set_target, XS_FLAG_PRIV }, + [XS_RESET_WATCHES] = { "RESET_WATCHES", do_reset_watches }, + [XS_DIRECTORY_PART] = { "DIRECTORY_PART", send_directory_part }, + }; +@@ -1336,6 +1342,12 @@ static void process_message(struct connection *conn, struct buffered_data *in) + return; + } + ++ if ((wire_funcs[type].flags & XS_FLAG_PRIV) && ++ domain_is_unprivileged(conn)) { ++ send_error(conn, EACCES); ++ return; ++ } ++ + trans = (wire_funcs[type].flags & XS_FLAG_NOTID) + ? 
NULL : transaction_lookup(conn, in->hdr.msg.tx_id); + if (IS_ERR(trans)) { +diff --git a/tools/xenstore/xenstored_domain.c b/tools/xenstore/xenstored_domain.c +index 06359503f091..2d0d87ee89e1 100644 +--- a/tools/xenstore/xenstored_domain.c ++++ b/tools/xenstore/xenstored_domain.c +@@ -372,7 +372,7 @@ int do_introduce(struct connection *conn, struct buffered_data *in) + if (get_strings(in, vec, ARRAY_SIZE(vec)) < ARRAY_SIZE(vec)) + return EINVAL; + +- if (domain_is_unprivileged(conn) || !conn->can_write) ++ if (!conn->can_write) + return EACCES; + + domid = atoi(vec[0]); +@@ -438,7 +438,7 @@ int do_set_target(struct connection *conn, struct buffered_data *in) + if (get_strings(in, vec, ARRAY_SIZE(vec)) < ARRAY_SIZE(vec)) + return EINVAL; + +- if (domain_is_unprivileged(conn) || !conn->can_write) ++ if (!conn->can_write) + return EACCES; + + domid = atoi(vec[0]); +@@ -473,9 +473,6 @@ static struct domain *onearg_domain(struct connection *conn, + if (!domid) + return ERR_PTR(-EINVAL); + +- if (domain_is_unprivileged(conn)) +- return ERR_PTR(-EACCES); +- + return find_connected_domain(domid); + } + +-- +2.17.1 + diff --git a/xsa115-4.14-c-0006-tools-xenstore-rework-node-removal.patch b/xsa115-4.14-c-0006-tools-xenstore-rework-node-removal.patch new file mode 100644 index 0000000..f2357b8 --- /dev/null +++ b/xsa115-4.14-c-0006-tools-xenstore-rework-node-removal.patch @@ -0,0 +1,217 @@ +From a3d8089532ae573c03e1cdb2fc3c5ee5ebb52a60 Mon Sep 17 00:00:00 2001 +From: Juergen Gross +Date: Thu, 11 Jun 2020 16:12:42 +0200 +Subject: [PATCH 06/10] tools/xenstore: rework node removal + +Today a Xenstore node is being removed by deleting it from the parent +first and then deleting itself and all its children. This results in +stale entries remaining in the data base in case e.g. a memory +allocation is failing during processing. This would result in the +rather strange behavior to be able to read a node (as its still in the +data base) while not being visible in the tree view of Xenstore. + +Fix that by deleting the nodes from the leaf side instead of starting +at the root. + +As fire_watches() is now called from _rm() the ctx parameter needs a +const attribute. + +This is part of XSA-115. + +Signed-off-by: Juergen Gross +Reviewed-by: Julien Grall +Reviewed-by: Paul Durrant +--- + tools/xenstore/xenstored_core.c | 99 ++++++++++++++++---------------- + tools/xenstore/xenstored_watch.c | 4 +- + tools/xenstore/xenstored_watch.h | 2 +- + 3 files changed, 54 insertions(+), 51 deletions(-) + +diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c +index f38196ae2825..dfdb64f3ee60 100644 +--- a/tools/xenstore/xenstored_core.c ++++ b/tools/xenstore/xenstored_core.c +@@ -1089,74 +1089,76 @@ static int do_mkdir(struct connection *conn, struct buffered_data *in) + return 0; + } + +-static void delete_node(struct connection *conn, struct node *node) +-{ +- unsigned int i; +- char *name; +- +- /* Delete self, then delete children. If we crash, then the worst +- that can happen is the children will continue to take up space, but +- will otherwise be unreachable. */ +- delete_node_single(conn, node); +- +- /* Delete children, too. */ +- for (i = 0; i < node->childlen; i += strlen(node->children+i) + 1) { +- struct node *child; +- +- name = talloc_asprintf(node, "%s/%s", node->name, +- node->children + i); +- child = name ? 
read_node(conn, node, name) : NULL; +- if (child) { +- delete_node(conn, child); +- } +- else { +- trace("delete_node: Error deleting child '%s/%s'!\n", +- node->name, node->children + i); +- /* Skip it, we've already deleted the parent. */ +- } +- talloc_free(name); +- } +-} +- +- + /* Delete memory using memmove. */ + static void memdel(void *mem, unsigned off, unsigned len, unsigned total) + { + memmove(mem + off, mem + off + len, total - off - len); + } + +- +-static int remove_child_entry(struct connection *conn, struct node *node, +- size_t offset) ++static void remove_child_entry(struct connection *conn, struct node *node, ++ size_t offset) + { + size_t childlen = strlen(node->children + offset); ++ + memdel(node->children, offset, childlen + 1, node->childlen); + node->childlen -= childlen + 1; +- return write_node(conn, node, true); ++ if (write_node(conn, node, true)) ++ corrupt(conn, "Can't update parent node '%s'", node->name); + } + +- +-static int delete_child(struct connection *conn, +- struct node *node, const char *childname) ++static void delete_child(struct connection *conn, ++ struct node *node, const char *childname) + { + unsigned int i; + + for (i = 0; i < node->childlen; i += strlen(node->children+i) + 1) { + if (streq(node->children+i, childname)) { +- return remove_child_entry(conn, node, i); ++ remove_child_entry(conn, node, i); ++ return; + } + } + corrupt(conn, "Can't find child '%s' in %s", childname, node->name); +- return ENOENT; + } + ++static int delete_node(struct connection *conn, struct node *parent, ++ struct node *node) ++{ ++ char *name; ++ ++ /* Delete children. */ ++ while (node->childlen) { ++ struct node *child; ++ ++ name = talloc_asprintf(node, "%s/%s", node->name, ++ node->children); ++ child = name ? read_node(conn, node, name) : NULL; ++ if (child) { ++ if (delete_node(conn, node, child)) ++ return errno; ++ } else { ++ trace("delete_node: Error deleting child '%s/%s'!\n", ++ node->name, node->children); ++ /* Quit deleting. */ ++ errno = ENOMEM; ++ return errno; ++ } ++ talloc_free(name); ++ } ++ ++ delete_node_single(conn, node); ++ delete_child(conn, parent, basename(node->name)); ++ talloc_free(node); ++ ++ return 0; ++} + + static int _rm(struct connection *conn, const void *ctx, struct node *node, + const char *name) + { +- /* Delete from parent first, then if we crash, the worst that can +- happen is the child will continue to take up space, but will +- otherwise be unreachable. */ ++ /* ++ * Deleting node by node, so the result is always consistent even in ++ * case of a failure. ++ */ + struct node *parent; + char *parentname = get_parent(ctx, name); + +@@ -1167,11 +1169,13 @@ static int _rm(struct connection *conn, const void *ctx, struct node *node, + if (!parent) + return (errno == ENOMEM) ? ENOMEM : EINVAL; + +- if (delete_child(conn, parent, basename(name))) +- return EINVAL; +- +- delete_node(conn, node); +- return 0; ++ /* ++ * Fire the watches now, when we can still see the node permissions. ++ * This fine as we are single threaded and the next possible read will ++ * be handled only after the node has been really removed. 
++ */ ++ fire_watches(conn, ctx, name, true); ++ return delete_node(conn, parent, node); + } + + +@@ -1209,7 +1213,6 @@ static int do_rm(struct connection *conn, struct buffered_data *in) + if (ret) + return ret; + +- fire_watches(conn, in, name, true); + send_ack(conn, XS_RM); + + return 0; +diff --git a/tools/xenstore/xenstored_watch.c b/tools/xenstore/xenstored_watch.c +index f2f1bed47cc6..f0bbfe7a6dc6 100644 +--- a/tools/xenstore/xenstored_watch.c ++++ b/tools/xenstore/xenstored_watch.c +@@ -77,7 +77,7 @@ static bool is_child(const char *child, const char *parent) + * Temporary memory allocations are done with ctx. + */ + static void add_event(struct connection *conn, +- void *ctx, ++ const void *ctx, + struct watch *watch, + const char *name) + { +@@ -121,7 +121,7 @@ static void add_event(struct connection *conn, + * Check whether any watch events are to be sent. + * Temporary memory allocations are done with ctx. + */ +-void fire_watches(struct connection *conn, void *ctx, const char *name, ++void fire_watches(struct connection *conn, const void *ctx, const char *name, + bool recurse) + { + struct connection *i; +diff --git a/tools/xenstore/xenstored_watch.h b/tools/xenstore/xenstored_watch.h +index c72ea6a68542..54d4ea7e0d41 100644 +--- a/tools/xenstore/xenstored_watch.h ++++ b/tools/xenstore/xenstored_watch.h +@@ -25,7 +25,7 @@ int do_watch(struct connection *conn, struct buffered_data *in); + int do_unwatch(struct connection *conn, struct buffered_data *in); + + /* Fire all watches: recurse means all the children are affected (ie. rm). */ +-void fire_watches(struct connection *conn, void *tmp, const char *name, ++void fire_watches(struct connection *conn, const void *tmp, const char *name, + bool recurse); + + void conn_delete_all_watches(struct connection *conn); +-- +2.17.1 + diff --git a/xsa115-4.14-c-0007-tools-xenstore-fire-watches-only-when-removing-a-spe.patch b/xsa115-4.14-c-0007-tools-xenstore-fire-watches-only-when-removing-a-spe.patch new file mode 100644 index 0000000..008ce01 --- /dev/null +++ b/xsa115-4.14-c-0007-tools-xenstore-fire-watches-only-when-removing-a-spe.patch @@ -0,0 +1,118 @@ +From 3d4e3fd6c78795bf426947fbfbfa9af6568ece9f Mon Sep 17 00:00:00 2001 +From: Juergen Gross +Date: Thu, 11 Jun 2020 16:12:43 +0200 +Subject: [PATCH 07/10] tools/xenstore: fire watches only when removing a + specific node + +Instead of firing all watches for removing a subtree in one go, do so +only when the related node is being removed. + +The watches for the top-most node being removed include all watches +including that node, while watches for nodes below that are only fired +if they are matching exactly. This avoids firing any watch more than +once when removing a subtree. + +This is part of XSA-115. 
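As an illustration of the matching rule described above, here is a standalone C sketch (hypothetical helper names, not the xenstored code itself): the top-most removed node is reported with non-exact matching, so watches on it and on its ancestors fire, while nodes below it only trigger watches registered on exactly that path.

    #include <stdbool.h>
    #include <stdio.h>
    #include <string.h>

    /* True if "path" equals "watched" or lies somewhere underneath it. */
    static bool is_child(const char *path, const char *watched)
    {
        size_t len = strlen(watched);

        if (strncmp(path, watched, len) != 0)
            return false;
        return path[len] == '\0' || path[len] == '/' || strcmp(watched, "/") == 0;
    }

    /* exact == true is used for nodes below the removed subtree root. */
    static bool watch_fires(const char *event, const char *watched, bool exact)
    {
        return exact ? strcmp(event, watched) == 0 : is_child(event, watched);
    }

    int main(void)
    {
        /* Removing the subtree /a/b (which has a child /a/b/c): */
        printf("%d\n", watch_fires("/a/b", "/a", false));      /* 1: ancestor watch fires */
        printf("%d\n", watch_fires("/a/b", "/a/b", false));    /* 1: watch on the node itself */
        printf("%d\n", watch_fires("/a/b", "/a/b/c", false));  /* 0: fired later, exactly */
        printf("%d\n", watch_fires("/a/b/c", "/a/b/c", true)); /* 1: fired exactly once */
        printf("%d\n", watch_fires("/a/b/c", "/a", true));     /* 0: already covered above */
        return 0;
    }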
+ +Signed-off-by: Juergen Gross +Reviewed-by: Julien Grall +Reviewed-by: Paul Durrant +--- + tools/xenstore/xenstored_core.c | 11 ++++++----- + tools/xenstore/xenstored_watch.c | 13 ++++++++----- + tools/xenstore/xenstored_watch.h | 4 ++-- + 3 files changed, 16 insertions(+), 12 deletions(-) + +diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c +index dfdb64f3ee60..20a7a3581555 100644 +--- a/tools/xenstore/xenstored_core.c ++++ b/tools/xenstore/xenstored_core.c +@@ -1120,8 +1120,8 @@ static void delete_child(struct connection *conn, + corrupt(conn, "Can't find child '%s' in %s", childname, node->name); + } + +-static int delete_node(struct connection *conn, struct node *parent, +- struct node *node) ++static int delete_node(struct connection *conn, const void *ctx, ++ struct node *parent, struct node *node) + { + char *name; + +@@ -1133,7 +1133,7 @@ static int delete_node(struct connection *conn, struct node *parent, + node->children); + child = name ? read_node(conn, node, name) : NULL; + if (child) { +- if (delete_node(conn, node, child)) ++ if (delete_node(conn, ctx, node, child)) + return errno; + } else { + trace("delete_node: Error deleting child '%s/%s'!\n", +@@ -1145,6 +1145,7 @@ static int delete_node(struct connection *conn, struct node *parent, + talloc_free(name); + } + ++ fire_watches(conn, ctx, node->name, true); + delete_node_single(conn, node); + delete_child(conn, parent, basename(node->name)); + talloc_free(node); +@@ -1174,8 +1175,8 @@ static int _rm(struct connection *conn, const void *ctx, struct node *node, + * This fine as we are single threaded and the next possible read will + * be handled only after the node has been really removed. + */ +- fire_watches(conn, ctx, name, true); +- return delete_node(conn, parent, node); ++ fire_watches(conn, ctx, name, false); ++ return delete_node(conn, ctx, parent, node); + } + + +diff --git a/tools/xenstore/xenstored_watch.c b/tools/xenstore/xenstored_watch.c +index f0bbfe7a6dc6..3836675459fa 100644 +--- a/tools/xenstore/xenstored_watch.c ++++ b/tools/xenstore/xenstored_watch.c +@@ -122,7 +122,7 @@ static void add_event(struct connection *conn, + * Temporary memory allocations are done with ctx. + */ + void fire_watches(struct connection *conn, const void *ctx, const char *name, +- bool recurse) ++ bool exact) + { + struct connection *i; + struct watch *watch; +@@ -134,10 +134,13 @@ void fire_watches(struct connection *conn, const void *ctx, const char *name, + /* Create an event for each watch. */ + list_for_each_entry(i, &connections, list) { + list_for_each_entry(watch, &i->watches, list) { +- if (is_child(name, watch->node)) +- add_event(i, ctx, watch, name); +- else if (recurse && is_child(watch->node, name)) +- add_event(i, ctx, watch, watch->node); ++ if (exact) { ++ if (streq(name, watch->node)) ++ add_event(i, ctx, watch, name); ++ } else { ++ if (is_child(name, watch->node)) ++ add_event(i, ctx, watch, name); ++ } + } + } + } +diff --git a/tools/xenstore/xenstored_watch.h b/tools/xenstore/xenstored_watch.h +index 54d4ea7e0d41..1b3c80d3dda1 100644 +--- a/tools/xenstore/xenstored_watch.h ++++ b/tools/xenstore/xenstored_watch.h +@@ -24,9 +24,9 @@ + int do_watch(struct connection *conn, struct buffered_data *in); + int do_unwatch(struct connection *conn, struct buffered_data *in); + +-/* Fire all watches: recurse means all the children are affected (ie. rm). */ ++/* Fire all watches: !exact means all the children are affected (ie. rm). 
*/ + void fire_watches(struct connection *conn, const void *tmp, const char *name, +- bool recurse); ++ bool exact); + + void conn_delete_all_watches(struct connection *conn); + +-- +2.17.1 + diff --git a/xsa115-4.14-c-0008-tools-xenstore-introduce-node_perms-structure.patch b/xsa115-4.14-c-0008-tools-xenstore-introduce-node_perms-structure.patch new file mode 100644 index 0000000..c295e8c --- /dev/null +++ b/xsa115-4.14-c-0008-tools-xenstore-introduce-node_perms-structure.patch @@ -0,0 +1,289 @@ +From 1069c600f85ff583c461cfbfee1afb1a0731796e Mon Sep 17 00:00:00 2001 +From: Juergen Gross +Date: Thu, 11 Jun 2020 16:12:44 +0200 +Subject: [PATCH 08/10] tools/xenstore: introduce node_perms structure + +There are several places in xenstored using a permission array and the +size of that array. Introduce a new struct node_perms containing both. + +This is part of XSA-115. + +Signed-off-by: Juergen Gross +Acked-by: Julien Grall +Reviewed-by: Paul Durrant +--- + tools/xenstore/xenstored_core.c | 79 +++++++++++++++---------------- + tools/xenstore/xenstored_core.h | 8 +++- + tools/xenstore/xenstored_domain.c | 12 ++--- + 3 files changed, 50 insertions(+), 49 deletions(-) + +diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c +index 20a7a3581555..79d305fbbe58 100644 +--- a/tools/xenstore/xenstored_core.c ++++ b/tools/xenstore/xenstored_core.c +@@ -403,14 +403,14 @@ static struct node *read_node(struct connection *conn, const void *ctx, + /* Datalen, childlen, number of permissions */ + hdr = (void *)data.dptr; + node->generation = hdr->generation; +- node->num_perms = hdr->num_perms; ++ node->perms.num = hdr->num_perms; + node->datalen = hdr->datalen; + node->childlen = hdr->childlen; + + /* Permissions are struct xs_permissions. */ +- node->perms = hdr->perms; ++ node->perms.p = hdr->perms; + /* Data is binary blob (usually ascii, no nul). */ +- node->data = node->perms + node->num_perms; ++ node->data = node->perms.p + node->perms.num; + /* Children is strings, nul separated. 
*/ + node->children = node->data + node->datalen; + +@@ -427,7 +427,7 @@ int write_node_raw(struct connection *conn, TDB_DATA *key, struct node *node, + struct xs_tdb_record_hdr *hdr; + + data.dsize = sizeof(*hdr) +- + node->num_perms*sizeof(node->perms[0]) ++ + node->perms.num * sizeof(node->perms.p[0]) + + node->datalen + node->childlen; + + if (!no_quota_check && domain_is_unprivileged(conn) && +@@ -439,12 +439,13 @@ int write_node_raw(struct connection *conn, TDB_DATA *key, struct node *node, + data.dptr = talloc_size(node, data.dsize); + hdr = (void *)data.dptr; + hdr->generation = node->generation; +- hdr->num_perms = node->num_perms; ++ hdr->num_perms = node->perms.num; + hdr->datalen = node->datalen; + hdr->childlen = node->childlen; + +- memcpy(hdr->perms, node->perms, node->num_perms*sizeof(node->perms[0])); +- p = hdr->perms + node->num_perms; ++ memcpy(hdr->perms, node->perms.p, ++ node->perms.num * sizeof(*node->perms.p)); ++ p = hdr->perms + node->perms.num; + memcpy(p, node->data, node->datalen); + p += node->datalen; + memcpy(p, node->children, node->childlen); +@@ -470,8 +471,7 @@ static int write_node(struct connection *conn, struct node *node, + } + + static enum xs_perm_type perm_for_conn(struct connection *conn, +- struct xs_permissions *perms, +- unsigned int num) ++ const struct node_perms *perms) + { + unsigned int i; + enum xs_perm_type mask = XS_PERM_READ|XS_PERM_WRITE|XS_PERM_OWNER; +@@ -480,16 +480,16 @@ static enum xs_perm_type perm_for_conn(struct connection *conn, + mask &= ~XS_PERM_WRITE; + + /* Owners and tools get it all... */ +- if (!domain_is_unprivileged(conn) || perms[0].id == conn->id +- || (conn->target && perms[0].id == conn->target->id)) ++ if (!domain_is_unprivileged(conn) || perms->p[0].id == conn->id ++ || (conn->target && perms->p[0].id == conn->target->id)) + return (XS_PERM_READ|XS_PERM_WRITE|XS_PERM_OWNER) & mask; + +- for (i = 1; i < num; i++) +- if (perms[i].id == conn->id +- || (conn->target && perms[i].id == conn->target->id)) +- return perms[i].perms & mask; ++ for (i = 1; i < perms->num; i++) ++ if (perms->p[i].id == conn->id ++ || (conn->target && perms->p[i].id == conn->target->id)) ++ return perms->p[i].perms & mask; + +- return perms[0].perms & mask; ++ return perms->p[0].perms & mask; + } + + /* +@@ -536,7 +536,7 @@ static int ask_parents(struct connection *conn, const void *ctx, + return 0; + } + +- *perm = perm_for_conn(conn, node->perms, node->num_perms); ++ *perm = perm_for_conn(conn, &node->perms); + return 0; + } + +@@ -582,8 +582,7 @@ struct node *get_node(struct connection *conn, + node = read_node(conn, ctx, name); + /* If we don't have permission, we don't have node. 
*/ + if (node) { +- if ((perm_for_conn(conn, node->perms, node->num_perms) & perm) +- != perm) { ++ if ((perm_for_conn(conn, &node->perms) & perm) != perm) { + errno = EACCES; + node = NULL; + } +@@ -759,16 +758,15 @@ const char *onearg(struct buffered_data *in) + return in->buffer; + } + +-static char *perms_to_strings(const void *ctx, +- struct xs_permissions *perms, unsigned int num, ++static char *perms_to_strings(const void *ctx, const struct node_perms *perms, + unsigned int *len) + { + unsigned int i; + char *strings = NULL; + char buffer[MAX_STRLEN(unsigned int) + 1]; + +- for (*len = 0, i = 0; i < num; i++) { +- if (!xs_perm_to_string(&perms[i], buffer, sizeof(buffer))) ++ for (*len = 0, i = 0; i < perms->num; i++) { ++ if (!xs_perm_to_string(&perms->p[i], buffer, sizeof(buffer))) + return NULL; + + strings = talloc_realloc(ctx, strings, char, +@@ -947,13 +945,13 @@ static struct node *construct_node(struct connection *conn, const void *ctx, + goto nomem; + + /* Inherit permissions, except unprivileged domains own what they create */ +- node->num_perms = parent->num_perms; +- node->perms = talloc_memdup(node, parent->perms, +- node->num_perms * sizeof(node->perms[0])); +- if (!node->perms) ++ node->perms.num = parent->perms.num; ++ node->perms.p = talloc_memdup(node, parent->perms.p, ++ node->perms.num * sizeof(*node->perms.p)); ++ if (!node->perms.p) + goto nomem; + if (domain_is_unprivileged(conn)) +- node->perms[0].id = conn->id; ++ node->perms.p[0].id = conn->id; + + /* No children, no data */ + node->children = node->data = NULL; +@@ -1230,7 +1228,7 @@ static int do_get_perms(struct connection *conn, struct buffered_data *in) + if (!node) + return errno; + +- strings = perms_to_strings(node, node->perms, node->num_perms, &len); ++ strings = perms_to_strings(node, &node->perms, &len); + if (!strings) + return errno; + +@@ -1241,13 +1239,12 @@ static int do_get_perms(struct connection *conn, struct buffered_data *in) + + static int do_set_perms(struct connection *conn, struct buffered_data *in) + { +- unsigned int num; +- struct xs_permissions *perms; ++ struct node_perms perms; + char *name, *permstr; + struct node *node; + +- num = xs_count_strings(in->buffer, in->used); +- if (num < 2) ++ perms.num = xs_count_strings(in->buffer, in->used); ++ if (perms.num < 2) + return EINVAL; + + /* First arg is node name. */ +@@ -1258,21 +1255,21 @@ static int do_set_perms(struct connection *conn, struct buffered_data *in) + return errno; + + permstr = in->buffer + strlen(in->buffer) + 1; +- num--; ++ perms.num--; + +- perms = talloc_array(node, struct xs_permissions, num); +- if (!perms) ++ perms.p = talloc_array(node, struct xs_permissions, perms.num); ++ if (!perms.p) + return ENOMEM; +- if (!xs_strings_to_perms(perms, num, permstr)) ++ if (!xs_strings_to_perms(perms.p, perms.num, permstr)) + return errno; + + /* Unprivileged domains may not change the owner. 
*/ +- if (domain_is_unprivileged(conn) && perms[0].id != node->perms[0].id) ++ if (domain_is_unprivileged(conn) && ++ perms.p[0].id != node->perms.p[0].id) + return EPERM; + + domain_entry_dec(conn, node); + node->perms = perms; +- node->num_perms = num; + domain_entry_inc(conn, node); + + if (write_node(conn, node, false)) +@@ -1547,8 +1544,8 @@ static void manual_node(const char *name, const char *child) + barf_perror("Could not allocate initial node %s", name); + + node->name = name; +- node->perms = &perms; +- node->num_perms = 1; ++ node->perms.p = &perms; ++ node->perms.num = 1; + node->children = (char *)child; + if (child) + node->childlen = strlen(child) + 1; +diff --git a/tools/xenstore/xenstored_core.h b/tools/xenstore/xenstored_core.h +index 29d638fbc5a0..47ba0916dbe2 100644 +--- a/tools/xenstore/xenstored_core.h ++++ b/tools/xenstore/xenstored_core.h +@@ -109,6 +109,11 @@ struct connection + }; + extern struct list_head connections; + ++struct node_perms { ++ unsigned int num; ++ struct xs_permissions *p; ++}; ++ + struct node { + const char *name; + +@@ -120,8 +125,7 @@ struct node { + #define NO_GENERATION ~((uint64_t)0) + + /* Permissions. */ +- unsigned int num_perms; +- struct xs_permissions *perms; ++ struct node_perms perms; + + /* Contents. */ + unsigned int datalen; +diff --git a/tools/xenstore/xenstored_domain.c b/tools/xenstore/xenstored_domain.c +index 2d0d87ee89e1..aa9942fcc267 100644 +--- a/tools/xenstore/xenstored_domain.c ++++ b/tools/xenstore/xenstored_domain.c +@@ -650,12 +650,12 @@ void domain_entry_inc(struct connection *conn, struct node *node) + if (!conn) + return; + +- if (node->perms && node->perms[0].id != conn->id) { ++ if (node->perms.p && node->perms.p[0].id != conn->id) { + if (conn->transaction) { + transaction_entry_inc(conn->transaction, +- node->perms[0].id); ++ node->perms.p[0].id); + } else { +- d = find_domain_by_domid(node->perms[0].id); ++ d = find_domain_by_domid(node->perms.p[0].id); + if (d) + d->nbentry++; + } +@@ -676,12 +676,12 @@ void domain_entry_dec(struct connection *conn, struct node *node) + if (!conn) + return; + +- if (node->perms && node->perms[0].id != conn->id) { ++ if (node->perms.p && node->perms.p[0].id != conn->id) { + if (conn->transaction) { + transaction_entry_dec(conn->transaction, +- node->perms[0].id); ++ node->perms.p[0].id); + } else { +- d = find_domain_by_domid(node->perms[0].id); ++ d = find_domain_by_domid(node->perms.p[0].id); + if (d && d->nbentry) + d->nbentry--; + } +-- +2.17.1 + diff --git a/xsa115-4.14-c-0009-tools-xenstore-allow-special-watches-for-privileged-.patch b/xsa115-4.14-c-0009-tools-xenstore-allow-special-watches-for-privileged-.patch new file mode 100644 index 0000000..e1e8942 --- /dev/null +++ b/xsa115-4.14-c-0009-tools-xenstore-allow-special-watches-for-privileged-.patch @@ -0,0 +1,237 @@ +From b9fff4b7ad6b41db860a43d35c401847fef789cb Mon Sep 17 00:00:00 2001 +From: Juergen Gross +Date: Thu, 11 Jun 2020 16:12:45 +0200 +Subject: [PATCH 09/10] tools/xenstore: allow special watches for privileged + callers only + +The special watches "@introduceDomain" and "@releaseDomain" should be +allowed for privileged callers only, as they allow to gain information +about presence of other guests on the host. So send watch events for +those watches via privileged connections only. + +In order to allow for disaggregated setups where e.g. driver domains +need to make use of those special watches add support for calling +"set permissions" for those special nodes, too. + +This is part of XSA-115. 
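As a usage illustration of the SET_PERMS support described above (domid 42 and the delegation policy are made up, and the libxenstore declarations should be checked against xenstore.h before relying on this), a privileged toolstack could grant a driver domain access to the special watch roughly like this:

    #include <stdio.h>
    #include <xenstore.h>

    int main(void)
    {
        struct xs_handle *xsh = xs_open(0);
        /* Owner stays dom0 with the default "no access"; domid 42 (a
         * hypothetical driver domain) is granted read access, which is
         * what lets it receive @releaseDomain watch events. */
        struct xs_permissions perms[2] = {
            { .id = 0,  .perms = XS_PERM_NONE },
            { .id = 42, .perms = XS_PERM_READ },
        };

        if (!xsh)
            return 1;

        if (!xs_set_permissions(xsh, XBT_NULL, "@releaseDomain", perms, 2))
            perror("xs_set_permissions");

        xs_close(xsh);
        return 0;
    }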
+ +Signed-off-by: Juergen Gross +Reviewed-by: Julien Grall +Reviewed-by: Paul Durrant +--- + docs/misc/xenstore.txt | 5 +++ + tools/xenstore/xenstored_core.c | 27 ++++++++------ + tools/xenstore/xenstored_core.h | 2 ++ + tools/xenstore/xenstored_domain.c | 60 +++++++++++++++++++++++++++++++ + tools/xenstore/xenstored_domain.h | 5 +++ + tools/xenstore/xenstored_watch.c | 4 +++ + 6 files changed, 93 insertions(+), 10 deletions(-) + +diff --git a/docs/misc/xenstore.txt b/docs/misc/xenstore.txt +index cb8009cb686d..2081f20f55e4 100644 +--- a/docs/misc/xenstore.txt ++++ b/docs/misc/xenstore.txt +@@ -170,6 +170,9 @@ SET_PERMS ||+? + n no access + See https://wiki.xen.org/wiki/XenBus section + `Permissions' for details of the permissions system. ++ It is possible to set permissions for the special watch paths ++ "@introduceDomain" and "@releaseDomain" to enable receiving those ++ watches in unprivileged domains. + + ---------- Watches ---------- + +@@ -194,6 +197,8 @@ WATCH ||? + @releaseDomain occurs on any domain crash or + shutdown, and also on RELEASE + and domain destruction ++ events are sent to privileged callers or explicitly ++ via SET_PERMS enabled domains only. + + When a watch is first set up it is triggered once straight + away, with equal to . Watches may be triggered +diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c +index 79d305fbbe58..15ffbeb30f19 100644 +--- a/tools/xenstore/xenstored_core.c ++++ b/tools/xenstore/xenstored_core.c +@@ -470,8 +470,8 @@ static int write_node(struct connection *conn, struct node *node, + return write_node_raw(conn, &key, node, no_quota_check); + } + +-static enum xs_perm_type perm_for_conn(struct connection *conn, +- const struct node_perms *perms) ++enum xs_perm_type perm_for_conn(struct connection *conn, ++ const struct node_perms *perms) + { + unsigned int i; + enum xs_perm_type mask = XS_PERM_READ|XS_PERM_WRITE|XS_PERM_OWNER; +@@ -1247,22 +1247,29 @@ static int do_set_perms(struct connection *conn, struct buffered_data *in) + if (perms.num < 2) + return EINVAL; + +- /* First arg is node name. */ +- /* We must own node to do this (tools can do this too). */ +- node = get_node_canonicalized(conn, in, in->buffer, &name, +- XS_PERM_WRITE | XS_PERM_OWNER); +- if (!node) +- return errno; +- + permstr = in->buffer + strlen(in->buffer) + 1; + perms.num--; + +- perms.p = talloc_array(node, struct xs_permissions, perms.num); ++ perms.p = talloc_array(in, struct xs_permissions, perms.num); + if (!perms.p) + return ENOMEM; + if (!xs_strings_to_perms(perms.p, perms.num, permstr)) + return errno; + ++ /* First arg is node name. */ ++ if (strstarts(in->buffer, "@")) { ++ if (set_perms_special(conn, in->buffer, &perms)) ++ return errno; ++ send_ack(conn, XS_SET_PERMS); ++ return 0; ++ } ++ ++ /* We must own node to do this (tools can do this too). */ ++ node = get_node_canonicalized(conn, in, in->buffer, &name, ++ XS_PERM_WRITE | XS_PERM_OWNER); ++ if (!node) ++ return errno; ++ + /* Unprivileged domains may not change the owner. 
*/ + if (domain_is_unprivileged(conn) && + perms.p[0].id != node->perms.p[0].id) +diff --git a/tools/xenstore/xenstored_core.h b/tools/xenstore/xenstored_core.h +index 47ba0916dbe2..53f1050859fc 100644 +--- a/tools/xenstore/xenstored_core.h ++++ b/tools/xenstore/xenstored_core.h +@@ -165,6 +165,8 @@ struct node *get_node(struct connection *conn, + struct connection *new_connection(connwritefn_t *write, connreadfn_t *read); + void check_store(void); + void corrupt(struct connection *conn, const char *fmt, ...); ++enum xs_perm_type perm_for_conn(struct connection *conn, ++ const struct node_perms *perms); + + /* Is this a valid node name? */ + bool is_valid_nodename(const char *node); +diff --git a/tools/xenstore/xenstored_domain.c b/tools/xenstore/xenstored_domain.c +index aa9942fcc267..a0d1a11c837f 100644 +--- a/tools/xenstore/xenstored_domain.c ++++ b/tools/xenstore/xenstored_domain.c +@@ -41,6 +41,9 @@ static evtchn_port_t virq_port; + + xenevtchn_handle *xce_handle = NULL; + ++static struct node_perms dom_release_perms; ++static struct node_perms dom_introduce_perms; ++ + struct domain + { + struct list_head list; +@@ -582,6 +585,59 @@ void restore_existing_connections(void) + { + } + ++static int set_dom_perms_default(struct node_perms *perms) ++{ ++ perms->num = 1; ++ perms->p = talloc_array(NULL, struct xs_permissions, perms->num); ++ if (!perms->p) ++ return -1; ++ perms->p->id = 0; ++ perms->p->perms = XS_PERM_NONE; ++ ++ return 0; ++} ++ ++static struct node_perms *get_perms_special(const char *name) ++{ ++ if (!strcmp(name, "@releaseDomain")) ++ return &dom_release_perms; ++ if (!strcmp(name, "@introduceDomain")) ++ return &dom_introduce_perms; ++ return NULL; ++} ++ ++int set_perms_special(struct connection *conn, const char *name, ++ struct node_perms *perms) ++{ ++ struct node_perms *p; ++ ++ p = get_perms_special(name); ++ if (!p) ++ return EINVAL; ++ ++ if ((perm_for_conn(conn, p) & (XS_PERM_WRITE | XS_PERM_OWNER)) != ++ (XS_PERM_WRITE | XS_PERM_OWNER)) ++ return EACCES; ++ ++ p->num = perms->num; ++ talloc_free(p->p); ++ p->p = perms->p; ++ talloc_steal(NULL, perms->p); ++ ++ return 0; ++} ++ ++bool check_perms_special(const char *name, struct connection *conn) ++{ ++ struct node_perms *p; ++ ++ p = get_perms_special(name); ++ if (!p) ++ return false; ++ ++ return perm_for_conn(conn, p) & XS_PERM_READ; ++} ++ + static int dom0_init(void) + { + evtchn_port_t port; +@@ -603,6 +659,10 @@ static int dom0_init(void) + + xenevtchn_notify(xce_handle, dom0->port); + ++ if (set_dom_perms_default(&dom_release_perms) || ++ set_dom_perms_default(&dom_introduce_perms)) ++ return -1; ++ + return 0; + } + +diff --git a/tools/xenstore/xenstored_domain.h b/tools/xenstore/xenstored_domain.h +index 56ae01597475..259183962a9c 100644 +--- a/tools/xenstore/xenstored_domain.h ++++ b/tools/xenstore/xenstored_domain.h +@@ -65,6 +65,11 @@ void domain_watch_inc(struct connection *conn); + void domain_watch_dec(struct connection *conn); + int domain_watch(struct connection *conn); + ++/* Special node permission handling. 
*/ ++int set_perms_special(struct connection *conn, const char *name, ++ struct node_perms *perms); ++bool check_perms_special(const char *name, struct connection *conn); ++ + /* Write rate limiting */ + + #define WRL_FACTOR 1000 /* for fixed-point arithmetic */ +diff --git a/tools/xenstore/xenstored_watch.c b/tools/xenstore/xenstored_watch.c +index 3836675459fa..f4e289362eb6 100644 +--- a/tools/xenstore/xenstored_watch.c ++++ b/tools/xenstore/xenstored_watch.c +@@ -133,6 +133,10 @@ void fire_watches(struct connection *conn, const void *ctx, const char *name, + + /* Create an event for each watch. */ + list_for_each_entry(i, &connections, list) { ++ /* introduce/release domain watches */ ++ if (check_special_event(name) && !check_perms_special(name, i)) ++ continue; ++ + list_for_each_entry(watch, &i->watches, list) { + if (exact) { + if (streq(name, watch->node)) +-- +2.17.1 + diff --git a/xsa115-4.14-c-0010-tools-xenstore-avoid-watch-events-for-nodes-without-.patch b/xsa115-4.14-c-0010-tools-xenstore-avoid-watch-events-for-nodes-without-.patch new file mode 100644 index 0000000..b09153c --- /dev/null +++ b/xsa115-4.14-c-0010-tools-xenstore-avoid-watch-events-for-nodes-without-.patch @@ -0,0 +1,374 @@ +From f1cc47b0572b337269af7e34bd019584f4b8c98e Mon Sep 17 00:00:00 2001 +From: Juergen Gross +Date: Thu, 11 Jun 2020 16:12:46 +0200 +Subject: [PATCH 10/10] tools/xenstore: avoid watch events for nodes without + access + +Today watch events are sent regardless of the access rights of the +node the event is sent for. This enables any guest to e.g. setup a +watch for "/" in order to have a detailed record of all Xenstore +modifications. + +Modify that by sending only watch events for nodes that the watcher +has a chance to see otherwise (either via direct reads or by querying +the children of a node). This includes cases where the visibility of +a node for a watcher is changing (permissions being removed). + +This is part of XSA-115. + +Signed-off-by: Juergen Gross +Reviewed-by: Julien Grall +Reviewed-by: Paul Durrant +--- + tools/xenstore/xenstored_core.c | 28 +++++----- + tools/xenstore/xenstored_core.h | 15 ++++-- + tools/xenstore/xenstored_domain.c | 6 +-- + tools/xenstore/xenstored_transaction.c | 21 +++++++- + tools/xenstore/xenstored_watch.c | 75 +++++++++++++++++++------- + tools/xenstore/xenstored_watch.h | 2 +- + 6 files changed, 104 insertions(+), 43 deletions(-) + +diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c +index 15ffbeb30f19..92bfd54cff62 100644 +--- a/tools/xenstore/xenstored_core.c ++++ b/tools/xenstore/xenstored_core.c +@@ -360,8 +360,8 @@ static void initialize_fds(int *p_sock_pollfd_idx, int *p_ro_sock_pollfd_idx, + * If it fails, returns NULL and sets errno. + * Temporary memory allocations will be done with ctx. + */ +-static struct node *read_node(struct connection *conn, const void *ctx, +- const char *name) ++struct node *read_node(struct connection *conn, const void *ctx, ++ const char *name) + { + TDB_DATA key, data; + struct xs_tdb_record_hdr *hdr; +@@ -496,7 +496,7 @@ enum xs_perm_type perm_for_conn(struct connection *conn, + * Get name of node parent. + * Temporary memory allocations are done with ctx. + */ +-static char *get_parent(const void *ctx, const char *node) ++char *get_parent(const void *ctx, const char *node) + { + char *parent; + char *slash = strrchr(node + 1, '/'); +@@ -568,10 +568,10 @@ static int errno_from_parents(struct connection *conn, const void *ctx, + * If it fails, returns NULL and sets errno. 
+ * Temporary memory allocations are done with ctx. + */ +-struct node *get_node(struct connection *conn, +- const void *ctx, +- const char *name, +- enum xs_perm_type perm) ++static struct node *get_node(struct connection *conn, ++ const void *ctx, ++ const char *name, ++ enum xs_perm_type perm) + { + struct node *node; + +@@ -1058,7 +1058,7 @@ static int do_write(struct connection *conn, struct buffered_data *in) + return errno; + } + +- fire_watches(conn, in, name, false); ++ fire_watches(conn, in, name, node, false, NULL); + send_ack(conn, XS_WRITE); + + return 0; +@@ -1080,7 +1080,7 @@ static int do_mkdir(struct connection *conn, struct buffered_data *in) + node = create_node(conn, in, name, NULL, 0); + if (!node) + return errno; +- fire_watches(conn, in, name, false); ++ fire_watches(conn, in, name, node, false, NULL); + } + send_ack(conn, XS_MKDIR); + +@@ -1143,7 +1143,7 @@ static int delete_node(struct connection *conn, const void *ctx, + talloc_free(name); + } + +- fire_watches(conn, ctx, node->name, true); ++ fire_watches(conn, ctx, node->name, node, true, NULL); + delete_node_single(conn, node); + delete_child(conn, parent, basename(node->name)); + talloc_free(node); +@@ -1167,13 +1167,14 @@ static int _rm(struct connection *conn, const void *ctx, struct node *node, + parent = read_node(conn, ctx, parentname); + if (!parent) + return (errno == ENOMEM) ? ENOMEM : EINVAL; ++ node->parent = parent; + + /* + * Fire the watches now, when we can still see the node permissions. + * This fine as we are single threaded and the next possible read will + * be handled only after the node has been really removed. + */ +- fire_watches(conn, ctx, name, false); ++ fire_watches(conn, ctx, name, node, false, NULL); + return delete_node(conn, ctx, parent, node); + } + +@@ -1239,7 +1240,7 @@ static int do_get_perms(struct connection *conn, struct buffered_data *in) + + static int do_set_perms(struct connection *conn, struct buffered_data *in) + { +- struct node_perms perms; ++ struct node_perms perms, old_perms; + char *name, *permstr; + struct node *node; + +@@ -1275,6 +1276,7 @@ static int do_set_perms(struct connection *conn, struct buffered_data *in) + perms.p[0].id != node->perms.p[0].id) + return EPERM; + ++ old_perms = node->perms; + domain_entry_dec(conn, node); + node->perms = perms; + domain_entry_inc(conn, node); +@@ -1282,7 +1284,7 @@ static int do_set_perms(struct connection *conn, struct buffered_data *in) + if (write_node(conn, node, false)) + return errno; + +- fire_watches(conn, in, name, false); ++ fire_watches(conn, in, name, node, false, &old_perms); + send_ack(conn, XS_SET_PERMS); + + return 0; +diff --git a/tools/xenstore/xenstored_core.h b/tools/xenstore/xenstored_core.h +index 53f1050859fc..eb19b71f5f46 100644 +--- a/tools/xenstore/xenstored_core.h ++++ b/tools/xenstore/xenstored_core.h +@@ -152,15 +152,17 @@ void send_ack(struct connection *conn, enum xsd_sockmsg_type type); + /* Canonicalize this path if possible. */ + char *xenstore_canonicalize(struct connection *conn, const void *ctx, const char *node); + ++/* Get access permissions. */ ++enum xs_perm_type perm_for_conn(struct connection *conn, ++ const struct node_perms *perms); ++ + /* Write a node to the tdb data base. */ + int write_node_raw(struct connection *conn, TDB_DATA *key, struct node *node, + bool no_quota_check); + +-/* Get this node, checking we have permissions. 
*/ +-struct node *get_node(struct connection *conn, +- const void *ctx, +- const char *name, +- enum xs_perm_type perm); ++/* Get a node from the tdb data base. */ ++struct node *read_node(struct connection *conn, const void *ctx, ++ const char *name); + + struct connection *new_connection(connwritefn_t *write, connreadfn_t *read); + void check_store(void); +@@ -171,6 +173,9 @@ enum xs_perm_type perm_for_conn(struct connection *conn, + /* Is this a valid node name? */ + bool is_valid_nodename(const char *node); + ++/* Get name of parent node. */ ++char *get_parent(const void *ctx, const char *node); ++ + /* Tracing infrastructure. */ + void trace_create(const void *data, const char *type); + void trace_destroy(const void *data, const char *type); +diff --git a/tools/xenstore/xenstored_domain.c b/tools/xenstore/xenstored_domain.c +index a0d1a11c837f..9fad470f8331 100644 +--- a/tools/xenstore/xenstored_domain.c ++++ b/tools/xenstore/xenstored_domain.c +@@ -202,7 +202,7 @@ static int destroy_domain(void *_domain) + unmap_interface(domain->interface); + } + +- fire_watches(NULL, domain, "@releaseDomain", false); ++ fire_watches(NULL, domain, "@releaseDomain", NULL, false, NULL); + + wrl_domain_destroy(domain); + +@@ -240,7 +240,7 @@ static void domain_cleanup(void) + } + + if (notify) +- fire_watches(NULL, NULL, "@releaseDomain", false); ++ fire_watches(NULL, NULL, "@releaseDomain", NULL, false, NULL); + } + + /* We scan all domains rather than use the information given here. */ +@@ -404,7 +404,7 @@ int do_introduce(struct connection *conn, struct buffered_data *in) + /* Now domain belongs to its connection. */ + talloc_steal(domain->conn, domain); + +- fire_watches(NULL, in, "@introduceDomain", false); ++ fire_watches(NULL, in, "@introduceDomain", NULL, false, NULL); + } else { + /* Use XS_INTRODUCE for recreating the xenbus event-channel. */ + if (domain->port) +diff --git a/tools/xenstore/xenstored_transaction.c b/tools/xenstore/xenstored_transaction.c +index e87897573469..a7d8c5d475ec 100644 +--- a/tools/xenstore/xenstored_transaction.c ++++ b/tools/xenstore/xenstored_transaction.c +@@ -114,6 +114,9 @@ struct accessed_node + /* Generation count (or NO_GENERATION) for conflict checking. */ + uint64_t generation; + ++ /* Original node permissions. */ ++ struct node_perms perms; ++ + /* Generation count checking required? */ + bool check_gen; + +@@ -260,6 +263,15 @@ int access_node(struct connection *conn, struct node *node, + i->node = talloc_strdup(i, node->name); + if (!i->node) + goto nomem; ++ if (node->generation != NO_GENERATION && node->perms.num) { ++ i->perms.p = talloc_array(i, struct xs_permissions, ++ node->perms.num); ++ if (!i->perms.p) ++ goto nomem; ++ i->perms.num = node->perms.num; ++ memcpy(i->perms.p, node->perms.p, ++ i->perms.num * sizeof(*i->perms.p)); ++ } + + introduce = true; + i->ta_node = false; +@@ -368,9 +380,14 @@ static int finalize_transaction(struct connection *conn, + talloc_free(data.dptr); + if (ret) + goto err; +- } else if (tdb_delete(tdb_ctx, key)) ++ fire_watches(conn, trans, i->node, NULL, false, ++ i->perms.p ? &i->perms : NULL); ++ } else { ++ fire_watches(conn, trans, i->node, NULL, false, ++ i->perms.p ? 
&i->perms : NULL); ++ if (tdb_delete(tdb_ctx, key)) + goto err; +- fire_watches(conn, trans, i->node, false); ++ } + } + + if (i->ta_node && tdb_delete(tdb_ctx, ta_key)) +diff --git a/tools/xenstore/xenstored_watch.c b/tools/xenstore/xenstored_watch.c +index f4e289362eb6..71c108ea99f1 100644 +--- a/tools/xenstore/xenstored_watch.c ++++ b/tools/xenstore/xenstored_watch.c +@@ -85,22 +85,6 @@ static void add_event(struct connection *conn, + unsigned int len; + char *data; + +- if (!check_special_event(name)) { +- /* Can this conn load node, or see that it doesn't exist? */ +- struct node *node = get_node(conn, ctx, name, XS_PERM_READ); +- /* +- * XXX We allow EACCES here because otherwise a non-dom0 +- * backend driver cannot watch for disappearance of a frontend +- * xenstore directory. When the directory disappears, we +- * revert to permissions of the parent directory for that path, +- * which will typically disallow access for the backend. +- * But this breaks device-channel teardown! +- * Really we should fix this better... +- */ +- if (!node && errno != ENOENT && errno != EACCES) +- return; +- } +- + if (watch->relative_path) { + name += strlen(watch->relative_path); + if (*name == '/') /* Could be "" */ +@@ -117,12 +101,60 @@ static void add_event(struct connection *conn, + talloc_free(data); + } + ++/* ++ * Check permissions of a specific watch to fire: ++ * Either the node itself or its parent have to be readable by the connection ++ * the watch has been setup for. In case a watch event is created due to ++ * changed permissions we need to take the old permissions into account, too. ++ */ ++static bool watch_permitted(struct connection *conn, const void *ctx, ++ const char *name, struct node *node, ++ struct node_perms *perms) ++{ ++ enum xs_perm_type perm; ++ struct node *parent; ++ char *parent_name; ++ ++ if (perms) { ++ perm = perm_for_conn(conn, perms); ++ if (perm & XS_PERM_READ) ++ return true; ++ } ++ ++ if (!node) { ++ node = read_node(conn, ctx, name); ++ if (!node) ++ return false; ++ } ++ ++ perm = perm_for_conn(conn, &node->perms); ++ if (perm & XS_PERM_READ) ++ return true; ++ ++ parent = node->parent; ++ if (!parent) { ++ parent_name = get_parent(ctx, node->name); ++ if (!parent_name) ++ return false; ++ parent = read_node(conn, ctx, parent_name); ++ if (!parent) ++ return false; ++ } ++ ++ perm = perm_for_conn(conn, &parent->perms); ++ ++ return perm & XS_PERM_READ; ++} ++ + /* + * Check whether any watch events are to be sent. + * Temporary memory allocations are done with ctx. ++ * We need to take the (potential) old permissions of the node into account ++ * as a watcher losing permissions to access a node should receive the ++ * watch event, too. + */ + void fire_watches(struct connection *conn, const void *ctx, const char *name, +- bool exact) ++ struct node *node, bool exact, struct node_perms *perms) + { + struct connection *i; + struct watch *watch; +@@ -134,8 +166,13 @@ void fire_watches(struct connection *conn, const void *ctx, const char *name, + /* Create an event for each watch. 
*/ + list_for_each_entry(i, &connections, list) { + /* introduce/release domain watches */ +- if (check_special_event(name) && !check_perms_special(name, i)) +- continue; ++ if (check_special_event(name)) { ++ if (!check_perms_special(name, i)) ++ continue; ++ } else { ++ if (!watch_permitted(i, ctx, name, node, perms)) ++ continue; ++ } + + list_for_each_entry(watch, &i->watches, list) { + if (exact) { +diff --git a/tools/xenstore/xenstored_watch.h b/tools/xenstore/xenstored_watch.h +index 1b3c80d3dda1..03094374f379 100644 +--- a/tools/xenstore/xenstored_watch.h ++++ b/tools/xenstore/xenstored_watch.h +@@ -26,7 +26,7 @@ int do_unwatch(struct connection *conn, struct buffered_data *in); + + /* Fire all watches: !exact means all the children are affected (ie. rm). */ + void fire_watches(struct connection *conn, const void *tmp, const char *name, +- bool exact); ++ struct node *node, bool exact, struct node_perms *perms); + + void conn_delete_all_watches(struct connection *conn); + +-- +2.17.1 + diff --git a/xsa115-o-0001-tools-ocaml-xenstored-ignore-transaction-id-for-un-w.patch b/xsa115-o-0001-tools-ocaml-xenstored-ignore-transaction-id-for-un-w.patch new file mode 100644 index 0000000..0072c68 --- /dev/null +++ b/xsa115-o-0001-tools-ocaml-xenstored-ignore-transaction-id-for-un-w.patch @@ -0,0 +1,43 @@ +From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= +Subject: tools/ocaml/xenstored: ignore transaction id for [un]watch +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Instead of ignoring the transaction id for XS_WATCH and XS_UNWATCH +commands as it is documented in docs/misc/xenstore.txt, it is tested +for validity today. + +Really ignore the transaction id for XS_WATCH and XS_UNWATCH. + +This is part of XSA-115. + +Signed-off-by: Edwin Török +Acked-by: Christian Lindig +Reviewed-by: Andrew Cooper + +diff --git a/tools/ocaml/xenstored/process.ml b/tools/ocaml/xenstored/process.ml +index ff5c9484fc..2fa6798e3b 100644 +--- a/tools/ocaml/xenstored/process.ml ++++ b/tools/ocaml/xenstored/process.ml +@@ -498,12 +498,19 @@ let retain_op_in_history ty = + | Xenbus.Xb.Op.Reset_watches + | Xenbus.Xb.Op.Invalid -> false + ++let maybe_ignore_transaction = function ++ | Xenbus.Xb.Op.Watch | Xenbus.Xb.Op.Unwatch -> fun tid -> ++ if tid <> Transaction.none then ++ debug "Ignoring transaction ID %d for watch/unwatch" tid; ++ Transaction.none ++ | _ -> fun x -> x ++ + (** + * Nothrow guarantee. + *) + let process_packet ~store ~cons ~doms ~con ~req = + let ty = req.Packet.ty in +- let tid = req.Packet.tid in ++ let tid = maybe_ignore_transaction ty req.Packet.tid in + let rid = req.Packet.rid in + try + let fct = function_of_type ty in diff --git a/xsa115-o-0002-tools-ocaml-xenstored-check-privilege-for-XS_IS_DOMA.patch b/xsa115-o-0002-tools-ocaml-xenstored-check-privilege-for-XS_IS_DOMA.patch new file mode 100644 index 0000000..26033c7 --- /dev/null +++ b/xsa115-o-0002-tools-ocaml-xenstored-check-privilege-for-XS_IS_DOMA.patch @@ -0,0 +1,30 @@ +From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= +Subject: tools/ocaml/xenstored: check privilege for XS_IS_DOMAIN_INTRODUCED +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +The Xenstore command XS_IS_DOMAIN_INTRODUCED should be possible for privileged +domains only (the only user in the tree is the xenpaging daemon). + +This is part of XSA-115. 
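A rough sketch of the client-visible effect (domid 5 is arbitrary, and the exact libxenstore signature should be checked in xenstore.h): an unprivileged caller's XS_IS_DOMAIN_INTRODUCED request is now refused, so a query like the one below only succeeds from a sufficiently privileged connection.

    #include <stdio.h>
    #include <xenstore.h>

    int main(void)
    {
        struct xs_handle *xsh = xs_open(0);

        if (!xsh)
            return 1;

        /* From dom0 this reports whether domain 5 has been introduced; from
         * an unprivileged domain the request is rejected and the call simply
         * returns false. */
        if (xs_is_domain_introduced(xsh, 5))
            printf("domain 5 is introduced\n");
        else
            printf("not introduced (or caller not permitted)\n");

        xs_close(xsh);
        return 0;
    }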
+ +Signed-off-by: Edwin Török +Acked-by: Christian Lindig +Reviewed-by: Andrew Cooper + +diff --git a/tools/ocaml/xenstored/process.ml b/tools/ocaml/xenstored/process.ml +index 2fa6798e3b..fd79ef564f 100644 +--- a/tools/ocaml/xenstored/process.ml ++++ b/tools/ocaml/xenstored/process.ml +@@ -166,7 +166,9 @@ let do_setperms con t _domains _cons data = + let do_error _con _t _domains _cons _data = + raise Define.Unknown_operation + +-let do_isintroduced _con _t domains _cons data = ++let do_isintroduced con _t domains _cons data = ++ if not (Connection.is_dom0 con) ++ then raise Define.Permission_denied; + let domid = + match (split None '\000' data) with + | domid :: _ -> int_of_string domid diff --git a/xsa115-o-0003-tools-ocaml-xenstored-unify-watch-firing.patch b/xsa115-o-0003-tools-ocaml-xenstored-unify-watch-firing.patch new file mode 100644 index 0000000..fea94a9 --- /dev/null +++ b/xsa115-o-0003-tools-ocaml-xenstored-unify-watch-firing.patch @@ -0,0 +1,29 @@ +From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= +Subject: tools/ocaml/xenstored: unify watch firing +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +This will make it easier insert additional checks in a follow-up patch. +All watches are now fired from a single function. + +This is part of XSA-115. + +Signed-off-by: Edwin Török +Acked-by: Christian Lindig +Reviewed-by: Andrew Cooper + +diff --git a/tools/ocaml/xenstored/connection.ml b/tools/ocaml/xenstored/connection.ml +index 24750ada43..e5df62d9e7 100644 +--- a/tools/ocaml/xenstored/connection.ml ++++ b/tools/ocaml/xenstored/connection.ml +@@ -210,8 +210,7 @@ let fire_watch watch path = + end else + path + in +- let data = Utils.join_by_null [ new_path; watch.token; "" ] in +- send_reply watch.con Transaction.none 0 Xenbus.Xb.Op.Watchevent data ++ fire_single_watch { watch with path = new_path } + + (* Search for a valid unused transaction id. *) + let rec valid_transaction_id con proposed_id = diff --git a/xsa115-o-0004-tools-ocaml-xenstored-introduce-permissions-for-spec.patch b/xsa115-o-0004-tools-ocaml-xenstored-introduce-permissions-for-spec.patch new file mode 100644 index 0000000..76f98e9 --- /dev/null +++ b/xsa115-o-0004-tools-ocaml-xenstored-introduce-permissions-for-spec.patch @@ -0,0 +1,117 @@ +From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= +Subject: tools/ocaml/xenstored: introduce permissions for special watches +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +The special watches "@introduceDomain" and "@releaseDomain" should be +allowed for privileged callers only, as they allow to gain information +about presence of other guests on the host. So send watch events for +those watches via privileged connections only. + +Start to address this by treating the special watches as regular nodes +in the tree, which gives them normal semantics for permissions. A later +change will restrict the handling, so that they can't be listed, etc. + +This is part of XSA-115. 
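One consequence, visible in the utils.ml hunk further down in this patch, is that path normalisation has to accept "@"-prefixed names as absolute. A standalone C rendering of that rule, for illustration only (the real code is the OCaml path_validate):

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    /* Guest-supplied paths must be 1..1024 bytes. Absolute ("/...") and
     * special ("@...") paths are used as-is; anything else is prefixed with
     * the connection's base path, e.g. "/local/domain/5/". */
    static char *validate_path(const char *path, const char *connection_path)
    {
        size_t len = strlen(path);
        size_t clen;
        char *abs_path;

        if (len == 0 || len > 1024)
            return NULL;            /* corresponds to Define.Invalid_path */

        if (path[0] == '/' || path[0] == '@')
            return strdup(path);

        clen = strlen(connection_path);
        abs_path = malloc(clen + len + 1);
        if (!abs_path)
            return NULL;
        memcpy(abs_path, connection_path, clen);
        memcpy(abs_path + clen, path, len + 1);
        return abs_path;
    }

    int main(void)
    {
        char *p = validate_path("@releaseDomain", "/local/domain/5/");

        printf("%s\n", p ? p : "(invalid)");
        free(p);
        return 0;
    }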
+ +Signed-off-by: Edwin Török +Acked-by: Christian Lindig +Reviewed-by: Andrew Cooper + +diff --git a/tools/ocaml/xenstored/process.ml b/tools/ocaml/xenstored/process.ml +index fd79ef564f..e528d1ecb2 100644 +--- a/tools/ocaml/xenstored/process.ml ++++ b/tools/ocaml/xenstored/process.ml +@@ -420,7 +420,7 @@ let do_introduce con _t domains cons data = + else try + let ndom = Domains.create domains domid mfn port in + Connections.add_domain cons ndom; +- Connections.fire_spec_watches cons "@introduceDomain"; ++ Connections.fire_spec_watches cons Store.Path.introduce_domain; + ndom + with _ -> raise Invalid_Cmd_Args + in +@@ -439,7 +439,7 @@ let do_release con _t domains cons data = + Domains.del domains domid; + Connections.del_domain cons domid; + if fire_spec_watches +- then Connections.fire_spec_watches cons "@releaseDomain" ++ then Connections.fire_spec_watches cons Store.Path.release_domain + else raise Invalid_Cmd_Args + + let do_resume con _t domains _cons data = +diff --git a/tools/ocaml/xenstored/store.ml b/tools/ocaml/xenstored/store.ml +index 92b6289b5e..52b88b3ee1 100644 +--- a/tools/ocaml/xenstored/store.ml ++++ b/tools/ocaml/xenstored/store.ml +@@ -214,6 +214,11 @@ let rec lookup node path fct = + + let apply rnode path fct = + lookup rnode path fct ++ ++let introduce_domain = "@introduceDomain" ++let release_domain = "@releaseDomain" ++let specials = List.map of_string [ introduce_domain; release_domain ] ++ + end + + (* The Store.t type *) +diff --git a/tools/ocaml/xenstored/utils.ml b/tools/ocaml/xenstored/utils.ml +index b252db799b..e8c9fe4e94 100644 +--- a/tools/ocaml/xenstored/utils.ml ++++ b/tools/ocaml/xenstored/utils.ml +@@ -88,19 +88,17 @@ let read_file_single_integer filename = + Unix.close fd; + int_of_string (Bytes.sub_string buf 0 sz) + +-let path_complete path connection_path = +- if String.get path 0 <> '/' then +- connection_path ^ path +- else +- path +- ++(* @path may be guest data and needs its length validating. 
@connection_path ++ * is generated locally in xenstored and always of the form "/local/domain/$N/" *) + let path_validate path connection_path = +- if String.length path = 0 || String.length path > 1024 then +- raise Define.Invalid_path +- else +- let cpath = path_complete path connection_path in +- if String.get cpath 0 <> '/' then +- raise Define.Invalid_path +- else +- cpath ++ let len = String.length path in ++ ++ if len = 0 || len > 1024 then raise Define.Invalid_path; ++ ++ let abs_path = ++ match String.get path 0 with ++ | '/' | '@' -> path ++ | _ -> connection_path ^ path ++ in + ++ abs_path +diff --git a/tools/ocaml/xenstored/xenstored.ml b/tools/ocaml/xenstored/xenstored.ml +index 7e7824761b..8d0c50bfa4 100644 +--- a/tools/ocaml/xenstored/xenstored.ml ++++ b/tools/ocaml/xenstored/xenstored.ml +@@ -286,6 +286,8 @@ let _ = + let quit = ref false in + + Logging.init_xenstored_log(); ++ List.iter (fun path -> ++ Store.write store Perms.Connection.full_rights path "") Store.Path.specials; + + let filename = Paths.xen_run_stored ^ "/db" in + if cf.restart && Sys.file_exists filename then ( +@@ -335,7 +337,7 @@ let _ = + let (notify, deaddom) = Domains.cleanup domains in + List.iter (Connections.del_domain cons) deaddom; + if deaddom <> [] || notify then +- Connections.fire_spec_watches cons "@releaseDomain" ++ Connections.fire_spec_watches cons Store.Path.release_domain + ) + else + let c = Connections.find_domain_by_port cons port in diff --git a/xsa115-o-0005-tools-ocaml-xenstored-avoid-watch-events-for-nodes-w.patch b/xsa115-o-0005-tools-ocaml-xenstored-avoid-watch-events-for-nodes-w.patch new file mode 100644 index 0000000..866d415 --- /dev/null +++ b/xsa115-o-0005-tools-ocaml-xenstored-avoid-watch-events-for-nodes-w.patch @@ -0,0 +1,406 @@ +From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= +Subject: tools/ocaml/xenstored: avoid watch events for nodes without access +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Today watch events are sent regardless of the access rights of the +node the event is sent for. This enables any guest to e.g. setup a +watch for "/" in order to have a detailed record of all Xenstore +modifications. + +Modify that by sending only watch events for nodes that the watcher +has a chance to see otherwise (either via direct reads or by querying +the children of a node). This includes cases where the visibility of +a node for a watcher is changing (permissions being removed). + +Permissions for nodes are looked up either in the old (pre +transaction/command) or current trees (post transaction). If +permissions are changed multiple times in a transaction only the final +version is checked, because considering a transaction atomic the +individual permission changes would not be noticable to an outside +observer. + +Two trees are only needed for set_perms: here we can either notice the +node disappearing (if we loose permission), appearing +(if we gain permission), or changing (if we preserve permission). + +RM needs to only look at the old tree: in the new tree the node would be +gone, or could have different permissions if it was recreated (the +recreation would get its own watch fired). + +Inside a tree we lookup the watch path's parent, and then the watch path +child itself. This gets us 4 sets of permissions in worst case, and if +either of these allows a watch, then we permit it to fire. 
The +permission lookups are done without logging the failures, otherwise we'd +get confusing errors about permission denied for some paths, but a watch +still firing. The actual result is logged in xenstored-access log: + + 'w event ...' as usual if watch was fired + 'w notfired...' if the watch was not fired, together with path and + permission set to help in troubleshooting + +Adding a watch bypasses permission checks and always fires the watch +once immediately. This is consistent with the specification, and no +information is gained (the watch is fired both if the path exists or +doesn't, and both if you have or don't have access, i.e. it reflects the +path a domain gave it back to that domain). + +There are some semantic changes here: + + * Write+rm in a single transaction of the same path is unobservable + now via watches: both before and after a transaction the path + doesn't exist, thus both tree lookups come up with the empty + permission set, and noone, not even Dom0 can see this. This is + consistent with transaction atomicity though. + * Similar to above if we temporarily grant and then revoke permission + on a path any watches fired inbetween are ignored as well + * There is a new log event (w notfired) which shows the permission set + of the path, and the path. + * Watches on paths that a domain doesn't have access to are now not + seen, which is the purpose of the security fix. + +This is part of XSA-115. + +Signed-off-by: Edwin Török +Acked-by: Christian Lindig +Reviewed-by: Andrew Cooper + +diff --git a/tools/ocaml/xenstored/connection.ml b/tools/ocaml/xenstored/connection.ml +index e5df62d9e7..644a448f2e 100644 +--- a/tools/ocaml/xenstored/connection.ml ++++ b/tools/ocaml/xenstored/connection.ml +@@ -196,11 +196,36 @@ let list_watches con = + con.watches [] in + List.concat ll + +-let fire_single_watch watch = ++let dbg fmt = Logging.debug "connection" fmt ++let info fmt = Logging.info "connection" fmt ++ ++let lookup_watch_perm path = function ++| None -> [] ++| Some root -> ++ try Store.Path.apply root path @@ fun parent name -> ++ Store.Node.get_perms parent :: ++ try [Store.Node.get_perms (Store.Node.find parent name)] ++ with Not_found -> [] ++ with Define.Invalid_path | Not_found -> [] ++ ++let lookup_watch_perms oldroot root path = ++ lookup_watch_perm path oldroot @ lookup_watch_perm path (Some root) ++ ++let fire_single_watch_unchecked watch = + let data = Utils.join_by_null [watch.path; watch.token; ""] in + send_reply watch.con Transaction.none 0 Xenbus.Xb.Op.Watchevent data + +-let fire_watch watch path = ++let fire_single_watch (oldroot, root) watch = ++ let abspath = get_watch_path watch.con watch.path |> Store.Path.of_string in ++ let perms = lookup_watch_perms oldroot root abspath in ++ if List.exists (Perms.has watch.con.perm READ) perms then ++ fire_single_watch_unchecked watch ++ else ++ let perms = perms |> List.map (Perms.Node.to_string ~sep:" ") |> String.concat ", " in ++ let con = get_domstr watch.con in ++ Logging.watch_not_fired ~con perms (Store.Path.to_string abspath) ++ ++let fire_watch roots watch path = + let new_path = + if watch.is_relative && path.[0] = '/' + then begin +@@ -210,7 +235,7 @@ let fire_watch watch path = + end else + path + in +- fire_single_watch { watch with path = new_path } ++ fire_single_watch roots { watch with path = new_path } + + (* Search for a valid unused transaction id. 
*) + let rec valid_transaction_id con proposed_id = +diff --git a/tools/ocaml/xenstored/connections.ml b/tools/ocaml/xenstored/connections.ml +index f2c4318c88..9f9f7ee2f0 100644 +--- a/tools/ocaml/xenstored/connections.ml ++++ b/tools/ocaml/xenstored/connections.ml +@@ -135,25 +135,26 @@ let del_watch cons con path token = + watch + + (* path is absolute *) +-let fire_watches cons path recurse = ++let fire_watches ?oldroot root cons path recurse = + let key = key_of_path path in + let path = Store.Path.to_string path in ++ let roots = oldroot, root in + let fire_watch _ = function + | None -> () +- | Some watches -> List.iter (fun w -> Connection.fire_watch w path) watches ++ | Some watches -> List.iter (fun w -> Connection.fire_watch roots w path) watches + in + let fire_rec _x = function + | None -> () + | Some watches -> +- List.iter (fun w -> Connection.fire_single_watch w) watches ++ List.iter (Connection.fire_single_watch roots) watches + in + Trie.iter_path fire_watch cons.watches key; + if recurse then + Trie.iter fire_rec (Trie.sub cons.watches key) + +-let fire_spec_watches cons specpath = ++let fire_spec_watches root cons specpath = + iter cons (fun con -> +- List.iter (fun w -> Connection.fire_single_watch w) (Connection.get_watches con specpath)) ++ List.iter (Connection.fire_single_watch (None, root)) (Connection.get_watches con specpath)) + + let set_target cons domain target_domain = + let con = find_domain cons domain in +diff --git a/tools/ocaml/xenstored/logging.ml b/tools/ocaml/xenstored/logging.ml +index c5cba79e92..1ede131329 100644 +--- a/tools/ocaml/xenstored/logging.ml ++++ b/tools/ocaml/xenstored/logging.ml +@@ -161,6 +161,8 @@ let xenstored_log_nb_lines = ref 13215 + let xenstored_log_nb_chars = ref (-1) + let xenstored_logger = ref (None: logger option) + ++let debug_enabled () = !xenstored_log_level = Debug ++ + let set_xenstored_log_destination s = + xenstored_log_destination := log_destination_of_string s + +@@ -204,6 +206,7 @@ type access_type = + | Commit + | Newconn + | Endconn ++ | Watch_not_fired + | XbOp of Xenbus.Xb.Op.operation + + let string_of_tid ~con tid = +@@ -217,6 +220,7 @@ let string_of_access_type = function + | Commit -> "commit " + | Newconn -> "newconn " + | Endconn -> "endconn " ++ | Watch_not_fired -> "w notfired" + + | XbOp op -> match op with + | Xenbus.Xb.Op.Debug -> "debug " +@@ -331,3 +335,7 @@ let xb_answer ~tid ~con ~ty data = + | _ -> false, Debug + in + if print then access_logging ~tid ~con ~data (XbOp ty) ~level ++ ++let watch_not_fired ~con perms path = ++ let data = Printf.sprintf "EPERM perms=[%s] path=%s" perms path in ++ access_logging ~tid:0 ~con ~data Watch_not_fired ~level:Info +diff --git a/tools/ocaml/xenstored/perms.ml b/tools/ocaml/xenstored/perms.ml +index 3ea193ea14..23b80aba3d 100644 +--- a/tools/ocaml/xenstored/perms.ml ++++ b/tools/ocaml/xenstored/perms.ml +@@ -79,9 +79,9 @@ let of_string s = + let string_of_perm perm = + Printf.sprintf "%c%u" (char_of_permty (snd perm)) (fst perm) + +-let to_string permvec = ++let to_string ?(sep="\000") permvec = + let l = ((permvec.owner, permvec.other) :: permvec.acl) in +- String.concat "\000" (List.map string_of_perm l) ++ String.concat sep (List.map string_of_perm l) + + end + +@@ -132,8 +132,8 @@ let check_owner (connection:Connection.t) (node:Node.t) = + then Connection.is_owner connection (Node.get_owner node) + else true + +-(* check if the current connection has the requested perm on the current node *) +-let check (connection:Connection.t) request (node:Node.t) = 
++(* check if the current connection lacks the requested perm on the current node *) ++let lacks (connection:Connection.t) request (node:Node.t) = + let check_acl domainid = + let perm = + if List.mem_assoc domainid (Node.get_acl node) +@@ -154,11 +154,19 @@ let check (connection:Connection.t) request (node:Node.t) = + info "Permission denied: Domain %d has write only access" domainid; + false + in +- if !activate ++ !activate + && not (Connection.is_dom0 connection) + && not (check_owner connection node) + && not (List.exists check_acl (Connection.get_owners connection)) ++ ++(* check if the current connection has the requested perm on the current node. ++* Raises an exception if it doesn't. *) ++let check connection request node = ++ if lacks connection request node + then raise Define.Permission_denied + ++(* check if the current connection has the requested perm on the current node *) ++let has connection request node = not (lacks connection request node) ++ + let equiv perm1 perm2 = + (Node.to_string perm1) = (Node.to_string perm2) +diff --git a/tools/ocaml/xenstored/process.ml b/tools/ocaml/xenstored/process.ml +index e528d1ecb2..f99b9e935c 100644 +--- a/tools/ocaml/xenstored/process.ml ++++ b/tools/ocaml/xenstored/process.ml +@@ -56,15 +56,17 @@ let split_one_path data con = + | path :: "" :: [] -> Store.Path.create path (Connection.get_path con) + | _ -> raise Invalid_Cmd_Args + +-let process_watch ops cons = ++let process_watch t cons = ++ let oldroot = t.Transaction.oldroot in ++ let newroot = Store.get_root t.store in ++ let ops = Transaction.get_paths t |> List.rev in + let do_op_watch op cons = +- let recurse = match (fst op) with +- | Xenbus.Xb.Op.Write -> false +- | Xenbus.Xb.Op.Mkdir -> false +- | Xenbus.Xb.Op.Rm -> true +- | Xenbus.Xb.Op.Setperms -> false ++ let recurse, oldroot, root = match (fst op) with ++ | Xenbus.Xb.Op.Write|Xenbus.Xb.Op.Mkdir -> false, None, newroot ++ | Xenbus.Xb.Op.Rm -> true, None, oldroot ++ | Xenbus.Xb.Op.Setperms -> false, Some oldroot, newroot + | _ -> raise (Failure "huh ?") in +- Connections.fire_watches cons (snd op) recurse in ++ Connections.fire_watches ?oldroot root cons (snd op) recurse in + List.iter (fun op -> do_op_watch op cons) ops + + let create_implicit_path t perm path = +@@ -205,7 +207,7 @@ let reply_ack fct con t doms cons data = + fct con t doms cons data; + Packet.Ack (fun () -> + if Transaction.get_id t = Transaction.none then +- process_watch (Transaction.get_paths t) cons ++ process_watch t cons + ) + + let reply_data fct con t doms cons data = +@@ -353,14 +355,17 @@ let transaction_replay c t doms cons = + ignore @@ Connection.end_transaction c tid None + ) + +-let do_watch con _t _domains cons data = ++let do_watch con t _domains cons data = + let (node, token) = + match (split None '\000' data) with + | [node; token; ""] -> node, token + | _ -> raise Invalid_Cmd_Args + in + let watch = Connections.add_watch cons con node token in +- Packet.Ack (fun () -> Connection.fire_single_watch watch) ++ Packet.Ack (fun () -> ++ (* xenstore.txt says this watch is fired immediately, ++ implying even if path doesn't exist or is unreadable *) ++ Connection.fire_single_watch_unchecked watch) + + let do_unwatch con _t _domains cons data = + let (node, token) = +@@ -391,7 +396,7 @@ let do_transaction_end con t domains cons data = + if not success then + raise Transaction_again; + if commit then begin +- process_watch (List.rev (Transaction.get_paths t)) cons; ++ process_watch t cons; + match t.Transaction.ty with + | Transaction.No -> + 
() (* no need to record anything *) +@@ -399,7 +404,7 @@ let do_transaction_end con t domains cons data = + record_commit ~con ~tid:id ~before:oldstore ~after:cstore + end + +-let do_introduce con _t domains cons data = ++let do_introduce con t domains cons data = + if not (Connection.is_dom0 con) + then raise Define.Permission_denied; + let (domid, mfn, port) = +@@ -420,14 +425,14 @@ let do_introduce con _t domains cons data = + else try + let ndom = Domains.create domains domid mfn port in + Connections.add_domain cons ndom; +- Connections.fire_spec_watches cons Store.Path.introduce_domain; ++ Connections.fire_spec_watches (Transaction.get_root t) cons Store.Path.introduce_domain; + ndom + with _ -> raise Invalid_Cmd_Args + in + if (Domain.get_remote_port dom) <> port || (Domain.get_mfn dom) <> mfn then + raise Domain_not_match + +-let do_release con _t domains cons data = ++let do_release con t domains cons data = + if not (Connection.is_dom0 con) + then raise Define.Permission_denied; + let domid = +@@ -439,7 +444,7 @@ let do_release con _t domains cons data = + Domains.del domains domid; + Connections.del_domain cons domid; + if fire_spec_watches +- then Connections.fire_spec_watches cons Store.Path.release_domain ++ then Connections.fire_spec_watches (Transaction.get_root t) cons Store.Path.release_domain + else raise Invalid_Cmd_Args + + let do_resume con _t domains _cons data = +@@ -507,6 +512,8 @@ let maybe_ignore_transaction = function + Transaction.none + | _ -> fun x -> x + ++ ++let () = Printexc.record_backtrace true + (** + * Nothrow guarantee. + *) +@@ -548,7 +555,8 @@ let process_packet ~store ~cons ~doms ~con ~req = + (* Put the response on the wire *) + send_response ty con t rid response + with exn -> +- error "process packet: %s" (Printexc.to_string exn); ++ let bt = Printexc.get_backtrace () in ++ error "process packet: %s. %s" (Printexc.to_string exn) bt; + Connection.send_error con tid rid "EIO" + + let do_input store cons doms con = +diff --git a/tools/ocaml/xenstored/transaction.ml b/tools/ocaml/xenstored/transaction.ml +index 963734a653..25bc8c3b4a 100644 +--- a/tools/ocaml/xenstored/transaction.ml ++++ b/tools/ocaml/xenstored/transaction.ml +@@ -82,6 +82,7 @@ type t = { + start_count: int64; + store: Store.t; (* This is the store that we change in write operations. 
*) + quota: Quota.t; ++ oldroot: Store.Node.t; + mutable paths: (Xenbus.Xb.Op.operation * Store.Path.t) list; + mutable operations: (Packet.request * Packet.response) list; + mutable read_lowpath: Store.Path.t option; +@@ -123,6 +124,7 @@ let make ?(internal=false) id store = + start_count = !counter; + store = if id = none then store else Store.copy store; + quota = Quota.copy store.Store.quota; ++ oldroot = Store.get_root store; + paths = []; + operations = []; + read_lowpath = None; +@@ -137,6 +139,8 @@ let make ?(internal=false) id store = + let get_store t = t.store + let get_paths t = t.paths + ++let get_root t = Store.get_root t.store ++ + let is_read_only t = t.paths = [] + let add_wop t ty path = t.paths <- (ty, path) :: t.paths + let add_operation ~perm t request response = +diff --git a/tools/ocaml/xenstored/xenstored.ml b/tools/ocaml/xenstored/xenstored.ml +index 8d0c50bfa4..f7b88065bb 100644 +--- a/tools/ocaml/xenstored/xenstored.ml ++++ b/tools/ocaml/xenstored/xenstored.ml +@@ -337,7 +337,9 @@ let _ = + let (notify, deaddom) = Domains.cleanup domains in + List.iter (Connections.del_domain cons) deaddom; + if deaddom <> [] || notify then +- Connections.fire_spec_watches cons Store.Path.release_domain ++ Connections.fire_spec_watches ++ (Store.get_root store) ++ cons Store.Path.release_domain + ) + else + let c = Connections.find_domain_by_port cons port in diff --git a/xsa115-o-0006-tools-ocaml-xenstored-add-xenstored.conf-flag-to-tur.patch b/xsa115-o-0006-tools-ocaml-xenstored-add-xenstored.conf-flag-to-tur.patch new file mode 100644 index 0000000..d1fa8b2 --- /dev/null +++ b/xsa115-o-0006-tools-ocaml-xenstored-add-xenstored.conf-flag-to-tur.patch @@ -0,0 +1,84 @@ +From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= +Subject: tools/ocaml/xenstored: add xenstored.conf flag to turn off watch + permission checks +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +There are flags to turn off quotas and the permission system, so add one +that turns off the newly introduced watch permission checks as well. + +This is part of XSA-115. + +Signed-off-by: Edwin Török +Acked-by: Christian Lindig +Reviewed-by: Andrew Cooper + +diff --git a/tools/ocaml/xenstored/connection.ml b/tools/ocaml/xenstored/connection.ml +index 644a448f2e..fa0d3c4d92 100644 +--- a/tools/ocaml/xenstored/connection.ml ++++ b/tools/ocaml/xenstored/connection.ml +@@ -218,7 +218,7 @@ let fire_single_watch_unchecked watch = + let fire_single_watch (oldroot, root) watch = + let abspath = get_watch_path watch.con watch.path |> Store.Path.of_string in + let perms = lookup_watch_perms oldroot root abspath in +- if List.exists (Perms.has watch.con.perm READ) perms then ++ if Perms.can_fire_watch watch.con.perm perms then + fire_single_watch_unchecked watch + else + let perms = perms |> List.map (Perms.Node.to_string ~sep:" ") |> String.concat ", " in +diff --git a/tools/ocaml/xenstored/oxenstored.conf.in b/tools/ocaml/xenstored/oxenstored.conf.in +index 151b65b72d..f843482981 100644 +--- a/tools/ocaml/xenstored/oxenstored.conf.in ++++ b/tools/ocaml/xenstored/oxenstored.conf.in +@@ -44,6 +44,16 @@ conflict-rate-limit-is-aggregate = true + # Activate node permission system + perms-activate = true + ++# Activate the watch permission system ++# When this is enabled unprivileged guests can only get watch events ++# for xenstore entries that they would've been able to read. ++# ++# When this is disabled unprivileged guests may get watch events ++# for xenstore entries that they cannot read. 
The watch event contains ++# only the entry name, not the value. ++# This restores behaviour prior to XSA-115. ++perms-watch-activate = true ++ + # Activate quota + quota-activate = true + quota-maxentity = 1000 +diff --git a/tools/ocaml/xenstored/perms.ml b/tools/ocaml/xenstored/perms.ml +index 23b80aba3d..ee7fee6bda 100644 +--- a/tools/ocaml/xenstored/perms.ml ++++ b/tools/ocaml/xenstored/perms.ml +@@ -20,6 +20,7 @@ let info fmt = Logging.info "perms" fmt + open Stdext + + let activate = ref true ++let watch_activate = ref true + + type permty = READ | WRITE | RDWR | NONE + +@@ -168,5 +169,9 @@ let check connection request node = + (* check if the current connection has the requested perm on the current node *) + let has connection request node = not (lacks connection request node) + ++let can_fire_watch connection perms = ++ not !watch_activate ++ || List.exists (has connection READ) perms ++ + let equiv perm1 perm2 = + (Node.to_string perm1) = (Node.to_string perm2) +diff --git a/tools/ocaml/xenstored/xenstored.ml b/tools/ocaml/xenstored/xenstored.ml +index f7b88065bb..0d355bbcb8 100644 +--- a/tools/ocaml/xenstored/xenstored.ml ++++ b/tools/ocaml/xenstored/xenstored.ml +@@ -95,6 +95,7 @@ let parse_config filename = + ("conflict-max-history-seconds", Config.Set_float Define.conflict_max_history_seconds); + ("conflict-rate-limit-is-aggregate", Config.Set_bool Define.conflict_rate_limit_is_aggregate); + ("perms-activate", Config.Set_bool Perms.activate); ++ ("perms-watch-activate", Config.Set_bool Perms.watch_activate); + ("quota-activate", Config.Set_bool Quota.activate); + ("quota-maxwatch", Config.Set_int Define.maxwatch); + ("quota-transaction", Config.Set_int Define.maxtransaction); diff --git a/xsa322-4.14-c.patch b/xsa322-4.14-c.patch new file mode 100644 index 0000000..5059f24 --- /dev/null +++ b/xsa322-4.14-c.patch @@ -0,0 +1,532 @@ +From: Juergen Gross +Subject: tools/xenstore: revoke access rights for removed domains + +Access rights of Xenstore nodes are per domid. Unfortunately existing +granted access rights are not removed when a domain is being destroyed. +This means that a new domain created with the same domid will inherit +the access rights to Xenstore nodes from the previous domain(s) with +the same domid. + +This can be avoided by adding a generation counter to each domain. +The generation counter of the domain is set to the global generation +counter when a domain structure is being allocated. When reading or +writing a node all permissions of domains which are younger than the +node itself are dropped. This is done by flagging the related entry +as invalid in order to avoid modifying permissions in a way the user +could detect. + +A special case has to be considered: for a new domain the first +Xenstore entries are already written before the domain is officially +introduced in Xenstore. In order not to drop the permissions for the +new domain a domain struct is allocated even before introduction if +the hypervisor is aware of the domain. This requires adding another +bool "introduced" to struct domain in xenstored. In order to avoid +additional padding holes convert the shutdown flag to bool, too. + +As verifying permissions has its price regarding runtime add a new +quota for limiting the number of permissions an unprivileged domain +can set for a node. The default for that new quota is 5. + +This is part of XSA-322. 
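As a rough illustration of the generation-count scheme described above, here is a minimal OCaml sketch (editorial, not part of the patch; all record and function names are invented, and only the non-owner ACL entries are modelled):

(* Editorial sketch only, not patch code; names are invented. *)
type acl_entry = { domid : int; mutable ignored : bool }

(* [dom_gen] maps live domids to the generation at which each domain was
   created; a missing entry means the domain no longer exists. *)
let adjust_node_perms ~dom_gen ~node_gen acl =
  List.iter (fun e ->
    match List.assoc_opt e.domid dom_gen with
    | Some g when g <= node_gen -> ()        (* domain is older than the node *)
    | _ -> e.ignored <- true                 (* younger or gone: drop the right *)
  ) acl

let () =
  let acl = [ { domid = 1; ignored = false }; { domid = 7; ignored = false } ] in
  (* node written at generation 10; domid 7 was re-created at generation 42 *)
  adjust_node_perms ~dom_gen:[ (1, 3); (7, 42) ] ~node_gen:10 acl;
  assert (not (List.hd acl).ignored);
  assert ((List.nth acl 1).ignored)

The patch below implements the real version of this with chk_domain_generation() and the new XS_PERM_IGNORE flag, and additionally hands ownership of orphaned nodes over to the privileged domain.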
+ +Signed-off-by: Juergen Gross +Reviewed-by: Paul Durrant +Acked-by: Julien Grall + +diff --git a/tools/xenstore/include/xenstore_lib.h b/tools/xenstore/include/xenstore_lib.h +index 0ffbae9eb5..4c9b6d1685 100644 +--- a/tools/xenstore/include/xenstore_lib.h ++++ b/tools/xenstore/include/xenstore_lib.h +@@ -34,6 +34,7 @@ enum xs_perm_type { + /* Internal use. */ + XS_PERM_ENOENT_OK = 4, + XS_PERM_OWNER = 8, ++ XS_PERM_IGNORE = 16, + }; + + struct xs_permissions +diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c +index 92bfd54cff..505560a5de 100644 +--- a/tools/xenstore/xenstored_core.c ++++ b/tools/xenstore/xenstored_core.c +@@ -104,6 +104,7 @@ int quota_nb_entry_per_domain = 1000; + int quota_nb_watch_per_domain = 128; + int quota_max_entry_size = 2048; /* 2K */ + int quota_max_transaction = 10; ++int quota_nb_perms_per_node = 5; + + void trace(const char *fmt, ...) + { +@@ -409,8 +410,13 @@ struct node *read_node(struct connection *conn, const void *ctx, + + /* Permissions are struct xs_permissions. */ + node->perms.p = hdr->perms; ++ if (domain_adjust_node_perms(node)) { ++ talloc_free(node); ++ return NULL; ++ } ++ + /* Data is binary blob (usually ascii, no nul). */ +- node->data = node->perms.p + node->perms.num; ++ node->data = node->perms.p + hdr->num_perms; + /* Children is strings, nul separated. */ + node->children = node->data + node->datalen; + +@@ -426,6 +432,9 @@ int write_node_raw(struct connection *conn, TDB_DATA *key, struct node *node, + void *p; + struct xs_tdb_record_hdr *hdr; + ++ if (domain_adjust_node_perms(node)) ++ return errno; ++ + data.dsize = sizeof(*hdr) + + node->perms.num * sizeof(node->perms.p[0]) + + node->datalen + node->childlen; +@@ -485,8 +494,9 @@ enum xs_perm_type perm_for_conn(struct connection *conn, + return (XS_PERM_READ|XS_PERM_WRITE|XS_PERM_OWNER) & mask; + + for (i = 1; i < perms->num; i++) +- if (perms->p[i].id == conn->id +- || (conn->target && perms->p[i].id == conn->target->id)) ++ if (!(perms->p[i].perms & XS_PERM_IGNORE) && ++ (perms->p[i].id == conn->id || ++ (conn->target && perms->p[i].id == conn->target->id))) + return perms->p[i].perms & mask; + + return perms->p[0].perms & mask; +@@ -1248,8 +1258,12 @@ static int do_set_perms(struct connection *conn, struct buffered_data *in) + if (perms.num < 2) + return EINVAL; + +- permstr = in->buffer + strlen(in->buffer) + 1; + perms.num--; ++ if (domain_is_unprivileged(conn) && ++ perms.num > quota_nb_perms_per_node) ++ return ENOSPC; ++ ++ permstr = in->buffer + strlen(in->buffer) + 1; + + perms.p = talloc_array(in, struct xs_permissions, perms.num); + if (!perms.p) +@@ -1904,6 +1918,7 @@ static void usage(void) + " -S, --entry-size limit the size of entry per domain, and\n" + " -W, --watch-nb limit the number of watches per domain,\n" + " -t, --transaction limit the number of transaction allowed per domain,\n" ++" -A, --perm-nb limit the number of permissions per node,\n" + " -R, --no-recovery to request that no recovery should be attempted when\n" + " the store is corrupted (debug only),\n" + " -I, --internal-db store database in memory, not on disk\n" +@@ -1924,6 +1939,7 @@ static struct option options[] = { + { "entry-size", 1, NULL, 'S' }, + { "trace-file", 1, NULL, 'T' }, + { "transaction", 1, NULL, 't' }, ++ { "perm-nb", 1, NULL, 'A' }, + { "no-recovery", 0, NULL, 'R' }, + { "internal-db", 0, NULL, 'I' }, + { "verbose", 0, NULL, 'V' }, +@@ -1946,7 +1962,7 @@ int main(int argc, char *argv[]) + int timeout; + + +- while ((opt = getopt_long(argc, argv, 
"DE:F:HNPS:t:T:RVW:", options, ++ while ((opt = getopt_long(argc, argv, "DE:F:HNPS:t:A:T:RVW:", options, + NULL)) != -1) { + switch (opt) { + case 'D': +@@ -1988,6 +2004,9 @@ int main(int argc, char *argv[]) + case 'W': + quota_nb_watch_per_domain = strtol(optarg, NULL, 10); + break; ++ case 'A': ++ quota_nb_perms_per_node = strtol(optarg, NULL, 10); ++ break; + case 'e': + dom0_event = strtol(optarg, NULL, 10); + break; +diff --git a/tools/xenstore/xenstored_domain.c b/tools/xenstore/xenstored_domain.c +index 9fad470f83..dc635e9be3 100644 +--- a/tools/xenstore/xenstored_domain.c ++++ b/tools/xenstore/xenstored_domain.c +@@ -67,8 +67,14 @@ struct domain + /* The connection associated with this. */ + struct connection *conn; + ++ /* Generation count at domain introduction time. */ ++ uint64_t generation; ++ + /* Have we noticed that this domain is shutdown? */ +- int shutdown; ++ bool shutdown; ++ ++ /* Has domain been officially introduced? */ ++ bool introduced; + + /* number of entry from this domain in the store */ + int nbentry; +@@ -188,6 +194,9 @@ static int destroy_domain(void *_domain) + + list_del(&domain->list); + ++ if (!domain->introduced) ++ return 0; ++ + if (domain->port) { + if (xenevtchn_unbind(xce_handle, domain->port) == -1) + eprintf("> Unbinding port %i failed!\n", domain->port); +@@ -209,21 +218,34 @@ static int destroy_domain(void *_domain) + return 0; + } + ++static bool get_domain_info(unsigned int domid, xc_dominfo_t *dominfo) ++{ ++ return xc_domain_getinfo(*xc_handle, domid, 1, dominfo) == 1 && ++ dominfo->domid == domid; ++} ++ + static void domain_cleanup(void) + { + xc_dominfo_t dominfo; + struct domain *domain; + struct connection *conn; + int notify = 0; ++ bool dom_valid; + + again: + list_for_each_entry(domain, &domains, list) { +- if (xc_domain_getinfo(*xc_handle, domain->domid, 1, +- &dominfo) == 1 && +- dominfo.domid == domain->domid) { ++ dom_valid = get_domain_info(domain->domid, &dominfo); ++ if (!domain->introduced) { ++ if (!dom_valid) { ++ talloc_free(domain); ++ goto again; ++ } ++ continue; ++ } ++ if (dom_valid) { + if ((dominfo.crashed || dominfo.shutdown) + && !domain->shutdown) { +- domain->shutdown = 1; ++ domain->shutdown = true; + notify = 1; + } + if (!dominfo.dying) +@@ -289,58 +311,84 @@ static char *talloc_domain_path(void *context, unsigned int domid) + return talloc_asprintf(context, "/local/domain/%u", domid); + } + +-static struct domain *new_domain(void *context, unsigned int domid, +- int port) ++static struct domain *find_domain_struct(unsigned int domid) ++{ ++ struct domain *i; ++ ++ list_for_each_entry(i, &domains, list) { ++ if (i->domid == domid) ++ return i; ++ } ++ return NULL; ++} ++ ++static struct domain *alloc_domain(void *context, unsigned int domid) + { + struct domain *domain; +- int rc; + + domain = talloc(context, struct domain); +- if (!domain) ++ if (!domain) { ++ errno = ENOMEM; + return NULL; ++ } + +- domain->port = 0; +- domain->shutdown = 0; + domain->domid = domid; +- domain->path = talloc_domain_path(domain, domid); +- if (!domain->path) +- return NULL; ++ domain->generation = generation; ++ domain->introduced = false; + +- wrl_domain_new(domain); ++ talloc_set_destructor(domain, destroy_domain); + + list_add(&domain->list, &domains); +- talloc_set_destructor(domain, destroy_domain); ++ ++ return domain; ++} ++ ++static int new_domain(struct domain *domain, int port) ++{ ++ int rc; ++ ++ domain->port = 0; ++ domain->shutdown = false; ++ domain->path = talloc_domain_path(domain, domain->domid); ++ if 
(!domain->path) { ++ errno = ENOMEM; ++ return errno; ++ } ++ ++ wrl_domain_new(domain); + + /* Tell kernel we're interested in this event. */ +- rc = xenevtchn_bind_interdomain(xce_handle, domid, port); ++ rc = xenevtchn_bind_interdomain(xce_handle, domain->domid, port); + if (rc == -1) +- return NULL; ++ return errno; + domain->port = rc; + ++ domain->introduced = true; ++ + domain->conn = new_connection(writechn, readchn); +- if (!domain->conn) +- return NULL; ++ if (!domain->conn) { ++ errno = ENOMEM; ++ return errno; ++ } + + domain->conn->domain = domain; +- domain->conn->id = domid; ++ domain->conn->id = domain->domid; + + domain->remote_port = port; + domain->nbentry = 0; + domain->nbwatch = 0; + +- return domain; ++ return 0; + } + + + static struct domain *find_domain_by_domid(unsigned int domid) + { +- struct domain *i; ++ struct domain *d; + +- list_for_each_entry(i, &domains, list) { +- if (i->domid == domid) +- return i; +- } +- return NULL; ++ d = find_domain_struct(domid); ++ ++ return (d && d->introduced) ? d : NULL; + } + + static void domain_conn_reset(struct domain *domain) +@@ -386,15 +434,21 @@ int do_introduce(struct connection *conn, struct buffered_data *in) + if (port <= 0) + return EINVAL; + +- domain = find_domain_by_domid(domid); ++ domain = find_domain_struct(domid); + + if (domain == NULL) { ++ /* Hang domain off "in" until we're finished. */ ++ domain = alloc_domain(in, domid); ++ if (domain == NULL) ++ return ENOMEM; ++ } ++ ++ if (!domain->introduced) { + interface = map_interface(domid); + if (!interface) + return errno; + /* Hang domain off "in" until we're finished. */ +- domain = new_domain(in, domid, port); +- if (!domain) { ++ if (new_domain(domain, port)) { + rc = errno; + unmap_interface(interface); + return rc; +@@ -503,8 +557,8 @@ int do_resume(struct connection *conn, struct buffered_data *in) + if (IS_ERR(domain)) + return -PTR_ERR(domain); + +- domain->shutdown = 0; +- ++ domain->shutdown = false; ++ + send_ack(conn, XS_RESUME); + + return 0; +@@ -647,8 +701,10 @@ static int dom0_init(void) + if (port == -1) + return -1; + +- dom0 = new_domain(NULL, xenbus_master_domid(), port); +- if (dom0 == NULL) ++ dom0 = alloc_domain(NULL, xenbus_master_domid()); ++ if (!dom0) ++ return -1; ++ if (new_domain(dom0, port)) + return -1; + + dom0->interface = xenbus_map(); +@@ -729,6 +785,66 @@ void domain_entry_inc(struct connection *conn, struct node *node) + } + } + ++/* ++ * Check whether a domain was created before or after a specific generation ++ * count (used for testing whether a node permission is older than a domain). ++ * ++ * Return values: ++ * -1: error ++ * 0: domain has higher generation count (it is younger than a node with the ++ * given count), or domain isn't existing any longer ++ * 1: domain is older than the node ++ */ ++static int chk_domain_generation(unsigned int domid, uint64_t gen) ++{ ++ struct domain *d; ++ xc_dominfo_t dominfo; ++ ++ if (!xc_handle && domid == 0) ++ return 1; ++ ++ d = find_domain_struct(domid); ++ if (d) ++ return (d->generation <= gen) ? 1 : 0; ++ ++ if (!get_domain_info(domid, &dominfo)) ++ return 0; ++ ++ d = alloc_domain(NULL, domid); ++ return d ? 1 : -1; ++} ++ ++/* ++ * Remove permissions for no longer existing domains in order to avoid a new ++ * domain with the same domid inheriting the permissions. 
++ */ ++int domain_adjust_node_perms(struct node *node) ++{ ++ unsigned int i; ++ int ret; ++ ++ ret = chk_domain_generation(node->perms.p[0].id, node->generation); ++ if (ret < 0) ++ return errno; ++ ++ /* If the owner doesn't exist any longer give it to priv domain. */ ++ if (!ret) ++ node->perms.p[0].id = priv_domid; ++ ++ for (i = 1; i < node->perms.num; i++) { ++ if (node->perms.p[i].perms & XS_PERM_IGNORE) ++ continue; ++ ret = chk_domain_generation(node->perms.p[i].id, ++ node->generation); ++ if (ret < 0) ++ return errno; ++ if (!ret) ++ node->perms.p[i].perms |= XS_PERM_IGNORE; ++ } ++ ++ return 0; ++} ++ + void domain_entry_dec(struct connection *conn, struct node *node) + { + struct domain *d; +diff --git a/tools/xenstore/xenstored_domain.h b/tools/xenstore/xenstored_domain.h +index 259183962a..5e00087206 100644 +--- a/tools/xenstore/xenstored_domain.h ++++ b/tools/xenstore/xenstored_domain.h +@@ -56,6 +56,9 @@ bool domain_can_write(struct connection *conn); + + bool domain_is_unprivileged(struct connection *conn); + ++/* Remove node permissions for no longer existing domains. */ ++int domain_adjust_node_perms(struct node *node); ++ + /* Quota manipulation */ + void domain_entry_inc(struct connection *conn, struct node *); + void domain_entry_dec(struct connection *conn, struct node *); +diff --git a/tools/xenstore/xenstored_transaction.c b/tools/xenstore/xenstored_transaction.c +index a7d8c5d475..2881f3b2e4 100644 +--- a/tools/xenstore/xenstored_transaction.c ++++ b/tools/xenstore/xenstored_transaction.c +@@ -47,7 +47,12 @@ + * transaction. + * Each time the global generation count is copied to either a node or a + * transaction it is incremented. This ensures all nodes and/or transactions +- * are having a unique generation count. ++ * are having a unique generation count. The increment is done _before_ the ++ * copy as that is needed for checking whether a domain was created before ++ * or after a node has been written (the domain's generation is set with the ++ * actual generation count without incrementing it, in order to support ++ * writing a node for a domain before the domain has been officially ++ * introduced). + * + * Transaction conflicts are detected by checking the generation count of all + * nodes read in the transaction to match with the generation count in the +@@ -161,7 +166,7 @@ struct transaction + }; + + extern int quota_max_transaction; +-static uint64_t generation; ++uint64_t generation; + + static void set_tdb_key(const char *name, TDB_DATA *key) + { +@@ -237,7 +242,7 @@ int access_node(struct connection *conn, struct node *node, + bool introduce = false; + + if (type != NODE_ACCESS_READ) { +- node->generation = generation++; ++ node->generation = ++generation; + if (conn && !conn->transaction) + wrl_apply_debit_direct(conn); + } +@@ -374,7 +379,7 @@ static int finalize_transaction(struct connection *conn, + if (!data.dptr) + goto err; + hdr = (void *)data.dptr; +- hdr->generation = generation++; ++ hdr->generation = ++generation; + ret = tdb_store(tdb_ctx, key, data, + TDB_REPLACE); + talloc_free(data.dptr); +@@ -462,7 +467,7 @@ int do_transaction_start(struct connection *conn, struct buffered_data *in) + INIT_LIST_HEAD(&trans->accessed); + INIT_LIST_HEAD(&trans->changed_domains); + trans->fail = false; +- trans->generation = generation++; ++ trans->generation = ++generation; + + /* Pick an unused transaction identifier. 
*/ + do { +diff --git a/tools/xenstore/xenstored_transaction.h b/tools/xenstore/xenstored_transaction.h +index 3386bac565..43a162bea3 100644 +--- a/tools/xenstore/xenstored_transaction.h ++++ b/tools/xenstore/xenstored_transaction.h +@@ -27,6 +27,8 @@ enum node_access_type { + + struct transaction; + ++extern uint64_t generation; ++ + int do_transaction_start(struct connection *conn, struct buffered_data *node); + int do_transaction_end(struct connection *conn, struct buffered_data *in); + +diff --git a/tools/xenstore/xs_lib.c b/tools/xenstore/xs_lib.c +index 3e43f8809d..d407d5713a 100644 +--- a/tools/xenstore/xs_lib.c ++++ b/tools/xenstore/xs_lib.c +@@ -152,7 +152,7 @@ bool xs_strings_to_perms(struct xs_permissions *perms, unsigned int num, + bool xs_perm_to_string(const struct xs_permissions *perm, + char *buffer, size_t buf_len) + { +- switch ((int)perm->perms) { ++ switch ((int)perm->perms & ~XS_PERM_IGNORE) { + case XS_PERM_WRITE: + *buffer = 'w'; + break; diff --git a/xsa322-o.patch b/xsa322-o.patch new file mode 100644 index 0000000..75f7c20 --- /dev/null +++ b/xsa322-o.patch @@ -0,0 +1,110 @@ +From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= +Subject: tools/ocaml/xenstored: clean up permissions for dead domains +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +domain ids are prone to wrapping (15-bits), and with sufficient number +of VMs in a reboot loop it is possible to trigger it. Xenstore entries +may linger after a domain dies, until a toolstack cleans it up. During +this time there is a window where a wrapped domid could access these +xenstore keys (that belonged to another VM). + +To prevent this do a cleanup when a domain dies: + * walk the entire xenstore tree and update permissions for all nodes + * if the dead domain had an ACL entry: remove it + * if the dead domain was the owner: change the owner to Dom0 + +This is done without quota checks or a transaction. Quota checks would +be a no-op (either the domain is dead, or it is Dom0 where they are not +enforced). Transactions are not needed, because this is all done +atomically by oxenstored's single thread. + +The xenstore entries owned by the dead domain are not deleted, because +that could confuse a toolstack / backends that are still bound to it +(or generate unexpected watch events). It is the responsibility of a +toolstack to remove the xenstore entries themselves. + +This is part of XSA-322. + +Signed-off-by: Edwin Török +Acked-by: Christian Lindig + +diff --git a/tools/ocaml/xenstored/perms.ml b/tools/ocaml/xenstored/perms.ml +index ee7fee6bda..e8a16221f8 100644 +--- a/tools/ocaml/xenstored/perms.ml ++++ b/tools/ocaml/xenstored/perms.ml +@@ -58,6 +58,15 @@ let get_other perms = perms.other + let get_acl perms = perms.acl + let get_owner perm = perm.owner + ++(** [remote_domid ~domid perm] removes all ACLs for [domid] from perm. ++* If [domid] was the owner then it is changed to Dom0. ++* This is used for cleaning up after dead domains. 
++* *) ++let remove_domid ~domid perm = ++ let acl = List.filter (fun (acl_domid, _) -> acl_domid <> domid) perm.acl in ++ let owner = if perm.owner = domid then 0 else perm.owner in ++ { perm with acl; owner } ++ + let default0 = create 0 NONE [] + + let perm_of_string s = +diff --git a/tools/ocaml/xenstored/process.ml b/tools/ocaml/xenstored/process.ml +index f99b9e935c..73e04cc18b 100644 +--- a/tools/ocaml/xenstored/process.ml ++++ b/tools/ocaml/xenstored/process.ml +@@ -443,6 +443,7 @@ let do_release con t domains cons data = + let fire_spec_watches = Domains.exist domains domid in + Domains.del domains domid; + Connections.del_domain cons domid; ++ Store.reset_permissions (Transaction.get_store t) domid; + if fire_spec_watches + then Connections.fire_spec_watches (Transaction.get_root t) cons Store.Path.release_domain + else raise Invalid_Cmd_Args +diff --git a/tools/ocaml/xenstored/store.ml b/tools/ocaml/xenstored/store.ml +index 6b6e440e98..3b05128f1b 100644 +--- a/tools/ocaml/xenstored/store.ml ++++ b/tools/ocaml/xenstored/store.ml +@@ -89,6 +89,13 @@ let check_owner node connection = + + let rec recurse fct node = fct node; List.iter (recurse fct) node.children + ++(** [recurse_map f tree] applies [f] on each node in the tree recursively *) ++let recurse_map f = ++ let rec walk node = ++ f { node with children = List.rev_map walk node.children |> List.rev } ++ in ++ walk ++ + let unpack node = (Symbol.to_string node.name, node.perms, node.value) + + end +@@ -405,6 +412,15 @@ let setperms store perm path nperms = + Quota.del_entry store.quota old_owner; + Quota.add_entry store.quota new_owner + ++let reset_permissions store domid = ++ Logging.info "store|node" "Cleaning up xenstore ACLs for domid %d" domid; ++ store.root <- Node.recurse_map (fun node -> ++ let perms = Perms.Node.remove_domid ~domid node.perms in ++ if perms <> node.perms then ++ Logging.debug "store|node" "Changed permissions for node %s" (Node.get_name node); ++ { node with perms } ++ ) store.root ++ + type ops = { + store: t; + write: Path.t -> string -> unit; +diff --git a/tools/ocaml/xenstored/xenstored.ml b/tools/ocaml/xenstored/xenstored.ml +index 0d355bbcb8..ff9fbbbac2 100644 +--- a/tools/ocaml/xenstored/xenstored.ml ++++ b/tools/ocaml/xenstored/xenstored.ml +@@ -336,6 +336,7 @@ let _ = + finally (fun () -> + if Some port = eventchn.Event.virq_port then ( + let (notify, deaddom) = Domains.cleanup domains in ++ List.iter (Store.reset_permissions store) deaddom; + List.iter (Connections.del_domain cons) deaddom; + if deaddom <> [] || notify then + Connections.fire_spec_watches diff --git a/xsa323.patch b/xsa323.patch new file mode 100644 index 0000000..aadf5c7 --- /dev/null +++ b/xsa323.patch @@ -0,0 +1,140 @@ +From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= +Subject: tools/ocaml/xenstored: Fix path length validation +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Currently, oxenstored checks the length of paths against 1024, then +prepends "/local/domain/$DOMID/" to relative paths. This allows a domU +to create paths which can't subsequently be read by anyone, even dom0. +This also interferes with listing directories, etc. + +Define a new oxenstored.conf entry: quota-path-max, defaulting to 1024 +as before. For paths that begin with "/local/domain/$DOMID/" check the +relative path length against this quota. For all other paths check the +entire path length. 
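As a rough illustration of that length rule, a minimal OCaml sketch (editorial, not part of the patch; the helper name and the limit parameter are invented stand-ins for the new quota-path-max setting):

(* Editorial sketch only, not patch code. *)
let path_within_quota ~limit abs_path =
  (* For domain-relative paths only the part after "/local/domain/<domid>/"
     counts against the quota; any other path is measured in full. *)
  let effective_len =
    try
      Scanf.sscanf abs_path "/local/domain/%d/%n"
        (fun _domid offset -> String.length abs_path - offset)
    with Scanf.Scan_failure _ | End_of_file -> String.length abs_path
  in
  effective_len <= limit

let () =
  (* "foo/bar" is 7 characters no matter how many digits the domid has,
     so the check gives the same answer before and after live-migration. *)
  assert (path_within_quota ~limit:8 "/local/domain/12345/foo/bar");
  assert (not (path_within_quota ~limit:8 "/a/path/longer/than/the/limit"))

The actual validation added to utils.ml below uses Scanf.ksscanf with fallback handlers instead of an exception, but it measures the same length.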
+ +This ensures that if the domid changes (and thus the length of a prefix +changes) a path that used to be valid stays valid (e.g. after a +live-migration). It also ensures that regardless how the client tries +to access a path (domid-relative or absolute) it will get consistent +results, since the limit is always applied on the final canonicalized +path. + +Delete the unused Domain.get_path to avoid it being confused with +Connection.get_path (which differs by a trailing slash only). + +Rewrite Util.path_validate to apply the appropriate length restriction +based on whether the path is relative or not. Remove the check for +connection_path being absolute, because it is not guest controlled data. + +This is part of XSA-323. + +Signed-off-by: Andrew Cooper +Signed-off-by: Edwin Török +Acked-by: Christian Lindig + +diff --git a/tools/ocaml/libs/xb/partial.ml b/tools/ocaml/libs/xb/partial.ml +index d4d1c7bdec..b6e2a716e2 100644 +--- a/tools/ocaml/libs/xb/partial.ml ++++ b/tools/ocaml/libs/xb/partial.ml +@@ -28,6 +28,7 @@ external header_of_string_internal: string -> int * int * int * int + = "stub_header_of_string" + + let xenstore_payload_max = 4096 (* xen/include/public/io/xs_wire.h *) ++let xenstore_rel_path_max = 2048 (* xen/include/public/io/xs_wire.h *) + + let of_string s = + let tid, rid, opint, dlen = header_of_string_internal s in +diff --git a/tools/ocaml/libs/xb/partial.mli b/tools/ocaml/libs/xb/partial.mli +index 359a75e88d..b9216018f5 100644 +--- a/tools/ocaml/libs/xb/partial.mli ++++ b/tools/ocaml/libs/xb/partial.mli +@@ -9,6 +9,7 @@ external header_size : unit -> int = "stub_header_size" + external header_of_string_internal : string -> int * int * int * int + = "stub_header_of_string" + val xenstore_payload_max : int ++val xenstore_rel_path_max : int + val of_string : string -> pkt + val append : pkt -> string -> int -> unit + val to_complete : pkt -> int +diff --git a/tools/ocaml/xenstored/define.ml b/tools/ocaml/xenstored/define.ml +index ea9e1b7620..ebe18b8e31 100644 +--- a/tools/ocaml/xenstored/define.ml ++++ b/tools/ocaml/xenstored/define.ml +@@ -31,6 +31,8 @@ let conflict_rate_limit_is_aggregate = ref true + + let domid_self = 0x7FF0 + ++let path_max = ref Xenbus.Partial.xenstore_rel_path_max ++ + exception Not_a_directory of string + exception Not_a_value of string + exception Already_exist +diff --git a/tools/ocaml/xenstored/domain.ml b/tools/ocaml/xenstored/domain.ml +index aeb185ff7e..81cb59b8f1 100644 +--- a/tools/ocaml/xenstored/domain.ml ++++ b/tools/ocaml/xenstored/domain.ml +@@ -38,7 +38,6 @@ type t = + } + + let is_dom0 d = d.id = 0 +-let get_path dom = "/local/domain/" ^ (sprintf "%u" dom.id) + let get_id domain = domain.id + let get_interface d = d.interface + let get_mfn d = d.mfn +diff --git a/tools/ocaml/xenstored/oxenstored.conf.in b/tools/ocaml/xenstored/oxenstored.conf.in +index f843482981..4ae48e42d4 100644 +--- a/tools/ocaml/xenstored/oxenstored.conf.in ++++ b/tools/ocaml/xenstored/oxenstored.conf.in +@@ -61,6 +61,7 @@ quota-maxsize = 2048 + quota-maxwatch = 100 + quota-transaction = 10 + quota-maxrequests = 1024 ++quota-path-max = 1024 + + # Activate filed base backend + persistent = false +diff --git a/tools/ocaml/xenstored/utils.ml b/tools/ocaml/xenstored/utils.ml +index e8c9fe4e94..eb79bf0146 100644 +--- a/tools/ocaml/xenstored/utils.ml ++++ b/tools/ocaml/xenstored/utils.ml +@@ -93,7 +93,7 @@ let read_file_single_integer filename = + let path_validate path connection_path = + let len = String.length path in + +- if len = 0 || len > 1024 then 
raise Define.Invalid_path; ++ if len = 0 then raise Define.Invalid_path; + + let abs_path = + match String.get path 0 with +@@ -101,4 +101,17 @@ let path_validate path connection_path = + | _ -> connection_path ^ path + in + ++ (* Regardless whether client specified absolute or relative path, ++ canonicalize it (above) and, for domain-relative paths, check the ++ length of the relative part. ++ ++ This prevents paths becoming invalid across migrate when the length ++ of the domid changes in @param connection_path. ++ *) ++ let len = String.length abs_path in ++ let on_absolute _ _ = len in ++ let on_relative _ offset = len - offset in ++ let len = Scanf.ksscanf abs_path on_absolute "/local/domain/%d/%n" on_relative in ++ if len > !Define.path_max then raise Define.Invalid_path; ++ + abs_path +diff --git a/tools/ocaml/xenstored/xenstored.ml b/tools/ocaml/xenstored/xenstored.ml +index ff9fbbbac2..39d6d767e4 100644 +--- a/tools/ocaml/xenstored/xenstored.ml ++++ b/tools/ocaml/xenstored/xenstored.ml +@@ -102,6 +102,7 @@ let parse_config filename = + ("quota-maxentity", Config.Set_int Quota.maxent); + ("quota-maxsize", Config.Set_int Quota.maxsize); + ("quota-maxrequests", Config.Set_int Define.maxrequests); ++ ("quota-path-max", Config.Set_int Define.path_max); + ("test-eagain", Config.Set_bool Transaction.test_eagain); + ("persistent", Config.Set_bool Disk.enable); + ("xenstored-log-file", Config.String Logging.set_xenstored_log_destination); diff --git a/xsa324.patch b/xsa324.patch new file mode 100644 index 0000000..c5e542d --- /dev/null +++ b/xsa324.patch @@ -0,0 +1,48 @@ +From: Juergen Gross +Subject: tools/xenstore: drop watch event messages exceeding maximum size + +By setting a watch with a very large tag it is possible to trick +xenstored to send watch event messages exceeding the maximum allowed +payload size. This might in turn lead to a crash of xenstored as the +resulting error can cause dereferencing a NULL pointer in case there +is no active request being handled by the guest the watch event is +being sent to. + +Fix that by just dropping such watch events. Additionally modify the +error handling to test the pointer to be not NULL before dereferencing +it. + +This is XSA-324. + +Signed-off-by: Juergen Gross +Acked-by: Julien Grall + +diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c +index 33f95dcf3c..3d74dbbb40 100644 +--- a/tools/xenstore/xenstored_core.c ++++ b/tools/xenstore/xenstored_core.c +@@ -674,6 +674,9 @@ void send_reply(struct connection *conn, enum xsd_sockmsg_type type, + /* Replies reuse the request buffer, events need a new one. */ + if (type != XS_WATCH_EVENT) { + bdata = conn->in; ++ /* Drop asynchronous responses, e.g. errors for watch events. */ ++ if (!bdata) ++ return; + bdata->inhdr = true; + bdata->used = 0; + conn->in = NULL; +diff --git a/tools/xenstore/xenstored_watch.c b/tools/xenstore/xenstored_watch.c +index 71c108ea99..9ff20690c0 100644 +--- a/tools/xenstore/xenstored_watch.c ++++ b/tools/xenstore/xenstored_watch.c +@@ -92,6 +92,10 @@ static void add_event(struct connection *conn, + } + + len = strlen(name) + 1 + strlen(watch->token) + 1; ++ /* Don't try to send over-long events. 
*/ ++ if (len > XENSTORE_PAYLOAD_MAX) ++ return; ++ + data = talloc_array(ctx, char, len); + if (!data) + return; diff --git a/xsa325-4.14.patch b/xsa325-4.14.patch new file mode 100644 index 0000000..a17f546 --- /dev/null +++ b/xsa325-4.14.patch @@ -0,0 +1,192 @@ +From: Harsha Shamsundara Havanur +Subject: tools/xenstore: Preserve bad client until they are destroyed + +XenStored will kill any connection that it thinks has misbehaved, +this is currently happening in two places: + * In `handle_input()` if the sanity check on the ring and the message + fails. + * In `handle_output()` when failing to write the response in the ring. + +As the domain structure is a child of the connection, XenStored will +destroy its view of the domain when killing the connection. This will +result in sending @releaseDomain event to all the watchers. + +As the watch event doesn't carry which domain has been released, +the watcher (such as XenStored) will generally go through the list of +domains registers and check if one of them is shutting down/dying. +In the case of a client misbehaving, the domain will likely to be +running, so no action will be performed. + +When the domain is effectively destroyed, XenStored will not be aware of +the domain anymore. So the watch event is not going to be sent. +By consequence, the watchers of the event will not release mappings +they may have on the domain. This will result in a zombie domain. + +In order to send @releaseDomain event at the correct time, we want +to keep the domain structure until the domain is effectively +shutting-down/dying. + +We also want to keep the connection around so we could possibly revive +the connection in the future. + +A new flag 'is_ignored' is added to mark whether a connection should be +ignored when checking if there are work to do. Additionally any +transactions, watches, buffers associated to the connection will be +freed as you can't do much with them (restarting the connection will +likely need a reset). + +As a side note, when the device model were running in a stubdomain, a +guest would have been able to introduce a use-after-free because there +is two parents for a guest connection. + +This is XSA-325. + +Reported-by: Pawel Wieczorkiewicz +Signed-off-by: Harsha Shamsundara Havanur +Signed-off-by: Julien Grall +Reviewed-by: Juergen Gross +Reviewed-by: Paul Durrant + +diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c +index af3d17004b3f..27d8f15b6b76 100644 +--- a/tools/xenstore/xenstored_core.c ++++ b/tools/xenstore/xenstored_core.c +@@ -1355,6 +1355,32 @@ static struct { + [XS_DIRECTORY_PART] = { "DIRECTORY_PART", send_directory_part }, + }; + ++/* ++ * Keep the connection alive but stop processing any new request or sending ++ * reponse. This is to allow sending @releaseDomain watch event at the correct ++ * moment and/or to allow the connection to restart (not yet implemented). ++ * ++ * All watches, transactions, buffers will be freed. 
++ */ ++static void ignore_connection(struct connection *conn) ++{ ++ struct buffered_data *out, *tmp; ++ ++ trace("CONN %p ignored\n", conn); ++ ++ conn->is_ignored = true; ++ conn_delete_all_watches(conn); ++ conn_delete_all_transactions(conn); ++ ++ list_for_each_entry_safe(out, tmp, &conn->out_list, list) { ++ list_del(&out->list); ++ talloc_free(out); ++ } ++ ++ talloc_free(conn->in); ++ conn->in = NULL; ++} ++ + static const char *sockmsg_string(enum xsd_sockmsg_type type) + { + if ((unsigned int)type < ARRAY_SIZE(wire_funcs) && wire_funcs[type].str) +@@ -1413,8 +1439,10 @@ static void consider_message(struct connection *conn) + assert(conn->in == NULL); + } + +-/* Errors in reading or allocating here mean we get out of sync, so we +- * drop the whole client connection. */ ++/* ++ * Errors in reading or allocating here means we get out of sync, so we mark ++ * the connection as ignored. ++ */ + static void handle_input(struct connection *conn) + { + int bytes; +@@ -1471,14 +1499,14 @@ static void handle_input(struct connection *conn) + return; + + bad_client: +- /* Kill it. */ +- talloc_free(conn); ++ ignore_connection(conn); + } + + static void handle_output(struct connection *conn) + { ++ /* Ignore the connection if an error occured */ + if (!write_messages(conn)) +- talloc_free(conn); ++ ignore_connection(conn); + } + + struct connection *new_connection(connwritefn_t *write, connreadfn_t *read) +@@ -1494,6 +1522,7 @@ struct connection *new_connection(connwritefn_t *write, connreadfn_t *read) + new->write = write; + new->read = read; + new->can_write = true; ++ new->is_ignored = false; + new->transaction_started = 0; + INIT_LIST_HEAD(&new->out_list); + INIT_LIST_HEAD(&new->watches); +@@ -2186,8 +2215,9 @@ int main(int argc, char *argv[]) + if (fds[conn->pollfd_idx].revents + & ~(POLLIN|POLLOUT)) + talloc_free(conn); +- else if (fds[conn->pollfd_idx].revents +- & POLLIN) ++ else if ((fds[conn->pollfd_idx].revents ++ & POLLIN) && ++ !conn->is_ignored) + handle_input(conn); + } + if (talloc_free(conn) == 0) +@@ -2199,8 +2229,9 @@ int main(int argc, char *argv[]) + if (fds[conn->pollfd_idx].revents + & ~(POLLIN|POLLOUT)) + talloc_free(conn); +- else if (fds[conn->pollfd_idx].revents +- & POLLOUT) ++ else if ((fds[conn->pollfd_idx].revents ++ & POLLOUT) && ++ !conn->is_ignored) + handle_output(conn); + } + if (talloc_free(conn) == 0) +diff --git a/tools/xenstore/xenstored_core.h b/tools/xenstore/xenstored_core.h +index eb19b71f5f46..196a6fd2b0be 100644 +--- a/tools/xenstore/xenstored_core.h ++++ b/tools/xenstore/xenstored_core.h +@@ -80,6 +80,9 @@ struct connection + /* Is this a read-only connection? */ + bool can_write; + ++ /* Is this connection ignored? */ ++ bool is_ignored; ++ + /* Buffered incoming data. 
*/ + struct buffered_data *in; + +diff --git a/tools/xenstore/xenstored_domain.c b/tools/xenstore/xenstored_domain.c +index dc635e9be30c..d5e1e3e9d42d 100644 +--- a/tools/xenstore/xenstored_domain.c ++++ b/tools/xenstore/xenstored_domain.c +@@ -286,6 +286,10 @@ bool domain_can_read(struct connection *conn) + + if (domain_is_unprivileged(conn) && conn->domain->wrl_credit < 0) + return false; ++ ++ if (conn->is_ignored) ++ return false; ++ + return (intf->req_cons != intf->req_prod); + } + +@@ -303,6 +307,10 @@ bool domain_is_unprivileged(struct connection *conn) + bool domain_can_write(struct connection *conn) + { + struct xenstore_domain_interface *intf = conn->domain->interface; ++ ++ if (conn->is_ignored) ++ return false; ++ + return ((intf->rsp_prod - intf->rsp_cons) != XENSTORE_RING_SIZE); + } + +-- +2.17.1 + diff --git a/xsa330.patch b/xsa330.patch new file mode 100644 index 0000000..c834516 --- /dev/null +++ b/xsa330.patch @@ -0,0 +1,66 @@ +From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= +Subject: tools/ocaml/xenstored: delete watch from trie too when resetting + watches +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +c/s f8c72b526129 "oxenstored: implement XS_RESET_WATCHES" from Xen 4.6 +introduced reset watches support in oxenstored by mirroring the change +in cxenstored. + +However the OCaml version has some additional data structures to +optimize watch firing, and just resetting the watches in one of the data +structures creates a security bug where a malicious guest kernel can +exceed its watch quota, driving oxenstored into OOM: + * create watches + * reset watches (this still keeps the watches lingering in another data + structure, using memory) + * create some more watches + * loop until oxenstored dies + +The guest kernel doesn't necessarily have to be malicious to trigger +this: + * if control/platform-feature-xs_reset_watches is set + * the guest kexecs (e.g. because it crashes) + * on boot more watches are set up + * this will slowly "leak" memory for watches in oxenstored, driving it + towards OOM. + +This is XSA-330. 
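To make the leak concrete, here is a small OCaml toy model (editorial, not oxenstored code; the types and names are invented): watches are registered both in a per-connection list and in a shared index, and a reset that clears only the former lets the latter grow without bound:

(* Editorial toy model only, not patch code. *)
module Toy = struct
  type t = {
    mutable per_con : string list;          (* what the buggy reset cleared *)
    index : (string, unit) Hashtbl.t;       (* what it forgot to clear *)
  }
  let create () = { per_con = []; index = Hashtbl.create 16 }
  let add t path =
    t.per_con <- path :: t.per_con;
    Hashtbl.replace t.index path ()
  let reset_buggy t = t.per_con <- []       (* index keeps growing across resets *)
  let reset_fixed t = t.per_con <- []; Hashtbl.reset t.index
end

let () =
  let t = Toy.create () in
  List.iter (Toy.add t) ["a"; "b"; "c"];
  Toy.reset_buggy t;
  assert (Hashtbl.length t.Toy.index = 3);  (* entries linger: the memory leak *)
  Toy.reset_fixed t;
  assert (Hashtbl.length t.Toy.index = 0)

The change below does the equivalent for oxenstored by removing the connection's watches from the shared Trie as well.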
+ +Fixes: f8c72b526129 ("oxenstored: implement XS_RESET_WATCHES") +Signed-off-by: Edwin Török +Acked-by: Christian Lindig +Reviewed-by: Andrew Cooper + +diff --git a/tools/ocaml/xenstored/connections.ml b/tools/ocaml/xenstored/connections.ml +index 9f9f7ee2f0..6ee3552ec2 100644 +--- a/tools/ocaml/xenstored/connections.ml ++++ b/tools/ocaml/xenstored/connections.ml +@@ -134,6 +134,10 @@ let del_watch cons con path token = + cons.watches <- Trie.set cons.watches key watches; + watch + ++let del_watches cons con = ++ Connection.del_watches con; ++ cons.watches <- Trie.map (del_watches_of_con con) cons.watches ++ + (* path is absolute *) + let fire_watches ?oldroot root cons path recurse = + let key = key_of_path path in +diff --git a/tools/ocaml/xenstored/process.ml b/tools/ocaml/xenstored/process.ml +index 73e04cc18b..437d2dcf9e 100644 +--- a/tools/ocaml/xenstored/process.ml ++++ b/tools/ocaml/xenstored/process.ml +@@ -179,8 +179,8 @@ let do_isintroduced con _t domains _cons data = + if domid = Define.domid_self || Domains.exist domains domid then "T\000" else "F\000" + + (* only in xen >= 4.2 *) +-let do_reset_watches con _t _domains _cons _data = +- Connection.del_watches con; ++let do_reset_watches con _t _domains cons _data = ++ Connections.del_watches cons con; + Connection.del_transactions con + + (* only in >= xen3.3 *) diff --git a/xsa348-1.patch b/xsa348-1.patch new file mode 100644 index 0000000..721c047 --- /dev/null +++ b/xsa348-1.patch @@ -0,0 +1,113 @@ +From: Jan Beulich +Subject: x86: replace reset_stack_and_jump_nolp() + +Move the necessary check into check_for_livepatch_work(), rather than +mostly duplicating reset_stack_and_jump() for this purpose. This is to +prevent an inflation of reset_stack_and_jump() flavors. + +Signed-off-by: Jan Beulich +Reviewed-by: Juergen Gross +--- +Of course instead of adding the check right into +check_for_livepatch_work(), a wrapper could be introduced. + +--- a/xen/arch/x86/domain.c ++++ b/xen/arch/x86/domain.c +@@ -192,7 +192,7 @@ static void noreturn continue_idle_domai + { + /* Idle vcpus might be attached to non-idle units! 
*/ + if ( !is_idle_domain(v->sched_unit->domain) ) +- reset_stack_and_jump_nolp(guest_idle_loop); ++ reset_stack_and_jump(guest_idle_loop); + + reset_stack_and_jump(idle_loop); + } +--- a/xen/arch/x86/hvm/svm/svm.c ++++ b/xen/arch/x86/hvm/svm/svm.c +@@ -1036,7 +1036,7 @@ static void noreturn svm_do_resume(struc + + hvm_do_resume(v); + +- reset_stack_and_jump_nolp(svm_asm_do_resume); ++ reset_stack_and_jump(svm_asm_do_resume); + } + + void svm_vmenter_helper(const struct cpu_user_regs *regs) +--- a/xen/arch/x86/hvm/vmx/vmcs.c ++++ b/xen/arch/x86/hvm/vmx/vmcs.c +@@ -1909,7 +1909,7 @@ void vmx_do_resume(struct vcpu *v) + if ( host_cr4 != read_cr4() ) + __vmwrite(HOST_CR4, read_cr4()); + +- reset_stack_and_jump_nolp(vmx_asm_do_vmentry); ++ reset_stack_and_jump(vmx_asm_do_vmentry); + } + + static inline unsigned long vmr(unsigned long field) +--- a/xen/arch/x86/pv/domain.c ++++ b/xen/arch/x86/pv/domain.c +@@ -113,7 +113,7 @@ static int parse_pcid(const char *s) + static void noreturn continue_nonidle_domain(struct vcpu *v) + { + check_wakeup_from_wait(); +- reset_stack_and_jump_nolp(ret_from_intr); ++ reset_stack_and_jump(ret_from_intr); + } + + static int setup_compat_l4(struct vcpu *v) +--- a/xen/arch/x86/setup.c ++++ b/xen/arch/x86/setup.c +@@ -676,7 +676,7 @@ static void __init noreturn reinit_bsp_s + asm volatile ("setssbsy" ::: "memory"); + } + +- reset_stack_and_jump_nolp(init_done); ++ reset_stack_and_jump(init_done); + } + + /* +--- a/xen/common/livepatch.c ++++ b/xen/common/livepatch.c +@@ -1635,6 +1635,11 @@ void check_for_livepatch_work(void) + s_time_t timeout; + unsigned long flags; + ++ /* Only do any work when invoked in truly idle state. */ ++ if ( system_state != SYS_STATE_active || ++ !is_idle_domain(current->sched_unit->domain) ) ++ return; ++ + /* Fast path: no work to do. */ + if ( !per_cpu(work_to_do, cpu ) ) + return; +--- a/xen/include/asm-x86/current.h ++++ b/xen/include/asm-x86/current.h +@@ -155,13 +155,13 @@ unsigned long get_stack_dump_bottom (uns + # define SHADOW_STACK_WORK "" + #endif + +-#define switch_stack_and_jump(fn, instr) \ ++#define reset_stack_and_jump(fn) \ + ({ \ + unsigned int tmp; \ + __asm__ __volatile__ ( \ + SHADOW_STACK_WORK \ + "mov %[stk], %%rsp;" \ +- instr \ ++ CHECK_FOR_LIVEPATCH_WORK \ + "jmp %c[fun];" \ + : [val] "=&r" (tmp), \ + [ssp] "=&r" (tmp) \ +@@ -176,12 +176,6 @@ unsigned long get_stack_dump_bottom (uns + unreachable(); \ + }) + +-#define reset_stack_and_jump(fn) \ +- switch_stack_and_jump(fn, CHECK_FOR_LIVEPATCH_WORK) +- +-#define reset_stack_and_jump_nolp(fn) \ +- switch_stack_and_jump(fn, "") +- + /* + * Which VCPU's state is currently running on each CPU? + * This is not necesasrily the same as 'current' as a CPU may be diff --git a/xsa348-2.patch b/xsa348-2.patch new file mode 100644 index 0000000..a8106bf --- /dev/null +++ b/xsa348-2.patch @@ -0,0 +1,85 @@ +From: Jan Beulich +Subject: x86: fold guest_idle_loop() into idle_loop() + +The latter can easily be made cover both cases. This is in preparation +of using idle_loop directly for populating idle_csw.tail. + +Take the liberty and also adjust indentation / spacing in involved code. + +Signed-off-by: Jan Beulich +Reviewed-by: Juergen Gross + +--- a/xen/arch/x86/domain.c ++++ b/xen/arch/x86/domain.c +@@ -133,14 +133,22 @@ void play_dead(void) + static void idle_loop(void) + { + unsigned int cpu = smp_processor_id(); ++ /* ++ * Idle vcpus might be attached to non-idle units! We don't do any ++ * standard idle work like tasklets or livepatching in this case. 
++ */ ++ bool guest = !is_idle_domain(current->sched_unit->domain); + + for ( ; ; ) + { + if ( cpu_is_offline(cpu) ) ++ { ++ ASSERT(!guest); + play_dead(); ++ } + + /* Are we here for running vcpu context tasklets, or for idling? */ +- if ( unlikely(tasklet_work_to_do(cpu)) ) ++ if ( !guest && unlikely(tasklet_work_to_do(cpu)) ) + { + do_tasklet(); + /* Livepatch work is always kicked off via a tasklet. */ +@@ -151,28 +159,14 @@ static void idle_loop(void) + * and then, after it is done, whether softirqs became pending + * while we were scrubbing. + */ +- else if ( !softirq_pending(cpu) && !scrub_free_pages() && +- !softirq_pending(cpu) ) +- pm_idle(); +- do_softirq(); +- } +-} +- +-/* +- * Idle loop for siblings in active schedule units. +- * We don't do any standard idle work like tasklets or livepatching. +- */ +-static void guest_idle_loop(void) +-{ +- unsigned int cpu = smp_processor_id(); +- +- for ( ; ; ) +- { +- ASSERT(!cpu_is_offline(cpu)); +- +- if ( !softirq_pending(cpu) && !scrub_free_pages() && +- !softirq_pending(cpu)) +- sched_guest_idle(pm_idle, cpu); ++ else if ( !softirq_pending(cpu) && !scrub_free_pages() && ++ !softirq_pending(cpu) ) ++ { ++ if ( guest ) ++ sched_guest_idle(pm_idle, cpu); ++ else ++ pm_idle(); ++ } + do_softirq(); + } + } +@@ -190,10 +184,6 @@ void startup_cpu_idle_loop(void) + + static void noreturn continue_idle_domain(struct vcpu *v) + { +- /* Idle vcpus might be attached to non-idle units! */ +- if ( !is_idle_domain(v->sched_unit->domain) ) +- reset_stack_and_jump(guest_idle_loop); +- + reset_stack_and_jump(idle_loop); + } + diff --git a/xsa348-3.patch b/xsa348-3.patch new file mode 100644 index 0000000..43b8bbc --- /dev/null +++ b/xsa348-3.patch @@ -0,0 +1,174 @@ +From: Jan Beulich +Subject: x86: avoid calling {svm,vmx}_do_resume() + +These functions follow the following path: hvm_do_resume() -> +handle_hvm_io_completion() -> hvm_wait_for_io() -> +wait_on_xen_event_channel() -> do_softirq() -> schedule() -> +sched_context_switch() -> continue_running() and hence may +recursively invoke themselves. If this ends up happening a couple of +times, a stack overflow would result. + +Prevent this by also resetting the stack at the +->arch.ctxt_switch->tail() invocations (in both places for consistency) +and thus jumping to the functions instead of calling them. + +This is XSA-348 / CVE-2020-29566. + +Reported-by: Julien Grall +Signed-off-by: Jan Beulich +Reviewed-by: Juergen Gross +--- +v2: Fix LIVEPATCH builds crashing. + +--- a/xen/arch/x86/domain.c ++++ b/xen/arch/x86/domain.c +@@ -130,7 +130,7 @@ void play_dead(void) + dead_idle(); + } + +-static void idle_loop(void) ++static void noreturn idle_loop(void) + { + unsigned int cpu = smp_processor_id(); + /* +@@ -182,11 +182,6 @@ void startup_cpu_idle_loop(void) + reset_stack_and_jump(idle_loop); + } + +-static void noreturn continue_idle_domain(struct vcpu *v) +-{ +- reset_stack_and_jump(idle_loop); +-} +- + void init_hypercall_page(struct domain *d, void *ptr) + { + memset(ptr, 0xcc, PAGE_SIZE); +@@ -710,7 +705,7 @@ int arch_domain_create(struct domain *d, + static const struct arch_csw idle_csw = { + .from = paravirt_ctxt_switch_from, + .to = paravirt_ctxt_switch_to, +- .tail = continue_idle_domain, ++ .tail = idle_loop, + }; + + d->arch.ctxt_switch = &idle_csw; +@@ -2047,20 +2042,12 @@ void context_switch(struct vcpu *prev, s + /* Ensure that the vcpu has an up-to-date time base. 
*/ + update_vcpu_system_time(next); + +- /* +- * Schedule tail *should* be a terminal function pointer, but leave a +- * bug frame around just in case it returns, to save going back into the +- * context switching code and leaving a far more subtle crash to diagnose. +- */ +- nextd->arch.ctxt_switch->tail(next); +- BUG(); ++ reset_stack_and_jump_ind(nextd->arch.ctxt_switch->tail); + } + + void continue_running(struct vcpu *same) + { +- /* See the comment above. */ +- same->domain->arch.ctxt_switch->tail(same); +- BUG(); ++ reset_stack_and_jump_ind(same->domain->arch.ctxt_switch->tail); + } + + int __sync_local_execstate(void) +--- a/xen/arch/x86/hvm/svm/svm.c ++++ b/xen/arch/x86/hvm/svm/svm.c +@@ -991,8 +991,9 @@ static void svm_ctxt_switch_to(struct vc + wrmsr_tsc_aux(v->arch.msrs->tsc_aux); + } + +-static void noreturn svm_do_resume(struct vcpu *v) ++static void noreturn svm_do_resume(void) + { ++ struct vcpu *v = current; + struct vmcb_struct *vmcb = v->arch.hvm.svm.vmcb; + bool debug_state = (v->domain->debugger_attached || + v->domain->arch.monitor.software_breakpoint_enabled || +--- a/xen/arch/x86/hvm/vmx/vmcs.c ++++ b/xen/arch/x86/hvm/vmx/vmcs.c +@@ -1850,8 +1850,9 @@ void vmx_vmentry_failure(void) + domain_crash(curr->domain); + } + +-void vmx_do_resume(struct vcpu *v) ++void vmx_do_resume(void) + { ++ struct vcpu *v = current; + bool_t debug_state; + unsigned long host_cr4; + +--- a/xen/arch/x86/pv/domain.c ++++ b/xen/arch/x86/pv/domain.c +@@ -110,7 +110,7 @@ static int parse_pcid(const char *s) + return rc; + } + +-static void noreturn continue_nonidle_domain(struct vcpu *v) ++static void noreturn continue_nonidle_domain(void) + { + check_wakeup_from_wait(); + reset_stack_and_jump(ret_from_intr); +--- a/xen/include/asm-x86/current.h ++++ b/xen/include/asm-x86/current.h +@@ -155,18 +155,18 @@ unsigned long get_stack_dump_bottom (uns + # define SHADOW_STACK_WORK "" + #endif + +-#define reset_stack_and_jump(fn) \ ++#define switch_stack_and_jump(fn, instr, constr) \ + ({ \ + unsigned int tmp; \ + __asm__ __volatile__ ( \ + SHADOW_STACK_WORK \ + "mov %[stk], %%rsp;" \ + CHECK_FOR_LIVEPATCH_WORK \ +- "jmp %c[fun];" \ ++ instr "[fun]" \ + : [val] "=&r" (tmp), \ + [ssp] "=&r" (tmp) \ + : [stk] "r" (guest_cpu_user_regs()), \ +- [fun] "i" (fn), \ ++ [fun] constr (fn), \ + [skstk_base] "i" \ + ((PRIMARY_SHSTK_SLOT + 1) * PAGE_SIZE - 8), \ + [stack_mask] "i" (STACK_SIZE - 1), \ +@@ -176,6 +176,13 @@ unsigned long get_stack_dump_bottom (uns + unreachable(); \ + }) + ++#define reset_stack_and_jump(fn) \ ++ switch_stack_and_jump(fn, "jmp %c", "i") ++ ++/* The constraint may only specify non-call-clobbered registers. */ ++#define reset_stack_and_jump_ind(fn) \ ++ switch_stack_and_jump(fn, "INDIRECT_JMP %", "b") ++ + /* + * Which VCPU's state is currently running on each CPU? 
+ * This is not necesasrily the same as 'current' as a CPU may be +--- a/xen/include/asm-x86/domain.h ++++ b/xen/include/asm-x86/domain.h +@@ -337,7 +337,7 @@ struct arch_domain + const struct arch_csw { + void (*from)(struct vcpu *); + void (*to)(struct vcpu *); +- void (*tail)(struct vcpu *); ++ void noreturn (*tail)(void); + } *ctxt_switch; + + #ifdef CONFIG_HVM +--- a/xen/include/asm-x86/hvm/vmx/vmx.h ++++ b/xen/include/asm-x86/hvm/vmx/vmx.h +@@ -95,7 +95,7 @@ typedef enum { + void vmx_asm_vmexit_handler(struct cpu_user_regs); + void vmx_asm_do_vmentry(void); + void vmx_intr_assist(void); +-void noreturn vmx_do_resume(struct vcpu *); ++void noreturn vmx_do_resume(void); + void vmx_vlapic_msr_changed(struct vcpu *v); + struct hvm_emulate_ctxt; + void vmx_realmode_emulate_one(struct hvm_emulate_ctxt *hvmemul_ctxt); diff --git a/xsa352.patch b/xsa352.patch new file mode 100644 index 0000000..e21d21a --- /dev/null +++ b/xsa352.patch @@ -0,0 +1,42 @@ +From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= +Subject: tools/ocaml/xenstored: only Dom0 can change node owner +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Otherwise we can give quota away to another domain, either causing it to run +out of quota, or in case of Dom0 use unbounded amounts of memory and bypass +the quota system entirely. + +This was fixed in the C version of xenstored in 2006 (c/s db34d2aaa5f5, +predating the XSA process by 5 years). + +It was also fixed in the mirage version of xenstore in 2012, with a unit test +demonstrating the vulnerability: + + https://github.com/mirage/ocaml-xenstore/commit/6b91f3ac46b885d0530a51d57a9b3a57d64923a7 + https://github.com/mirage/ocaml-xenstore/commit/22ee5417c90b8fda905c38de0d534506152eace6 + +but possibly without realising that the vulnerability still affected the +in-tree oxenstored (added c/s f44af660412 in 2010). + +This is XSA-352. + +Signed-off-by: Edwin Török +Acked-by: Christian Lindig +Reviewed-by: Andrew Cooper + +diff --git a/tools/ocaml/xenstored/store.ml b/tools/ocaml/xenstored/store.ml +index 3b05128f1b..5f915f2bbe 100644 +--- a/tools/ocaml/xenstored/store.ml ++++ b/tools/ocaml/xenstored/store.ml +@@ -407,7 +407,8 @@ let setperms store perm path nperms = + | Some node -> + let old_owner = Node.get_owner node in + let new_owner = Perms.Node.get_owner nperms in +- if not ((old_owner = new_owner) || (Perms.Connection.is_dom0 perm)) then Quota.check store.quota new_owner 0; ++ if not ((old_owner = new_owner) || (Perms.Connection.is_dom0 perm)) then ++ raise Define.Permission_denied; + store.root <- path_setperms store perm path nperms; + Quota.del_entry store.quota old_owner; + Quota.add_entry store.quota new_owner diff --git a/xsa353.patch b/xsa353.patch new file mode 100644 index 0000000..764f93c --- /dev/null +++ b/xsa353.patch @@ -0,0 +1,89 @@ +From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= +Subject: tools/ocaml/xenstored: do permission checks on xenstore root +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +This was lacking in a disappointing number of places. + +The xenstore root node is treated differently from all other nodes, because it +doesn't have a parent, and mutation requires changing the parent. + +Unfortunately this lead to open-coding the special case for root into every +single xenstore operation, and out of all the xenstore operations only read +did a permission check when handling the root node. 
+ +This means that an unprivileged guest can: + + * xenstore-chmod / to its liking and subsequently write new arbitrary nodes + there (subject to quota) + * xenstore-rm -r / deletes almost the entire xenstore tree (xenopsd quickly + refills some, but you are left with a broken system) + * DIRECTORY on / lists all children when called through python + bindings (xenstore-ls stops at /local because it tries to list recursively) + * get-perms on / works too, but that is just a minor information leak + +Add the missing permission checks, but this should really be refactored to do +the root handling and permission checks on the node only once from a single +function, instead of getting it wrong nearly everywhere. + +This is XSA-353. + +Signed-off-by: Edwin Török +Acked-by: Christian Lindig +Reviewed-by: Andrew Cooper + +diff --git a/tools/ocaml/xenstored/store.ml b/tools/ocaml/xenstored/store.ml +index f299ec6461..92b6289b5e 100644 +--- a/tools/ocaml/xenstored/store.ml ++++ b/tools/ocaml/xenstored/store.ml +@@ -273,15 +273,17 @@ let path_rm store perm path = + Node.del_childname node name + with Not_found -> + raise Define.Doesnt_exist in +- if path = [] then ++ if path = [] then ( ++ Node.check_perm store.root perm Perms.WRITE; + Node.del_all_children store.root +- else ++ ) else + Path.apply_modify store.root path do_rm + + let path_setperms store perm path perms = +- if path = [] then ++ if path = [] then ( ++ Node.check_perm store.root perm Perms.WRITE; + Node.set_perms store.root perms +- else ++ ) else + let do_setperms node name = + let c = Node.find node name in + Node.check_owner c perm; +@@ -313,9 +315,10 @@ let read store perm path = + + let ls store perm path = + let children = +- if path = [] then +- (Node.get_children store.root) +- else ++ if path = [] then ( ++ Node.check_perm store.root perm Perms.READ; ++ Node.get_children store.root ++ ) else + let do_ls node name = + let cnode = Node.find node name in + Node.check_perm cnode perm Perms.READ; +@@ -324,9 +327,10 @@ let ls store perm path = + List.rev (List.map (fun n -> Symbol.to_string n.Node.name) children) + + let getperms store perm path = +- if path = [] then +- (Node.get_perms store.root) +- else ++ if path = [] then ( ++ Node.check_perm store.root perm Perms.READ; ++ Node.get_perms store.root ++ ) else + let fct n name = + let c = Node.find n name in + Node.check_perm c perm Perms.READ; diff --git a/xsa356.patch b/xsa356.patch new file mode 100644 index 0000000..f18d220 --- /dev/null +++ b/xsa356.patch @@ -0,0 +1,65 @@ +From: Roger Pau Monné +Subject: x86/irq: fix infinite loop in irq_move_cleanup_interrupt + +If Xen enters irq_move_cleanup_interrupt with a dynamic vector below +IRQ_MOVE_CLEANUP_VECTOR pending in IRR (0x20 or 0x21) that's also +designated for a cleanup it will enter a loop where +irq_move_cleanup_interrupt continuously sends a cleanup IPI (vector +0x22) to itself while waiting for the vector with lower priority to be +injected - which will never happen because IRQ_MOVE_CLEANUP_VECTOR +takes precedence and it's always injected first. + +Fix this by making sure vectors below IRQ_MOVE_CLEANUP_VECTOR are +marked as used and thus not available for APs. Also add some logic to +assert and prevent irq_move_cleanup_interrupt from entering such an +infinite loop, albeit that should never happen given the current code. + +This is XSA-356 / CVE-2020-29567. 
+ +Fixes: 3fba06ba9f8 ('x86/IRQ: re-use legacy vector ranges on APs') +Signed-off-by: Roger Pau Monné +Reviewed-by: Jan Beulich + +--- a/xen/arch/x86/irq.c ++++ b/xen/arch/x86/irq.c +@@ -441,8 +441,15 @@ int __init init_irq_data(void) + set_bit(HYPERCALL_VECTOR, used_vectors); + #endif + +- /* IRQ_MOVE_CLEANUP_VECTOR used for clean up vectors */ +- set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors); ++ /* ++ * Mark vectors up to the cleanup one as used, to prevent an infinite loop ++ * invoking irq_move_cleanup_interrupt. ++ */ ++ BUILD_BUG_ON(IRQ_MOVE_CLEANUP_VECTOR < FIRST_DYNAMIC_VECTOR); ++ for ( vector = FIRST_DYNAMIC_VECTOR; ++ vector <= IRQ_MOVE_CLEANUP_VECTOR; ++ vector++ ) ++ __set_bit(vector, used_vectors); + + return 0; + } +@@ -727,10 +734,6 @@ void irq_move_cleanup_interrupt(struct cpu_user_regs *regs) + { + unsigned vector, me; + +- /* This interrupt should not nest inside others. */ +- BUILD_BUG_ON(APIC_PRIO_CLASS(IRQ_MOVE_CLEANUP_VECTOR) != +- APIC_PRIO_CLASS(FIRST_DYNAMIC_VECTOR)); +- + ack_APIC_irq(); + + me = smp_processor_id(); +@@ -774,6 +777,11 @@ void irq_move_cleanup_interrupt(struct cpu_user_regs *regs) + */ + if ( irr & (1u << (vector % 32)) ) + { ++ if ( vector < IRQ_MOVE_CLEANUP_VECTOR ) ++ { ++ ASSERT_UNREACHABLE(); ++ goto unlock; ++ } + send_IPI_self(IRQ_MOVE_CLEANUP_VECTOR); + TRACE_3D(TRC_HW_IRQ_MOVE_CLEANUP_DELAY, + irq, vector, smp_processor_id()); diff --git a/xsa358.patch b/xsa358.patch new file mode 100644 index 0000000..a9ff89a --- /dev/null +++ b/xsa358.patch @@ -0,0 +1,57 @@ +From: Jan Beulich +Subject: evtchn/FIFO: re-order and synchronize (with) map_control_block() + +For evtchn_fifo_set_pending()'s check of the control block having been +set to be effective, ordering of respective reads and writes needs to be +ensured: The control block pointer needs to be recorded strictly after +the setting of all the queue heads, and it needs checking strictly +before any uses of them (this latter aspect was already guaranteed). + +This is XSA-358 / CVE-2020-29570. + +Reported-by: Julien Grall +Signed-off-by: Jan Beulich +Acked-by: Julien Grall +--- +v3: Drop read-side barrier again, leveraging guest_test_and_set_bit(). +v2: Re-base over queue locking re-work. + +--- a/xen/common/event_fifo.c ++++ b/xen/common/event_fifo.c +@@ -249,6 +249,10 @@ static void evtchn_fifo_set_pending(stru + goto unlock; + } + ++ /* ++ * This also acts as the read counterpart of the smp_wmb() in ++ * map_control_block(). ++ */ + if ( guest_test_and_set_bit(d, EVTCHN_FIFO_LINKED, word) ) + goto unlock; + +@@ -474,6 +478,7 @@ static int setup_control_block(struct vc + static int map_control_block(struct vcpu *v, uint64_t gfn, uint32_t offset) + { + void *virt; ++ struct evtchn_fifo_control_block *control_block; + unsigned int i; + int rc; + +@@ -484,10 +489,15 @@ static int map_control_block(struct vcpu + if ( rc < 0 ) + return rc; + +- v->evtchn_fifo->control_block = virt + offset; ++ control_block = virt + offset; + + for ( i = 0; i <= EVTCHN_FIFO_PRIORITY_MIN; i++ ) +- v->evtchn_fifo->queue[i].head = &v->evtchn_fifo->control_block->head[i]; ++ v->evtchn_fifo->queue[i].head = &control_block->head[i]; ++ ++ /* All queue heads must have been set before setting the control block. 
*/ ++ smp_wmb(); ++ ++ v->evtchn_fifo->control_block = control_block; + + return 0; + } diff --git a/xsa359.patch b/xsa359.patch new file mode 100644 index 0000000..231810b --- /dev/null +++ b/xsa359.patch @@ -0,0 +1,40 @@ +From: Jan Beulich +Subject: evtchn/FIFO: add 2nd smp_rmb() to evtchn_fifo_word_from_port() + +Besides with add_page_to_event_array() the function also needs to +synchronize with evtchn_fifo_init_control() setting both d->evtchn_fifo +and (subsequently) d->evtchn_port_ops. + +This is XSA-359 / CVE-2020-29571. + +Reported-by: Julien Grall +Signed-off-by: Jan Beulich +Reviewed-by: Julien Grall + +--- a/xen/common/event_fifo.c ++++ b/xen/common/event_fifo.c +@@ -55,6 +55,13 @@ static inline event_word_t *evtchn_fifo_ + { + unsigned int p, w; + ++ /* ++ * Callers aren't required to hold d->event_lock, so we need to synchronize ++ * with evtchn_fifo_init_control() setting d->evtchn_port_ops /after/ ++ * d->evtchn_fifo. ++ */ ++ smp_rmb(); ++ + if ( unlikely(port >= d->evtchn_fifo->num_evtchns) ) + return NULL; + +@@ -606,6 +613,10 @@ int evtchn_fifo_init_control(struct evtc + if ( rc < 0 ) + goto error; + ++ /* ++ * This call, as a side effect, synchronizes with ++ * evtchn_fifo_word_from_port(). ++ */ + rc = map_control_block(v, gfn, offset); + if ( rc < 0 ) + goto error;