Blob Blame History Raw
From e4250bf8b39ed73623c75b0f1436ac7c1d45aba7 Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgross@suse.com>
Date: Tue, 13 Sep 2022 07:35:12 +0200
Subject: tools/xenstore: remove nodes owned by destroyed domain

In case a domain is removed from Xenstore, remove all nodes owned by
it per default.

This tackles the problem that nodes might be created by a domain
outside its home path in Xenstore, leading to Xenstore hogging more
and more memory. Domain quota don't work in this case if the guest is
rebooting in between.

Since XSA-322 ownership of such stale nodes is transferred to dom0,
which is helping against unintended access, but not against OOM of
Xenstore.

As a fallback for weird cases add a Xenstore start parameter for
keeping today's way to handle stale nodes, adding the risk of Xenstore
hitting an OOM situation.

This is part of XSA-419 / CVE-2022-42322.

Reported-by: Juergen Gross <jgross@suse.com>
Fixes: 496306324d8d ("tools/xenstore: revoke access rights for removed domains")
Signed-off-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Julien Grall <jgrall@amazon.com>

diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c
index bdc14679adf5..13e48aaa731c 100644
--- a/tools/xenstore/xenstored_core.c
+++ b/tools/xenstore/xenstored_core.c
@@ -80,6 +80,7 @@ static bool verbose = false;
 LIST_HEAD(connections);
 int tracefd = -1;
 static bool recovery = true;
+bool keep_orphans = false;
 static int reopen_log_pipe[2];
 static int reopen_log_pipe0_pollfd_idx = -1;
 char *tracefile = NULL;
@@ -753,7 +754,7 @@ struct node *read_node(struct connection *conn, const void *ctx,
 	node->perms.p = hdr->perms;
 	node->acc.domid = node->perms.p[0].id;
 	node->acc.memory = data.dsize;
-	if (domain_adjust_node_perms(conn, node))
+	if (domain_adjust_node_perms(node))
 		goto error;
 
 	/* If owner is gone reset currently accounted memory size. */
@@ -796,7 +797,7 @@ int write_node_raw(struct connection *conn, TDB_DATA *key, struct node *node,
 	void *p;
 	struct xs_tdb_record_hdr *hdr;
 
-	if (domain_adjust_node_perms(conn, node))
+	if (domain_adjust_node_perms(node))
 		return errno;
 
 	data.dsize = sizeof(*hdr)
@@ -1647,7 +1648,7 @@ static int delnode_sub(const void *ctx, struct connection *conn,
 	return WALK_TREE_RM_CHILDENTRY;
 }
 
-static int _rm(struct connection *conn, const void *ctx, const char *name)
+int rm_node(struct connection *conn, const void *ctx, const char *name)
 {
 	struct node *parent;
 	char *parentname = get_parent(ctx, name);
@@ -1711,7 +1712,7 @@ static int do_rm(const void *ctx, struct connection *conn,
 	if (streq(name, "/"))
 		return EINVAL;
 
-	ret = _rm(conn, ctx, name);
+	ret = rm_node(conn, ctx, name);
 	if (ret)
 		return ret;
 
@@ -2618,6 +2619,8 @@ static void usage(void)
 "  -R, --no-recovery       to request that no recovery should be attempted when\n"
 "                          the store is corrupted (debug only),\n"
 "  -I, --internal-db       store database in memory, not on disk\n"
+"  -K, --keep-orphans      don't delete nodes owned by a domain when the\n"
+"                          domain is deleted (this is a security risk!)\n"
 "  -V, --verbose           to request verbose execution.\n");
 }
 
@@ -2642,6 +2645,7 @@ static struct option options[] = {
 	{ "timeout", 1, NULL, 'w' },
 	{ "no-recovery", 0, NULL, 'R' },
 	{ "internal-db", 0, NULL, 'I' },
+	{ "keep-orphans", 0, NULL, 'K' },
 	{ "verbose", 0, NULL, 'V' },
 	{ "watch-nb", 1, NULL, 'W' },
 #ifndef NO_LIVE_UPDATE
@@ -2721,7 +2725,7 @@ int main(int argc, char *argv[])
 	orig_argc = argc;
 	orig_argv = argv;
 
-	while ((opt = getopt_long(argc, argv, "DE:F:HNPS:t:A:M:Q:q:T:RVW:w:U",
+	while ((opt = getopt_long(argc, argv, "DE:F:HKNPS:t:A:M:Q:q:T:RVW:w:U",
 				  options, NULL)) != -1) {
 		switch (opt) {
 		case 'D':
@@ -2757,6 +2761,9 @@ int main(int argc, char *argv[])
 		case 'I':
 			tdb_flags = TDB_INTERNAL|TDB_NOLOCK;
 			break;
+		case 'K':
+			keep_orphans = true;
+			break;
 		case 'V':
 			verbose = true;
 			break;
diff --git a/tools/xenstore/xenstored_core.h b/tools/xenstore/xenstored_core.h
index acb00ad96914..37006d508dbf 100644
--- a/tools/xenstore/xenstored_core.h
+++ b/tools/xenstore/xenstored_core.h
@@ -240,6 +240,9 @@ int write_node_raw(struct connection *conn, TDB_DATA *key, struct node *node,
 struct node *read_node(struct connection *conn, const void *ctx,
 		       const char *name);
 
+/* Remove a node and its children. */
+int rm_node(struct connection *conn, const void *ctx, const char *name);
+
 void setup_structure(bool live_update);
 struct connection *new_connection(const struct interface_funcs *funcs);
 struct connection *get_connection_by_id(unsigned int conn_id);
@@ -284,6 +287,7 @@ extern int quota_req_outstanding;
 extern int quota_trans_nodes;
 extern int quota_memory_per_domain_soft;
 extern int quota_memory_per_domain_hard;
+extern bool keep_orphans;
 
 extern unsigned int timeout_watch_event_msec;
 
diff --git a/tools/xenstore/xenstored_domain.c b/tools/xenstore/xenstored_domain.c
index 98b401fdec30..84b7817cd5e6 100644
--- a/tools/xenstore/xenstored_domain.c
+++ b/tools/xenstore/xenstored_domain.c
@@ -227,10 +227,64 @@ static void unmap_interface(void *interface)
 	xengnttab_unmap(*xgt_handle, interface, 1);
 }
 
+static int domain_tree_remove_sub(const void *ctx, struct connection *conn,
+				  struct node *node, void *arg)
+{
+	struct domain *domain = arg;
+	TDB_DATA key;
+	int ret = WALK_TREE_OK;
+
+	if (node->perms.p[0].id != domain->domid)
+		return WALK_TREE_OK;
+
+	if (keep_orphans) {
+		set_tdb_key(node->name, &key);
+		domain->nbentry--;
+		node->perms.p[0].id = priv_domid;
+		node->acc.memory = 0;
+		domain_entry_inc(NULL, node);
+		if (write_node_raw(NULL, &key, node, true)) {
+			/* That's unfortunate. We only can try to continue. */
+			syslog(LOG_ERR,
+			       "error when moving orphaned node %s to dom0\n",
+			       node->name);
+		} else
+			trace("orphaned node %s moved to dom0\n", node->name);
+	} else {
+		if (rm_node(NULL, ctx, node->name)) {
+			/* That's unfortunate. We only can try to continue. */
+			syslog(LOG_ERR,
+			       "error when deleting orphaned node %s\n",
+			       node->name);
+		} else
+			trace("orphaned node %s deleted\n", node->name);
+
+		/* Skip children in all cases in order to avoid more errors. */
+		ret = WALK_TREE_SKIP_CHILDREN;
+	}
+
+	return domain->nbentry > 0 ? ret : WALK_TREE_SUCCESS_STOP;
+}
+
+static void domain_tree_remove(struct domain *domain)
+{
+	int ret;
+	struct walk_funcs walkfuncs = { .enter = domain_tree_remove_sub };
+
+	if (domain->nbentry > 0) {
+		ret = walk_node_tree(domain, NULL, "/", &walkfuncs, domain);
+		if (ret == WALK_TREE_ERROR_STOP)
+			syslog(LOG_ERR,
+			       "error when looking for orphaned nodes\n");
+	}
+}
+
 static int destroy_domain(void *_domain)
 {
 	struct domain *domain = _domain;
 
+	domain_tree_remove(domain);
+
 	list_del(&domain->list);
 
 	if (!domain->introduced)
@@ -883,15 +937,15 @@ int domain_entry_inc(struct connection *conn, struct node *node)
 	struct domain *d;
 	unsigned int domid;
 
-	if (!conn)
+	if (!node->perms.p)
 		return 0;
 
-	domid = node->perms.p ? node->perms.p[0].id : conn->id;
+	domid = node->perms.p[0].id;
 
-	if (conn->transaction) {
+	if (conn && conn->transaction) {
 		transaction_entry_inc(conn->transaction, domid);
 	} else {
-		d = (domid == conn->id && conn->domain) ? conn->domain
+		d = (conn && domid == conn->id && conn->domain) ? conn->domain
 		    : find_or_alloc_existing_domain(domid);
 		if (d)
 			d->nbentry++;
@@ -952,23 +1006,11 @@ int domain_alloc_permrefs(struct node_perms *perms)
  * Remove permissions for no longer existing domains in order to avoid a new
  * domain with the same domid inheriting the permissions.
  */
-int domain_adjust_node_perms(struct connection *conn, struct node *node)
+int domain_adjust_node_perms(struct node *node)
 {
 	unsigned int i;
 	int ret;
 
-	ret = chk_domain_generation(node->perms.p[0].id, node->generation);
-
-	/* If the owner doesn't exist any longer give it to priv domain. */
-	if (!ret) {
-		/*
-		 * In theory we'd need to update the number of dom0 nodes here,
-		 * but we could be called for a read of the node. So better
-		 * avoid the risk to overflow the node count of dom0.
-		 */
-		node->perms.p[0].id = priv_domid;
-	}
-
 	for (i = 1; i < node->perms.num; i++) {
 		if (node->perms.p[i].perms & XS_PERM_IGNORE)
 			continue;
@@ -986,15 +1028,15 @@ void domain_entry_dec(struct connection *conn, struct node *node)
 	struct domain *d;
 	unsigned int domid;
 
-	if (!conn)
+	if (!node->perms.p)
 		return;
 
 	domid = node->perms.p ? node->perms.p[0].id : conn->id;
 
-	if (conn->transaction) {
+	if (conn && conn->transaction) {
 		transaction_entry_dec(conn->transaction, domid);
 	} else {
-		d = (domid == conn->id && conn->domain) ? conn->domain
+		d = (conn && domid == conn->id && conn->domain) ? conn->domain
 		    : find_domain_struct(domid);
 		if (d) {
 			d->nbentry--;
@@ -1113,7 +1155,7 @@ int domain_memory_add(unsigned int domid, int mem, bool no_quota_check)
 		 * exist, as accounting is done either for a domain related to
 		 * the current connection, or for the domain owning a node
 		 * (which is always existing, as the owner of the node is
-		 * tested to exist and replaced by domid 0 if not).
+		 * tested to exist and deleted or replaced by domid 0 if not).
 		 * So not finding the related domain MUST be an error in the
 		 * data base.
 		 */
diff --git a/tools/xenstore/xenstored_domain.h b/tools/xenstore/xenstored_domain.h
index 7fe0a21d9e45..b38c82991dc6 100644
--- a/tools/xenstore/xenstored_domain.h
+++ b/tools/xenstore/xenstored_domain.h
@@ -62,7 +62,7 @@ const char *get_implicit_path(const struct connection *conn);
 bool domain_is_unprivileged(struct connection *conn);
 
 /* Remove node permissions for no longer existing domains. */
-int domain_adjust_node_perms(struct connection *conn, struct node *node);
+int domain_adjust_node_perms(struct node *node);
 int domain_alloc_permrefs(struct node_perms *perms);
 
 /* Quota manipulation */