97374c0
From 2feed737530592688382c655680982e10951c1ec Mon Sep 17 00:00:00 2001
97374c0
From: Juergen Gross <jgross@suse.com>
97374c0
Date: Tue, 13 Sep 2022 07:35:07 +0200
97374c0
Subject: tools/xenstore: let unread watch events time out
97374c0
97374c0
A future modification will limit the number of outstanding requests
97374c0
for a domain, where "outstanding" means that the response of the
97374c0
request or any resulting watch event hasn't been consumed yet.
97374c0
97374c0
In order to avoid a malicious guest being capable to block other guests
97374c0
by not reading watch events, add a timeout for watch events. In case a
97374c0
watch event hasn't been consumed after this timeout, it is being
97374c0
deleted. Set the default timeout to 20 seconds (a random value being
97374c0
not too high).
97374c0
97374c0
In order to support to specify other timeout values in future, use a
97374c0
generic command line option for that purpose:
97374c0
97374c0
--timeout|-w watch-event=<seconds>
97374c0
97374c0
This is part of XSA-326 / CVE-2022-42311.
97374c0
97374c0
Reported-by: Julien Grall <jgrall@amazon.com>
97374c0
Signed-off-by: Juergen Gross <jgross@suse.com>
97374c0
Reviewed-by: Julien Grall <jgrall@amazon.com>
97374c0
97374c0
diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c
97374c0
index bf2243873901..45244c021cd3 100644
97374c0
--- a/tools/xenstore/xenstored_core.c
97374c0
+++ b/tools/xenstore/xenstored_core.c
97374c0
@@ -108,6 +108,8 @@ int quota_max_transaction = 10;
97374c0
 int quota_nb_perms_per_node = 5;
97374c0
 int quota_max_path_len = XENSTORE_REL_PATH_MAX;
97374c0
 
97374c0
+unsigned int timeout_watch_event_msec = 20000;
97374c0
+
97374c0
 void trace(const char *fmt, ...)
97374c0
 {
97374c0
 	va_list arglist;
97374c0
@@ -211,19 +213,92 @@ void reopen_log(void)
97374c0
 	}
97374c0
 }
97374c0
 
97374c0
+static uint64_t get_now_msec(void)
97374c0
+{
97374c0
+	struct timespec now_ts;
97374c0
+
97374c0
+	if (clock_gettime(CLOCK_MONOTONIC, &now_ts))
97374c0
+		barf_perror("Could not find time (clock_gettime failed)");
97374c0
+
97374c0
+	return now_ts.tv_sec * 1000 + now_ts.tv_nsec / 1000000;
97374c0
+}
97374c0
+
97374c0
 static void free_buffered_data(struct buffered_data *out,
97374c0
 			       struct connection *conn)
97374c0
 {
97374c0
+	struct buffered_data *req;
97374c0
+
97374c0
 	list_del(&out->list);
97374c0
+
97374c0
+	/*
97374c0
+	 * Update conn->timeout_msec with the next found timeout value in the
97374c0
+	 * queued pending requests.
97374c0
+	 */
97374c0
+	if (out->timeout_msec) {
97374c0
+		conn->timeout_msec = 0;
97374c0
+		list_for_each_entry(req, &conn->out_list, list) {
97374c0
+			if (req->timeout_msec) {
97374c0
+				conn->timeout_msec = req->timeout_msec;
97374c0
+				break;
97374c0
+			}
97374c0
+		}
97374c0
+	}
97374c0
+
97374c0
 	talloc_free(out);
97374c0
 }
97374c0
 
97374c0
+static void check_event_timeout(struct connection *conn, uint64_t msecs,
97374c0
+				int *ptimeout)
97374c0
+{
97374c0
+	uint64_t delta;
97374c0
+	struct buffered_data *out, *tmp;
97374c0
+
97374c0
+	if (!conn->timeout_msec)
97374c0
+		return;
97374c0
+
97374c0
+	delta = conn->timeout_msec - msecs;
97374c0
+	if (conn->timeout_msec <= msecs) {
97374c0
+		delta = 0;
97374c0
+		list_for_each_entry_safe(out, tmp, &conn->out_list, list) {
97374c0
+			/*
97374c0
+			 * Only look at buffers with timeout and no data
97374c0
+			 * already written to the ring.
97374c0
+			 */
97374c0
+			if (out->timeout_msec && out->inhdr && !out->used) {
97374c0
+				if (out->timeout_msec > msecs) {
97374c0
+					conn->timeout_msec = out->timeout_msec;
97374c0
+					delta = conn->timeout_msec - msecs;
97374c0
+					break;
97374c0
+				}
97374c0
+
97374c0
+				/*
97374c0
+				 * Free out without updating conn->timeout_msec,
97374c0
+				 * as the update is done in this loop already.
97374c0
+				 */
97374c0
+				out->timeout_msec = 0;
97374c0
+				trace("watch event path %s for domain %u timed out\n",
97374c0
+				      out->buffer, conn->id);
97374c0
+				free_buffered_data(out, conn);
97374c0
+			}
97374c0
+		}
97374c0
+		if (!delta) {
97374c0
+			conn->timeout_msec = 0;
97374c0
+			return;
97374c0
+		}
97374c0
+	}
97374c0
+
97374c0
+	if (*ptimeout == -1 || *ptimeout > delta)
97374c0
+		*ptimeout = delta;
97374c0
+}
97374c0
+
97374c0
 void conn_free_buffered_data(struct connection *conn)
97374c0
 {
97374c0
 	struct buffered_data *out;
97374c0
 
97374c0
 	while ((out = list_top(&conn->out_list, struct buffered_data, list)))
97374c0
 		free_buffered_data(out, conn);
97374c0
+
97374c0
+	conn->timeout_msec = 0;
97374c0
 }
97374c0
 
97374c0
 static bool write_messages(struct connection *conn)
97374c0
@@ -411,6 +486,7 @@ static void initialize_fds(int *p_sock_pollfd_idx, int *ptimeout)
97374c0
 {
97374c0
 	struct connection *conn;
97374c0
 	struct wrl_timestampt now;
97374c0
+	uint64_t msecs;
97374c0
 
97374c0
 	if (fds)
97374c0
 		memset(fds, 0, sizeof(struct pollfd) * current_array_size);
97374c0
@@ -431,10 +507,12 @@ static void initialize_fds(int *p_sock_pollfd_idx, int *ptimeout)
97374c0
 
97374c0
 	wrl_gettime_now(&now;;
97374c0
 	wrl_log_periodic(now);
97374c0
+	msecs = get_now_msec();
97374c0
 
97374c0
 	list_for_each_entry(conn, &connections, list) {
97374c0
 		if (conn->domain) {
97374c0
 			wrl_check_timeout(conn->domain, now, ptimeout);
97374c0
+			check_event_timeout(conn, msecs, ptimeout);
97374c0
 			if (conn_can_read(conn) ||
97374c0
 			    (conn_can_write(conn) &&
97374c0
 			     !list_empty(&conn->out_list)))
97374c0
@@ -794,6 +872,7 @@ void send_reply(struct connection *conn, enum xsd_sockmsg_type type,
97374c0
 		return;
97374c0
 	bdata->inhdr = true;
97374c0
 	bdata->used = 0;
97374c0
+	bdata->timeout_msec = 0;
97374c0
 
97374c0
 	if (len <= DEFAULT_BUFFER_SIZE)
97374c0
 		bdata->buffer = bdata->default_buffer;
97374c0
@@ -845,6 +924,12 @@ void send_event(struct connection *conn, const char *path, const char *token)
97374c0
 	bdata->hdr.msg.type = XS_WATCH_EVENT;
97374c0
 	bdata->hdr.msg.len = len;
97374c0
 
97374c0
+	if (timeout_watch_event_msec && domain_is_unprivileged(conn)) {
97374c0
+		bdata->timeout_msec = get_now_msec() + timeout_watch_event_msec;
97374c0
+		if (!conn->timeout_msec)
97374c0
+			conn->timeout_msec = bdata->timeout_msec;
97374c0
+	}
97374c0
+
97374c0
 	/* Queue for later transmission. */
97374c0
 	list_add_tail(&bdata->list, &conn->out_list);
97374c0
 }
97374c0
@@ -2201,6 +2286,9 @@ static void usage(void)
97374c0
 "  -t, --transaction <nb>  limit the number of transaction allowed per domain,\n"
97374c0
 "  -A, --perm-nb <nb>      limit the number of permissions per node,\n"
97374c0
 "  -M, --path-max <chars>  limit the allowed Xenstore node path length,\n"
97374c0
+"  -w, --timeout <what>=<seconds>   set the timeout in seconds for <what>,\n"
97374c0
+"                          allowed timeout candidates are:\n"
97374c0
+"                          watch-event: time a watch-event is kept pending\n"
97374c0
 "  -R, --no-recovery       to request that no recovery should be attempted when\n"
97374c0
 "                          the store is corrupted (debug only),\n"
97374c0
 "  -I, --internal-db       store database in memory, not on disk\n"
97374c0
@@ -2223,6 +2311,7 @@ static struct option options[] = {
97374c0
 	{ "transaction", 1, NULL, 't' },
97374c0
 	{ "perm-nb", 1, NULL, 'A' },
97374c0
 	{ "path-max", 1, NULL, 'M' },
97374c0
+	{ "timeout", 1, NULL, 'w' },
97374c0
 	{ "no-recovery", 0, NULL, 'R' },
97374c0
 	{ "internal-db", 0, NULL, 'I' },
97374c0
 	{ "verbose", 0, NULL, 'V' },
97374c0
@@ -2236,6 +2325,39 @@ int dom0_domid = 0;
97374c0
 int dom0_event = 0;
97374c0
 int priv_domid = 0;
97374c0
 
97374c0
+static int get_optval_int(const char *arg)
97374c0
+{
97374c0
+	char *end;
97374c0
+	long val;
97374c0
+
97374c0
+	val = strtol(arg, &end, 10);
97374c0
+	if (!*arg || *end || val < 0 || val > INT_MAX)
97374c0
+		barf("invalid parameter value \"%s\"\n", arg);
97374c0
+
97374c0
+	return val;
97374c0
+}
97374c0
+
97374c0
+static bool what_matches(const char *arg, const char *what)
97374c0
+{
97374c0
+	unsigned int what_len = strlen(what);
97374c0
+
97374c0
+	return !strncmp(arg, what, what_len) && arg[what_len] == '=';
97374c0
+}
97374c0
+
97374c0
+static void set_timeout(const char *arg)
97374c0
+{
97374c0
+	const char *eq = strchr(arg, '=');
97374c0
+	int val;
97374c0
+
97374c0
+	if (!eq)
97374c0
+		barf("quotas must be specified via <what>=<seconds>\n");
97374c0
+	val = get_optval_int(eq + 1);
97374c0
+	if (what_matches(arg, "watch-event"))
97374c0
+		timeout_watch_event_msec = val * 1000;
97374c0
+	else
97374c0
+		barf("unknown timeout \"%s\"\n", arg);
97374c0
+}
97374c0
+
97374c0
 int main(int argc, char *argv[])
97374c0
 {
97374c0
 	int opt;
97374c0
@@ -2250,7 +2372,7 @@ int main(int argc, char *argv[])
97374c0
 	orig_argc = argc;
97374c0
 	orig_argv = argv;
97374c0
 
97374c0
-	while ((opt = getopt_long(argc, argv, "DE:F:HNPS:t:A:M:T:RVW:U", options,
97374c0
+	while ((opt = getopt_long(argc, argv, "DE:F:HNPS:t:A:M:T:RVW:w:U", options,
97374c0
 				  NULL)) != -1) {
97374c0
 		switch (opt) {
97374c0
 		case 'D':
97374c0
@@ -2300,6 +2422,9 @@ int main(int argc, char *argv[])
97374c0
 			quota_max_path_len = min(XENSTORE_REL_PATH_MAX,
97374c0
 						 quota_max_path_len);
97374c0
 			break;
97374c0
+		case 'w':
97374c0
+			set_timeout(optarg);
97374c0
+			break;
97374c0
 		case 'e':
97374c0
 			dom0_event = strtol(optarg, NULL, 10);
97374c0
 			break;
97374c0
@@ -2741,6 +2866,12 @@ static void add_buffered_data(struct buffered_data *bdata,
97374c0
 		barf("error restoring buffered data");
97374c0
 
97374c0
 	memcpy(bdata->buffer, data, len);
97374c0
+	if (bdata->hdr.msg.type == XS_WATCH_EVENT && timeout_watch_event_msec &&
97374c0
+	    domain_is_unprivileged(conn)) {
97374c0
+		bdata->timeout_msec = get_now_msec() + timeout_watch_event_msec;
97374c0
+		if (!conn->timeout_msec)
97374c0
+			conn->timeout_msec = bdata->timeout_msec;
97374c0
+	}
97374c0
 
97374c0
 	/* Queue for later transmission. */
97374c0
 	list_add_tail(&bdata->list, &conn->out_list);
97374c0
diff --git a/tools/xenstore/xenstored_core.h b/tools/xenstore/xenstored_core.h
97374c0
index e7ee87825c3b..8a81fc693f01 100644
97374c0
--- a/tools/xenstore/xenstored_core.h
97374c0
+++ b/tools/xenstore/xenstored_core.h
97374c0
@@ -27,6 +27,7 @@
97374c0
 #include <fcntl.h>
97374c0
 #include <stdbool.h>
97374c0
 #include <stdint.h>
97374c0
+#include <time.h>
97374c0
 #include <errno.h>
97374c0
 
97374c0
 #include "xenstore_lib.h"
97374c0
@@ -67,6 +68,8 @@ struct buffered_data
97374c0
 		char raw[sizeof(struct xsd_sockmsg)];
97374c0
 	} hdr;
97374c0
 
97374c0
+	uint64_t timeout_msec;
97374c0
+
97374c0
 	/* The actual data. */
97374c0
 	char *buffer;
97374c0
 	char default_buffer[DEFAULT_BUFFER_SIZE];
97374c0
@@ -118,6 +121,7 @@ struct connection
97374c0
 
97374c0
 	/* Buffered output data */
97374c0
 	struct list_head out_list;
97374c0
+	uint64_t timeout_msec;
97374c0
 
97374c0
 	/* Transaction context for current request (NULL if none). */
97374c0
 	struct transaction *transaction;
97374c0
@@ -244,6 +248,8 @@ extern int dom0_event;
97374c0
 extern int priv_domid;
97374c0
 extern int quota_nb_entry_per_domain;
97374c0
 
97374c0
+extern unsigned int timeout_watch_event_msec;
97374c0
+
97374c0
 /* Map the kernel's xenstore page. */
97374c0
 void *xenbus_map(void);
97374c0
 void unmap_xenbus(void *interface);