From 2eee122a45eb4a218596b103ce7f0759a824cf2e Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Tue, 13 Sep 2022 07:35:08 +0200 Subject: tools/xenstore: limit outstanding requests Add another quota for limiting the number of outstanding requests of a guest. As the way to specify quotas on the command line is becoming rather nasty, switch to a new scheme using [--quota|-Q] = allowing to add more quotas in future easily. Set the default value to 20 (basically a random value not seeming to be too high or too low). A request is said to be outstanding if any message generated by this request (the direct response plus potential watch events) is not yet completely stored into a ring buffer. The initial watch event sent as a result of registering a watch is an exception. Note that across a live update the relation to buffered watch events for other domains is lost. Use talloc_zero() for allocating the domain structure in order to have all per-domain quota zeroed initially. This is part of XSA-326 / CVE-2022-42312. Reported-by: Julien Grall Signed-off-by: Juergen Gross Acked-by: Julien Grall diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c index 45244c021cd3..488d540f3a32 100644 --- a/tools/xenstore/xenstored_core.c +++ b/tools/xenstore/xenstored_core.c @@ -107,6 +107,7 @@ int quota_max_entry_size = 2048; /* 2K */ int quota_max_transaction = 10; int quota_nb_perms_per_node = 5; int quota_max_path_len = XENSTORE_REL_PATH_MAX; +int quota_req_outstanding = 20; unsigned int timeout_watch_event_msec = 20000; @@ -223,12 +224,24 @@ static uint64_t get_now_msec(void) return now_ts.tv_sec * 1000 + now_ts.tv_nsec / 1000000; } +/* + * Remove a struct buffered_data from the list of outgoing data. + * A struct buffered_data related to a request having caused watch events to be + * sent is kept until all those events have been written out. + * Each watch event is referencing the related request via pend.req, while the + * number of watch events caused by a request is kept in pend.ref.event_cnt + * (those two cases are mutually exclusive, so the two fields can share memory + * via a union). + * The struct buffered_data is freed only if no related watch event is + * referencing it. The related return data can be freed right away. + */ static void free_buffered_data(struct buffered_data *out, struct connection *conn) { struct buffered_data *req; list_del(&out->list); + out->on_out_list = false; /* * Update conn->timeout_msec with the next found timeout value in the @@ -244,6 +257,30 @@ static void free_buffered_data(struct buffered_data *out, } } + if (out->hdr.msg.type == XS_WATCH_EVENT) { + req = out->pend.req; + if (req) { + req->pend.ref.event_cnt--; + if (!req->pend.ref.event_cnt && !req->on_out_list) { + if (req->on_ref_list) { + domain_outstanding_domid_dec( + req->pend.ref.domid); + list_del(&req->list); + } + talloc_free(req); + } + } + } else if (out->pend.ref.event_cnt) { + /* Hang out off from conn. */ + talloc_steal(NULL, out); + if (out->buffer != out->default_buffer) + talloc_free(out->buffer); + list_add(&out->list, &conn->ref_list); + out->on_ref_list = true; + return; + } else + domain_outstanding_dec(conn); + talloc_free(out); } @@ -405,6 +442,7 @@ int delay_request(struct connection *conn, struct buffered_data *in, static int destroy_conn(void *_conn) { struct connection *conn = _conn; + struct buffered_data *req; /* Flush outgoing if possible, but don't block. */ if (!conn->domain) { @@ -418,6 +456,11 @@ static int destroy_conn(void *_conn) break; close(conn->fd); } + + conn_free_buffered_data(conn); + list_for_each_entry(req, &conn->ref_list, list) + req->on_ref_list = false; + if (conn->target) talloc_unlink(conn, conn->target); list_del(&conn->list); @@ -893,6 +936,8 @@ void send_reply(struct connection *conn, enum xsd_sockmsg_type type, /* Queue for later transmission. */ list_add_tail(&bdata->list, &conn->out_list); + bdata->on_out_list = true; + domain_outstanding_inc(conn); } /* @@ -900,7 +945,8 @@ void send_reply(struct connection *conn, enum xsd_sockmsg_type type, * As this is not directly related to the current command, errors can't be * reported. */ -void send_event(struct connection *conn, const char *path, const char *token) +void send_event(struct buffered_data *req, struct connection *conn, + const char *path, const char *token) { struct buffered_data *bdata; unsigned int len; @@ -930,8 +976,13 @@ void send_event(struct connection *conn, const char *path, const char *token) conn->timeout_msec = bdata->timeout_msec; } + bdata->pend.req = req; + if (req) + req->pend.ref.event_cnt++; + /* Queue for later transmission. */ list_add_tail(&bdata->list, &conn->out_list); + bdata->on_out_list = true; } /* Some routines (write, mkdir, etc) just need a non-error return */ @@ -1740,6 +1791,7 @@ static void handle_input(struct connection *conn) return; } in = conn->in; + in->pend.ref.domid = conn->id; /* Not finished header yet? */ if (in->inhdr) { @@ -1808,6 +1860,7 @@ struct connection *new_connection(const struct interface_funcs *funcs) new->is_stalled = false; new->transaction_started = 0; INIT_LIST_HEAD(&new->out_list); + INIT_LIST_HEAD(&new->ref_list); INIT_LIST_HEAD(&new->watches); INIT_LIST_HEAD(&new->transaction_list); INIT_LIST_HEAD(&new->delayed); @@ -2286,6 +2339,9 @@ static void usage(void) " -t, --transaction limit the number of transaction allowed per domain,\n" " -A, --perm-nb limit the number of permissions per node,\n" " -M, --path-max limit the allowed Xenstore node path length,\n" +" -Q, --quota = set the quota to the value , allowed\n" +" quotas are:\n" +" outstanding: number of outstanding requests\n" " -w, --timeout = set the timeout in seconds for ,\n" " allowed timeout candidates are:\n" " watch-event: time a watch-event is kept pending\n" @@ -2311,6 +2367,7 @@ static struct option options[] = { { "transaction", 1, NULL, 't' }, { "perm-nb", 1, NULL, 'A' }, { "path-max", 1, NULL, 'M' }, + { "quota", 1, NULL, 'Q' }, { "timeout", 1, NULL, 'w' }, { "no-recovery", 0, NULL, 'R' }, { "internal-db", 0, NULL, 'I' }, @@ -2358,6 +2415,20 @@ static void set_timeout(const char *arg) barf("unknown timeout \"%s\"\n", arg); } +static void set_quota(const char *arg) +{ + const char *eq = strchr(arg, '='); + int val; + + if (!eq) + barf("quotas must be specified via =\n"); + val = get_optval_int(eq + 1); + if (what_matches(arg, "outstanding")) + quota_req_outstanding = val; + else + barf("unknown quota \"%s\"\n", arg); +} + int main(int argc, char *argv[]) { int opt; @@ -2372,8 +2443,8 @@ int main(int argc, char *argv[]) orig_argc = argc; orig_argv = argv; - while ((opt = getopt_long(argc, argv, "DE:F:HNPS:t:A:M:T:RVW:w:U", options, - NULL)) != -1) { + while ((opt = getopt_long(argc, argv, "DE:F:HNPS:t:A:M:Q:T:RVW:w:U", + options, NULL)) != -1) { switch (opt) { case 'D': no_domain_init = true; @@ -2422,6 +2493,9 @@ int main(int argc, char *argv[]) quota_max_path_len = min(XENSTORE_REL_PATH_MAX, quota_max_path_len); break; + case 'Q': + set_quota(optarg); + break; case 'w': set_timeout(optarg); break; @@ -2875,6 +2949,14 @@ static void add_buffered_data(struct buffered_data *bdata, /* Queue for later transmission. */ list_add_tail(&bdata->list, &conn->out_list); + bdata->on_out_list = true; + /* + * Watch events are never "outstanding", but the request causing them + * are instead kept "outstanding" until all watch events caused by that + * request have been delivered. + */ + if (bdata->hdr.msg.type != XS_WATCH_EVENT) + domain_outstanding_inc(conn); } void read_state_buffered_data(const void *ctx, struct connection *conn, diff --git a/tools/xenstore/xenstored_core.h b/tools/xenstore/xenstored_core.h index 8a81fc693f01..db09f463a657 100644 --- a/tools/xenstore/xenstored_core.h +++ b/tools/xenstore/xenstored_core.h @@ -56,6 +56,8 @@ struct xs_state_connection; struct buffered_data { struct list_head list; + bool on_out_list; + bool on_ref_list; /* Are we still doing the header? */ bool inhdr; @@ -63,6 +65,17 @@ struct buffered_data /* How far are we? */ unsigned int used; + /* Outstanding request accounting. */ + union { + /* ref is being used for requests. */ + struct { + unsigned int event_cnt; /* # of outstanding events. */ + unsigned int domid; /* domid of request. */ + } ref; + /* req is being used for watch events. */ + struct buffered_data *req; /* request causing event. */ + } pend; + union { struct xsd_sockmsg msg; char raw[sizeof(struct xsd_sockmsg)]; @@ -123,6 +136,9 @@ struct connection struct list_head out_list; uint64_t timeout_msec; + /* Referenced requests no longer pending. */ + struct list_head ref_list; + /* Transaction context for current request (NULL if none). */ struct transaction *transaction; @@ -191,7 +207,8 @@ unsigned int get_string(const struct buffered_data *data, unsigned int offset); void send_reply(struct connection *conn, enum xsd_sockmsg_type type, const void *data, unsigned int len); -void send_event(struct connection *conn, const char *path, const char *token); +void send_event(struct buffered_data *req, struct connection *conn, + const char *path, const char *token); /* Some routines (write, mkdir, etc) just need a non-error return */ void send_ack(struct connection *conn, enum xsd_sockmsg_type type); @@ -247,6 +264,7 @@ extern int dom0_domid; extern int dom0_event; extern int priv_domid; extern int quota_nb_entry_per_domain; +extern int quota_req_outstanding; extern unsigned int timeout_watch_event_msec; diff --git a/tools/xenstore/xenstored_domain.c b/tools/xenstore/xenstored_domain.c index 93c4c1edcdd1..850085a92c76 100644 --- a/tools/xenstore/xenstored_domain.c +++ b/tools/xenstore/xenstored_domain.c @@ -78,6 +78,9 @@ struct domain /* number of watch for this domain */ int nbwatch; + /* Number of outstanding requests. */ + int nboutstanding; + /* write rate limit */ wrl_creditt wrl_credit; /* [ -wrl_config_writecost, +_dburst ] */ struct wrl_timestampt wrl_timestamp; @@ -183,8 +186,12 @@ static bool domain_can_read(struct connection *conn) { struct xenstore_domain_interface *intf = conn->domain->interface; - if (domain_is_unprivileged(conn) && conn->domain->wrl_credit < 0) - return false; + if (domain_is_unprivileged(conn)) { + if (conn->domain->wrl_credit < 0) + return false; + if (conn->domain->nboutstanding >= quota_req_outstanding) + return false; + } return (intf->req_cons != intf->req_prod); } @@ -331,7 +338,7 @@ static struct domain *alloc_domain(const void *context, unsigned int domid) { struct domain *domain; - domain = talloc(context, struct domain); + domain = talloc_zero(context, struct domain); if (!domain) { errno = ENOMEM; return NULL; @@ -392,9 +399,6 @@ static int new_domain(struct domain *domain, int port, bool restore) domain->conn->domain = domain; domain->conn->id = domain->domid; - domain->nbentry = 0; - domain->nbwatch = 0; - return 0; } @@ -938,6 +942,28 @@ int domain_watch(struct connection *conn) : 0; } +void domain_outstanding_inc(struct connection *conn) +{ + if (!conn || !conn->domain) + return; + conn->domain->nboutstanding++; +} + +void domain_outstanding_dec(struct connection *conn) +{ + if (!conn || !conn->domain) + return; + conn->domain->nboutstanding--; +} + +void domain_outstanding_domid_dec(unsigned int domid) +{ + struct domain *d = find_domain_by_domid(domid); + + if (d) + d->nboutstanding--; +} + static wrl_creditt wrl_config_writecost = WRL_FACTOR; static wrl_creditt wrl_config_rate = WRL_RATE * WRL_FACTOR; static wrl_creditt wrl_config_dburst = WRL_DBURST * WRL_FACTOR; diff --git a/tools/xenstore/xenstored_domain.h b/tools/xenstore/xenstored_domain.h index 1e929b8f8c6f..4f51b005291a 100644 --- a/tools/xenstore/xenstored_domain.h +++ b/tools/xenstore/xenstored_domain.h @@ -64,6 +64,9 @@ int domain_entry(struct connection *conn); void domain_watch_inc(struct connection *conn); void domain_watch_dec(struct connection *conn); int domain_watch(struct connection *conn); +void domain_outstanding_inc(struct connection *conn); +void domain_outstanding_dec(struct connection *conn); +void domain_outstanding_domid_dec(unsigned int domid); /* Special node permission handling. */ int set_perms_special(struct connection *conn, const char *name, diff --git a/tools/xenstore/xenstored_watch.c b/tools/xenstore/xenstored_watch.c index 205d9d8ea116..0755ffa375ba 100644 --- a/tools/xenstore/xenstored_watch.c +++ b/tools/xenstore/xenstored_watch.c @@ -142,6 +142,7 @@ void fire_watches(struct connection *conn, const void *ctx, const char *name, struct node *node, bool exact, struct node_perms *perms) { struct connection *i; + struct buffered_data *req; struct watch *watch; /* During transactions, don't fire watches, but queue them. */ @@ -150,6 +151,8 @@ void fire_watches(struct connection *conn, const void *ctx, const char *name, return; } + req = domain_is_unprivileged(conn) ? conn->in : NULL; + /* Create an event for each watch. */ list_for_each_entry(i, &connections, list) { /* introduce/release domain watches */ @@ -164,12 +167,12 @@ void fire_watches(struct connection *conn, const void *ctx, const char *name, list_for_each_entry(watch, &i->watches, list) { if (exact) { if (streq(name, watch->node)) - send_event(i, + send_event(req, i, get_watch_path(watch, name), watch->token); } else { if (is_child(name, watch->node)) - send_event(i, + send_event(req, i, get_watch_path(watch, name), watch->token); } @@ -269,8 +272,12 @@ int do_watch(struct connection *conn, struct buffered_data *in) trace_create(watch, "watch"); send_ack(conn, XS_WATCH); - /* We fire once up front: simplifies clients and restart. */ - send_event(conn, get_watch_path(watch, watch->node), watch->token); + /* + * We fire once up front: simplifies clients and restart. + * This event will not be linked to the XS_WATCH request. + */ + send_event(NULL, conn, get_watch_path(watch, watch->node), + watch->token); return 0; }