20acea1
From patchwork Thu Apr 25 07:33:19 2019
20acea1
Content-Type: text/plain; charset="utf-8"
20acea1
MIME-Version: 1.0
20acea1
Content-Transfer-Encoding: 7bit
20acea1
X-Patchwork-Submitter: Jason Wang <jasowang@redhat.com>
20acea1
X-Patchwork-Id: 10916185
20acea1
Return-Path: <kvm-owner@kernel.org>
20acea1
Received: from mail.wl.linuxfoundation.org (pdx-wl-mail.web.codeaurora.org
20acea1
 [172.30.200.125])
20acea1
	by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id E4F501575
20acea1
	for <patchwork-kvm@patchwork.kernel.org>;
20acea1
 Thu, 25 Apr 2019 07:33:33 +0000 (UTC)
20acea1
Received: from mail.wl.linuxfoundation.org (localhost [127.0.0.1])
20acea1
	by mail.wl.linuxfoundation.org (Postfix) with ESMTP id D276828BD7
20acea1
	for <patchwork-kvm@patchwork.kernel.org>;
20acea1
 Thu, 25 Apr 2019 07:33:33 +0000 (UTC)
20acea1
Received: by mail.wl.linuxfoundation.org (Postfix, from userid 486)
20acea1
	id C64AC28BE1; Thu, 25 Apr 2019 07:33:33 +0000 (UTC)
20acea1
X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on
20acea1
	pdx-wl-mail.web.codeaurora.org
20acea1
X-Spam-Level: 
20acea1
X-Spam-Status: No, score=-7.9 required=2.0 tests=BAYES_00,MAILING_LIST_MULTI,
20acea1
	RCVD_IN_DNSWL_HI autolearn=ham version=3.3.1
20acea1
Received: from vger.kernel.org (vger.kernel.org [209.132.180.67])
20acea1
	by mail.wl.linuxfoundation.org (Postfix) with ESMTP id 590B228BD7
20acea1
	for <patchwork-kvm@patchwork.kernel.org>;
20acea1
 Thu, 25 Apr 2019 07:33:33 +0000 (UTC)
20acea1
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
20acea1
        id S1726957AbfDYHd1 (ORCPT
20acea1
        <rfc822;patchwork-kvm@patchwork.kernel.org>);
20acea1
        Thu, 25 Apr 2019 03:33:27 -0400
20acea1
Received: from mx1.redhat.com ([209.132.183.28]:60130 "EHLO mx1.redhat.com"
20acea1
        rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP
20acea1
        id S1726317AbfDYHd1 (ORCPT <rfc822;kvm@vger.kernel.org>);
20acea1
        Thu, 25 Apr 2019 03:33:27 -0400
20acea1
Received: from smtp.corp.redhat.com (int-mx07.intmail.prod.int.phx2.redhat.com
20acea1
 [10.5.11.22])
20acea1
        (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits))
20acea1
        (No client certificate requested)
20acea1
        by mx1.redhat.com (Postfix) with ESMTPS id C2BCE3002619;
20acea1
        Thu, 25 Apr 2019 07:33:26 +0000 (UTC)
20acea1
Received: from hp-dl380pg8-02.lab.eng.pek2.redhat.com
20acea1
 (hp-dl380pg8-02.lab.eng.pek2.redhat.com [10.73.8.12])
20acea1
        by smtp.corp.redhat.com (Postfix) with ESMTP id 5DA021001DDB;
20acea1
        Thu, 25 Apr 2019 07:33:21 +0000 (UTC)
20acea1
From: Jason Wang <jasowang@redhat.com>
20acea1
To: mst@redhat.com, jasowang@redhat.com, kvm@vger.kernel.org,
20acea1
        virtualization@lists.linux-foundation.org, netdev@vger.kernel.org,
20acea1
        linux-kernel@vger.kernel.org
20acea1
Cc: ppandit@redhat.com
20acea1
Subject: [PATCH net] vhost_net: fix possible infinite loop
20acea1
Date: Thu, 25 Apr 2019 03:33:19 -0400
20acea1
Message-Id: <1556177599-56248-1-git-send-email-jasowang@redhat.com>
20acea1
X-Scanned-By: MIMEDefang 2.84 on 10.5.11.22
20acea1
X-Greylist: Sender IP whitelisted,
20acea1
 not delayed by milter-greylist-4.5.16 (mx1.redhat.com [10.5.110.40]);
20acea1
 Thu, 25 Apr 2019 07:33:26 +0000 (UTC)
20acea1
Sender: kvm-owner@vger.kernel.org
20acea1
Precedence: bulk
20acea1
List-ID: <kvm.vger.kernel.org>
20acea1
X-Mailing-List: kvm@vger.kernel.org
20acea1
X-Virus-Scanned: ClamAV using ClamSMTP
20acea1
20acea1
When the rx buffer is too small for a packet, we will discard the vq
20acea1
descriptor and retry it for the next packet:
20acea1
20acea1
while ((sock_len = vhost_net_rx_peek_head_len(net, sock->sk,
20acea1
					      &busyloop_intr))) {
20acea1
...
20acea1
	/* On overrun, truncate and discard */
20acea1
	if (unlikely(headcount > UIO_MAXIOV)) {
20acea1
		iov_iter_init(&msg.msg_iter, READ, vq->iov, 1, 1);
20acea1
		err = sock->ops->recvmsg(sock, &msg,
20acea1
					 1, MSG_DONTWAIT | MSG_TRUNC);
20acea1
		pr_debug("Discarded rx packet: len %zd\n", sock_len);
20acea1
		continue;
20acea1
	}
20acea1
...
20acea1
}
20acea1
20acea1
This makes it possible to trigger a infinite while..continue loop
20acea1
through the co-opreation of two VMs like:
20acea1
20acea1
1) Malicious VM1 allocate 1 byte rx buffer and try to slow down the
20acea1
   vhost process as much as possible e.g using indirect descriptors or
20acea1
   other.
20acea1
2) Malicious VM2 generate packets to VM1 as fast as possible
20acea1
20acea1
Fixing this by checking against weight at the end of RX and TX
20acea1
loop. This also eliminate other similar cases when:
20acea1
20acea1
- userspace is consuming the packets in the meanwhile
20acea1
- theoretical TOCTOU attack if guest moving avail index back and forth
20acea1
  to hit the continue after vhost find guest just add new buffers
20acea1
20acea1
This addresses CVE-2019-3900.
20acea1
20acea1
Fixes: d8316f3991d20 ("vhost: fix total length when packets are too short")
20acea1
Fixes: 3a4d5c94e9593 ("vhost_net: a kernel-level virtio server")
20acea1
Signed-off-by: Jason Wang <jasowang@redhat.com>
20acea1
---
20acea1
 drivers/vhost/net.c | 41 +++++++++++++++++++++--------------------
20acea1
 1 file changed, 21 insertions(+), 20 deletions(-)
20acea1
20acea1
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
20acea1
index df51a35..fb46e6b 100644
20acea1
--- a/drivers/vhost/net.c
20acea1
+++ b/drivers/vhost/net.c
20acea1
@@ -778,8 +778,9 @@ static void handle_tx_copy(struct vhost_net *net, struct socket *sock)
20acea1
 	int err;
20acea1
 	int sent_pkts = 0;
20acea1
 	bool sock_can_batch = (sock->sk->sk_sndbuf == INT_MAX);
20acea1
+	bool next_round = false;
20acea1
 
20acea1
-	for (;;) {
20acea1
+	do {
20acea1
 		bool busyloop_intr = false;
20acea1
 
20acea1
 		if (nvq->done_idx == VHOST_NET_BATCH)
20acea1
@@ -845,11 +846,10 @@ static void handle_tx_copy(struct vhost_net *net, struct socket *sock)
20acea1
 		vq->heads[nvq->done_idx].id = cpu_to_vhost32(vq, head);
20acea1
 		vq->heads[nvq->done_idx].len = 0;
20acea1
 		++nvq->done_idx;
20acea1
-		if (vhost_exceeds_weight(++sent_pkts, total_len)) {
20acea1
-			vhost_poll_queue(&vq->poll);
20acea1
-			break;
20acea1
-		}
20acea1
-	}
20acea1
+	} while (!(next_round = vhost_exceeds_weight(++sent_pkts, total_len)));
20acea1
+
20acea1
+	if (next_round)
20acea1
+		vhost_poll_queue(&vq->poll);
20acea1
 
20acea1
 	vhost_tx_batch(net, nvq, sock, &msg;;
20acea1
 }
20acea1
@@ -873,8 +873,9 @@ static void handle_tx_zerocopy(struct vhost_net *net, struct socket *sock)
20acea1
 	struct vhost_net_ubuf_ref *uninitialized_var(ubufs);
20acea1
 	bool zcopy_used;
20acea1
 	int sent_pkts = 0;
20acea1
+	bool next_round = false;
20acea1
 
20acea1
-	for (;;) {
20acea1
+	do {
20acea1
 		bool busyloop_intr;
20acea1
 
20acea1
 		/* Release DMAs done buffers first */
20acea1
@@ -951,11 +952,10 @@ static void handle_tx_zerocopy(struct vhost_net *net, struct socket *sock)
20acea1
 		else
20acea1
 			vhost_zerocopy_signal_used(net, vq);
20acea1
 		vhost_net_tx_packet(net);
20acea1
-		if (unlikely(vhost_exceeds_weight(++sent_pkts, total_len))) {
20acea1
-			vhost_poll_queue(&vq->poll);
20acea1
-			break;
20acea1
-		}
20acea1
-	}
20acea1
+	} while (!(next_round = vhost_exceeds_weight(++sent_pkts, total_len)));
20acea1
+
20acea1
+	if (next_round)
20acea1
+		vhost_poll_queue(&vq->poll);
20acea1
 }
20acea1
 
20acea1
 /* Expects to be always run from workqueue - which acts as
20acea1
@@ -1134,6 +1134,7 @@ static void handle_rx(struct vhost_net *net)
20acea1
 	struct iov_iter fixup;
20acea1
 	__virtio16 num_buffers;
20acea1
 	int recv_pkts = 0;
20acea1
+	bool next_round = false;
20acea1
 
20acea1
 	mutex_lock_nested(&vq->mutex, VHOST_NET_VQ_RX);
20acea1
 	sock = vq->private_data;
20acea1
@@ -1153,8 +1154,11 @@ static void handle_rx(struct vhost_net *net)
20acea1
 		vq->log : NULL;
20acea1
 	mergeable = vhost_has_feature(vq, VIRTIO_NET_F_MRG_RXBUF);
20acea1
 
20acea1
-	while ((sock_len = vhost_net_rx_peek_head_len(net, sock->sk,
20acea1
-						      &busyloop_intr))) {
20acea1
+	do {
20acea1
+		sock_len = vhost_net_rx_peek_head_len(net, sock->sk,
20acea1
+						      &busyloop_intr);
20acea1
+		if (!sock_len)
20acea1
+			break;
20acea1
 		sock_len += sock_hlen;
20acea1
 		vhost_len = sock_len + vhost_hlen;
20acea1
 		headcount = get_rx_bufs(vq, vq->heads + nvq->done_idx,
20acea1
@@ -1239,12 +1243,9 @@ static void handle_rx(struct vhost_net *net)
20acea1
 			vhost_log_write(vq, vq_log, log, vhost_len,
20acea1
 					vq->iov, in);
20acea1
 		total_len += vhost_len;
20acea1
-		if (unlikely(vhost_exceeds_weight(++recv_pkts, total_len))) {
20acea1
-			vhost_poll_queue(&vq->poll);
20acea1
-			goto out;
20acea1
-		}
20acea1
-	}
20acea1
-	if (unlikely(busyloop_intr))
20acea1
+	} while (!(next_round = vhost_exceeds_weight(++recv_pkts, total_len)));
20acea1
+
20acea1
+	if (unlikely(busyloop_intr || next_round))
20acea1
 		vhost_poll_queue(&vq->poll);
20acea1
 	else
20acea1
 		vhost_net_enable_vq(net, vq);