From patchwork Thu Apr 25 07:33:19 2019
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: Jason Wang <jasowang@redhat.com>
X-Patchwork-Id: 10916185
Return-Path: <kvm-owner@kernel.org>
Received: from mail.wl.linuxfoundation.org (pdx-wl-mail.web.codeaurora.org
 [172.30.200.125])
	by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id E4F501575
	for <patchwork-kvm@patchwork.kernel.org>;
 Thu, 25 Apr 2019 07:33:33 +0000 (UTC)
Received: from mail.wl.linuxfoundation.org (localhost [127.0.0.1])
	by mail.wl.linuxfoundation.org (Postfix) with ESMTP id D276828BD7
	for <patchwork-kvm@patchwork.kernel.org>;
 Thu, 25 Apr 2019 07:33:33 +0000 (UTC)
Received: by mail.wl.linuxfoundation.org (Postfix, from userid 486)
	id C64AC28BE1; Thu, 25 Apr 2019 07:33:33 +0000 (UTC)
X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on
	pdx-wl-mail.web.codeaurora.org
X-Spam-Level: 
X-Spam-Status: No, score=-7.9 required=2.0 tests=BAYES_00,MAILING_LIST_MULTI,
	RCVD_IN_DNSWL_HI autolearn=ham version=3.3.1
Received: from vger.kernel.org (vger.kernel.org [209.132.180.67])
	by mail.wl.linuxfoundation.org (Postfix) with ESMTP id 590B228BD7
	for <patchwork-kvm@patchwork.kernel.org>;
 Thu, 25 Apr 2019 07:33:33 +0000 (UTC)
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
        id S1726957AbfDYHd1 (ORCPT
        <rfc822;patchwork-kvm@patchwork.kernel.org>);
        Thu, 25 Apr 2019 03:33:27 -0400
Received: from mx1.redhat.com ([209.132.183.28]:60130 "EHLO mx1.redhat.com"
        rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP
        id S1726317AbfDYHd1 (ORCPT <rfc822;kvm@vger.kernel.org>);
        Thu, 25 Apr 2019 03:33:27 -0400
Received: from smtp.corp.redhat.com (int-mx07.intmail.prod.int.phx2.redhat.com
 [10.5.11.22])
        (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits))
        (No client certificate requested)
        by mx1.redhat.com (Postfix) with ESMTPS id C2BCE3002619;
        Thu, 25 Apr 2019 07:33:26 +0000 (UTC)
Received: from hp-dl380pg8-02.lab.eng.pek2.redhat.com
 (hp-dl380pg8-02.lab.eng.pek2.redhat.com [10.73.8.12])
        by smtp.corp.redhat.com (Postfix) with ESMTP id 5DA021001DDB;
        Thu, 25 Apr 2019 07:33:21 +0000 (UTC)
From: Jason Wang <jasowang@redhat.com>
To: mst@redhat.com, jasowang@redhat.com, kvm@vger.kernel.org,
        virtualization@lists.linux-foundation.org, netdev@vger.kernel.org,
        linux-kernel@vger.kernel.org
Cc: ppandit@redhat.com
Subject: [PATCH net] vhost_net: fix possible infinite loop
Date: Thu, 25 Apr 2019 03:33:19 -0400
Message-Id: <1556177599-56248-1-git-send-email-jasowang@redhat.com>
X-Scanned-By: MIMEDefang 2.84 on 10.5.11.22
X-Greylist: Sender IP whitelisted,
 not delayed by milter-greylist-4.5.16 (mx1.redhat.com [10.5.110.40]);
 Thu, 25 Apr 2019 07:33:26 +0000 (UTC)
Sender: kvm-owner@vger.kernel.org
Precedence: bulk
List-ID: <kvm.vger.kernel.org>
X-Mailing-List: kvm@vger.kernel.org
X-Virus-Scanned: ClamAV using ClamSMTP

When the rx buffer is too small for a packet, we will discard the vq
descriptor and retry it for the next packet:

while ((sock_len = vhost_net_rx_peek_head_len(net, sock->sk,
					      &busyloop_intr))) {
...
	/* On overrun, truncate and discard */
	if (unlikely(headcount > UIO_MAXIOV)) {
		iov_iter_init(&msg.msg_iter, READ, vq->iov, 1, 1);
		err = sock->ops->recvmsg(sock, &msg,
					 1, MSG_DONTWAIT | MSG_TRUNC);
		pr_debug("Discarded rx packet: len %zd\n", sock_len);
		continue;
	}
...
}

This makes it possible to trigger an infinite while..continue loop
through the cooperation of two VMs, for example (a standalone
illustration of the resulting spin follows this list):

1) Malicious VM1 allocates a 1 byte rx buffer and tries to slow down
   the vhost process as much as possible, e.g. by using indirect
   descriptors or other means.
2) Malicious VM2 generates packets to VM1 as fast as possible.
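
For illustration only, here is a minimal userspace sketch of that spin
(not kernel code: peek_head_len(), get_rx_bufs() and the cap in the
demo are simplified stand-ins for the real vhost pieces). With a peer
that never lets the queue drain and buffers that always overrun, every
iteration takes the discard-and-continue path, so the loop's own exit
condition is never reached:

#include <stdio.h>
#include <stddef.h>

#define UIO_MAXIOV 1024

/* VM2 floods VM1, so the socket queue is never empty. */
static size_t peek_head_len(void)
{
	return 1500;
}

/* VM1 only posts 1 byte buffers, so every packet needs more than
 * UIO_MAXIOV descriptor heads and overruns. */
static int get_rx_bufs(void)
{
	return UIO_MAXIOV + 1;
}

int main(void)
{
	size_t sock_len;
	unsigned long discards = 0;

	while ((sock_len = peek_head_len())) {
		int headcount = get_rx_bufs();

		/* Same shape as the kernel's "truncate and discard" branch. */
		if (headcount > UIO_MAXIOV) {
			if (++discards == 10000000UL) {
				/* The real loop has no such cap. */
				printf("still spinning after %lu discards\n",
				       discards);
				return 0;
			}
			continue;
		}
		/* Never reached in this scenario. */
	}
	return 0;
}

A bound on the amount of work done per invocation, as introduced
below, is what breaks this pattern.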

Fix this by checking against the weight at the end of the RX and TX
loops (a sketch of the resulting loop shape follows this list). This
also eliminates other similar cases where:

- userspace is consuming the packets in the meantime
- a theoretical TOCTOU attack, where the guest moves the avail index
  back and forth to hit the continue path right after vhost finds that
  the guest has just added new buffers
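
As a companion sketch of the loop shape the patch switches to (again
plain userspace C; vhost_exceeds_weight() and the weight constants
below are simplified stand-ins, not the kernel implementation), one
handler invocation now does a bounded amount of work even though the
peer never stops feeding packets, and merely notes that another round
is needed, which the real code defers via vhost_poll_queue():

#include <stdbool.h>
#include <stdio.h>
#include <stddef.h>

/* Simplified stand-ins for the byte and packet weight limits. */
#define WEIGHT_BYTES 0x80000
#define WEIGHT_PKTS  256

static bool vhost_exceeds_weight(int pkts, size_t total_len)
{
	return total_len >= WEIGHT_BYTES || pkts >= WEIGHT_PKTS;
}

/* The peer always has another packet queued. */
static size_t peek_head_len(void)
{
	return 1500;
}

int main(void)
{
	size_t total_len = 0;
	int recv_pkts = 0;
	bool next_round = false;

	do {
		size_t sock_len = peek_head_len();

		if (!sock_len)
			break;
		total_len += sock_len;	/* "process" one packet */
	} while (!(next_round = vhost_exceeds_weight(++recv_pkts, total_len)));

	if (next_round)
		printf("stopped after %d packets (%zu bytes), requeue the rest\n",
		       recv_pkts, total_len);
	return 0;
}

This is the same do { ... } while (!(next_round =
vhost_exceeds_weight(...))) shape applied to the TX and RX handlers in
the diff below.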

This addresses CVE-2019-3900.

Fixes: d8316f3991d20 ("vhost: fix total length when packets are too short")
Fixes: 3a4d5c94e9593 ("vhost_net: a kernel-level virtio server")
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 drivers/vhost/net.c | 41 +++++++++++++++++++++--------------------
 1 file changed, 21 insertions(+), 20 deletions(-)

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index df51a35..fb46e6b 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -778,8 +778,9 @@ static void handle_tx_copy(struct vhost_net *net, struct socket *sock)
 	int err;
 	int sent_pkts = 0;
 	bool sock_can_batch = (sock->sk->sk_sndbuf == INT_MAX);
+	bool next_round = false;
 
-	for (;;) {
+	do {
 		bool busyloop_intr = false;
 
 		if (nvq->done_idx == VHOST_NET_BATCH)
@@ -845,11 +846,10 @@ static void handle_tx_copy(struct vhost_net *net, struct socket *sock)
 		vq->heads[nvq->done_idx].id = cpu_to_vhost32(vq, head);
 		vq->heads[nvq->done_idx].len = 0;
 		++nvq->done_idx;
-		if (vhost_exceeds_weight(++sent_pkts, total_len)) {
-			vhost_poll_queue(&vq->poll);
-			break;
-		}
-	}
+	} while (!(next_round = vhost_exceeds_weight(++sent_pkts, total_len)));
+
+	if (next_round)
+		vhost_poll_queue(&vq->poll);
 
 	vhost_tx_batch(net, nvq, sock, &msg);
 }
@@ -873,8 +873,9 @@ static void handle_tx_zerocopy(struct vhost_net *net, struct socket *sock)
 	struct vhost_net_ubuf_ref *uninitialized_var(ubufs);
 	bool zcopy_used;
 	int sent_pkts = 0;
+	bool next_round = false;
 
-	for (;;) {
+	do {
 		bool busyloop_intr;
 
 		/* Release DMAs done buffers first */
@@ -951,11 +952,10 @@ static void handle_tx_zerocopy(struct vhost_net *net, struct socket *sock)
 		else
 			vhost_zerocopy_signal_used(net, vq);
 		vhost_net_tx_packet(net);
-		if (unlikely(vhost_exceeds_weight(++sent_pkts, total_len))) {
-			vhost_poll_queue(&vq->poll);
-			break;
-		}
-	}
+	} while (!(next_round = vhost_exceeds_weight(++sent_pkts, total_len)));
+
+	if (next_round)
+		vhost_poll_queue(&vq->poll);
 }
 
 /* Expects to be always run from workqueue - which acts as
@@ -1134,6 +1134,7 @@ static void handle_rx(struct vhost_net *net)
 	struct iov_iter fixup;
 	__virtio16 num_buffers;
 	int recv_pkts = 0;
+	bool next_round = false;
 
 	mutex_lock_nested(&vq->mutex, VHOST_NET_VQ_RX);
 	sock = vq->private_data;
@@ -1153,8 +1154,11 @@ static void handle_rx(struct vhost_net *net)
 		vq->log : NULL;
 	mergeable = vhost_has_feature(vq, VIRTIO_NET_F_MRG_RXBUF);
 
-	while ((sock_len = vhost_net_rx_peek_head_len(net, sock->sk,
-						      &busyloop_intr))) {
+	do {
+		sock_len = vhost_net_rx_peek_head_len(net, sock->sk,
+						      &busyloop_intr);
+		if (!sock_len)
+			break;
 		sock_len += sock_hlen;
 		vhost_len = sock_len + vhost_hlen;
 		headcount = get_rx_bufs(vq, vq->heads + nvq->done_idx,
@@ -1239,12 +1243,9 @@ static void handle_rx(struct vhost_net *net)
 			vhost_log_write(vq, vq_log, log, vhost_len,
 					vq->iov, in);
 		total_len += vhost_len;
-		if (unlikely(vhost_exceeds_weight(++recv_pkts, total_len))) {
-			vhost_poll_queue(&vq->poll);
-			goto out;
-		}
-	}
-	if (unlikely(busyloop_intr))
+	} while (!(next_round = vhost_exceeds_weight(++recv_pkts, total_len)));
+
+	if (unlikely(busyloop_intr || next_round))
 		vhost_poll_queue(&vq->poll);
 	else
 		vhost_net_enable_vq(net, vq);