From foo@baz Sat Jan 17 18:12:21 PST 2015
From: =?UTF-8?q?Timo=20Ter=C3=A4s?= <timo.teras@iki.fi>
Date: Mon, 15 Dec 2014 09:24:13 +0200
Subject: gre: fix the inner mac header in nbma tunnel xmit path
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

From: =?UTF-8?q?Timo=20Ter=C3=A4s?= <timo.teras@iki.fi>

[ Upstream commit 8a0033a947403569caeca45fa5e6f7ba60d51974 ]

NBMA GRE tunnels temporarily push a GRE header that contains the
per-packet NBMA destination on the skb via header ops early in the xmit
path. It is later pulled before the real GRE header is constructed.

The inner mac header was thus set differently in the NBMA case: the GRE
header has been pushed by the neighbor layer, and the mac header points
to the beginning of the temporary GRE header (set by dev_queue_xmit).

Now that the offloads expect the mac header to point to the GRE payload,
fix the xmit path to:
 - first pull the temporary GRE header away
 - and reset the mac header to point to the GRE payload

This makes TSO work again with NBMA tunnels.

Fixes: 14051f0452a2 ("gre: Use inner mac length when computing tunnel length")
Signed-off-by: Timo Teräs <timo.teras@iki.fi>
Cc: Tom Herbert <therbert@google.com>
Cc: Alexander Duyck <alexander.h.duyck@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv4/ip_gre.c |    9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -252,10 +252,6 @@ static netdev_tx_t ipgre_xmit(struct sk_
 	struct ip_tunnel *tunnel = netdev_priv(dev);
 	const struct iphdr *tnl_params;
 
-	skb = gre_handle_offloads(skb, !!(tunnel->parms.o_flags&TUNNEL_CSUM));
-	if (IS_ERR(skb))
-		goto out;
-
 	if (dev->header_ops) {
 		/* Need space for new headers */
 		if (skb_cow_head(skb, dev->needed_headroom -
@@ -268,6 +264,7 @@ static netdev_tx_t ipgre_xmit(struct sk_
 		 * to gre header.
 		 */
 		skb_pull(skb, tunnel->hlen + sizeof(struct iphdr));
+		skb_reset_mac_header(skb);
 	} else {
 		if (skb_cow_head(skb, dev->needed_headroom))
 			goto free_skb;
@@ -275,6 +272,10 @@ static netdev_tx_t ipgre_xmit(struct sk_
 		tnl_params = &tunnel->parms.iph;
 	}
 
+	skb = gre_handle_offloads(skb, !!(tunnel->parms.o_flags&TUNNEL_CSUM));
+	if (IS_ERR(skb))
+		goto out;
+
 	__gre_xmit(skb, dev, tnl_params, skb->protocol);
 
 	return NETDEV_TX_OK;
From foo@baz Sat Jan 17 18:12:21 PST 2015
From: Ido Shamay <idos@mellanox.com>
Date: Tue, 16 Dec 2014 13:28:54 +0200
Subject: net/mlx4: Cache line CQE/EQE stride fixes

From: Ido Shamay <idos@mellanox.com>

[ Upstream commit c3f2511feac088030055012cc8f64ebd84c87dbc ]

This commit contains two fixes for the 128B CQE/EQE stride feature.
Wei found that the mlx4_QUERY_HCA function marked the wrong capability
flags (64B CQE/EQE) when the CQE/EQE stride feature was enabled.
Also added a small fix to the initial CQE ownership bit assignment for
the case where the CQE size is not the default 32B.

Fixes: 77507aa24 (net/mlx4: Enable CQE/EQE stride support)
Signed-off-by: Wei Yang <weiyang@linux.vnet.ibm.com>
Signed-off-by: Ido Shamay <idos@mellanox.com>
Signed-off-by: Amir Vadai <amirv@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/mellanox/mlx4/en_netdev.c |   11 +++++++++--
 drivers/net/ethernet/mellanox/mlx4/fw.c        |    4 ++--
 2 files changed, 11 insertions(+), 4 deletions(-)

--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -1569,8 +1569,15 @@ int mlx4_en_start_port(struct net_device
 			mlx4_en_free_affinity_hint(priv, i);
 			goto cq_err;
 		}
-		for (j = 0; j < cq->size; j++)
-			cq->buf[j].owner_sr_opcode = MLX4_CQE_OWNER_MASK;
+
+		for (j = 0; j < cq->size; j++) {
+			struct mlx4_cqe *cqe = NULL;
+
+			cqe = mlx4_en_get_cqe(cq->buf, j, priv->cqe_size) +
+			      priv->cqe_factor;
+			cqe->owner_sr_opcode = MLX4_CQE_OWNER_MASK;
+		}
+
 		err = mlx4_en_set_cq_moder(priv, cq);
 		if (err) {
 			en_err(priv, "Failed setting cq moderation parameters\n");
--- a/drivers/net/ethernet/mellanox/mlx4/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.c
@@ -1647,8 +1647,8 @@ int mlx4_QUERY_HCA(struct mlx4_dev *dev,
 	/* CX3 is capable of extending CQEs\EQEs to strides larger than 64B */
 	MLX4_GET(byte_field, outbox, INIT_HCA_EQE_CQE_STRIDE_OFFSET);
 	if (byte_field) {
-		param->dev_cap_enabled |= MLX4_DEV_CAP_64B_EQE_ENABLED;
-		param->dev_cap_enabled |= MLX4_DEV_CAP_64B_CQE_ENABLED;
+		param->dev_cap_enabled |= MLX4_DEV_CAP_EQE_STRIDE_ENABLED;
+		param->dev_cap_enabled |= MLX4_DEV_CAP_CQE_STRIDE_ENABLED;
 		param->cqe_size = 1 << ((byte_field &
 					 MLX4_CQE_SIZE_MASK_STRIDE) + 5);
 		param->eqe_size = 1 << (((byte_field &
From foo@baz Sat Jan 17 18:12:21 PST 2015
From: David Miller <davem@davemloft.net>
Date: Tue, 16 Dec 2014 17:58:17 -0500
Subject: netlink: Always copy on mmap TX.

From: David Miller <davem@davemloft.net>

[ Upstream commit 4682a0358639b29cf69437ed909c6221f8c89847 ]

Checking the file f_count and the nlk->mapped count is not completely
sufficient to prevent the mmap'd area contents from changing from
under us during netlink mmap sendmsg() operations.

Be careful to sample the header's length field only once, because this
could change from under us as well.

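The "sample the length once" part of the fix follows a standard pattern for
data shared with an untrusted writer. A minimal hypothetical userspace sketch
of the idea (not the netlink code; names are illustrative):

#include <stdio.h>
#include <string.h>

/* Hypothetical frame header living in memory that another task can
 * modify concurrently (loosely modelled on nl_mmap_hdr). */
struct frame_hdr {
        volatile unsigned int nm_len;   /* may change under us at any time */
        unsigned char data[256];
};

/* Sample nm_len exactly once, validate the local copy, and use only that
 * copy afterwards; re-reading hdr->nm_len after the check would reopen
 * the time-of-check/time-of-use race. */
static int copy_frame(struct frame_hdr *hdr, unsigned char *dst,
                      unsigned int maxlen)
{
        unsigned int nm_len = hdr->nm_len;      /* single sample */

        if (nm_len > maxlen)
                return -1;
        memcpy(dst, hdr->data, nm_len);         /* same sampled value */
        return (int)nm_len;
}

int main(void)
{
        struct frame_hdr hdr = { .nm_len = 5 };
        unsigned char buf[256];

        memcpy(hdr.data, "hello", 5);
        printf("copied %d bytes\n", copy_frame(&hdr, buf, sizeof(buf)));
        return 0;
}
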
Fixes: 5fd96123ee19 ("netlink: implement memory mapped sendmsg()")
Signed-off-by: David S. Miller <davem@davemloft.net>
Acked-by: Daniel Borkmann <dborkman@redhat.com>
Acked-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/netlink/af_netlink.c |   52 ++++++++++++++---------------------------------
 1 file changed, 16 insertions(+), 36 deletions(-)

--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -526,14 +526,14 @@ out:
 	return err;
 }
 
-static void netlink_frame_flush_dcache(const struct nl_mmap_hdr *hdr)
+static void netlink_frame_flush_dcache(const struct nl_mmap_hdr *hdr, unsigned int nm_len)
 {
 #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1
 	struct page *p_start, *p_end;
 
 	/* First page is flushed through netlink_{get,set}_status */
 	p_start = pgvec_to_page(hdr + PAGE_SIZE);
-	p_end   = pgvec_to_page((void *)hdr + NL_MMAP_HDRLEN + hdr->nm_len - 1);
+	p_end   = pgvec_to_page((void *)hdr + NL_MMAP_HDRLEN + nm_len - 1);
 	while (p_start <= p_end) {
 		flush_dcache_page(p_start);
 		p_start++;
@@ -715,24 +715,16 @@ static int netlink_mmap_sendmsg(struct s
 	struct nl_mmap_hdr *hdr;
 	struct sk_buff *skb;
 	unsigned int maxlen;
-	bool excl = true;
 	int err = 0, len = 0;
 
-	/* Netlink messages are validated by the receiver before processing.
-	 * In order to avoid userspace changing the contents of the message
-	 * after validation, the socket and the ring may only be used by a
-	 * single process, otherwise we fall back to copying.
-	 */
-	if (atomic_long_read(&sk->sk_socket->file->f_count) > 1 ||
-	    atomic_read(&nlk->mapped) > 1)
-		excl = false;
-
 	mutex_lock(&nlk->pg_vec_lock);
 
 	ring   = &nlk->tx_ring;
 	maxlen = ring->frame_size - NL_MMAP_HDRLEN;
 
 	do {
+		unsigned int nm_len;
+
 		hdr = netlink_current_frame(ring, NL_MMAP_STATUS_VALID);
 		if (hdr == NULL) {
 			if (!(msg->msg_flags & MSG_DONTWAIT) &&
@@ -740,35 +732,23 @@ static int netlink_mmap_sendmsg(struct s
 				schedule();
 			continue;
 		}
-		if (hdr->nm_len > maxlen) {
+
+		nm_len = ACCESS_ONCE(hdr->nm_len);
+		if (nm_len > maxlen) {
 			err = -EINVAL;
 			goto out;
 		}
 
-		netlink_frame_flush_dcache(hdr);
+		netlink_frame_flush_dcache(hdr, nm_len);
 
-		if (likely(dst_portid == 0 && dst_group == 0 && excl)) {
-			skb = alloc_skb_head(GFP_KERNEL);
-			if (skb == NULL) {
-				err = -ENOBUFS;
-				goto out;
-			}
-			sock_hold(sk);
-			netlink_ring_setup_skb(skb, sk, ring, hdr);
-			NETLINK_CB(skb).flags |= NETLINK_SKB_TX;
-			__skb_put(skb, hdr->nm_len);
-			netlink_set_status(hdr, NL_MMAP_STATUS_RESERVED);
-			atomic_inc(&ring->pending);
-		} else {
-			skb = alloc_skb(hdr->nm_len, GFP_KERNEL);
-			if (skb == NULL) {
-				err = -ENOBUFS;
-				goto out;
-			}
-			__skb_put(skb, hdr->nm_len);
-			memcpy(skb->data, (void *)hdr + NL_MMAP_HDRLEN, hdr->nm_len);
-			netlink_set_status(hdr, NL_MMAP_STATUS_UNUSED);
+		skb = alloc_skb(nm_len, GFP_KERNEL);
+		if (skb == NULL) {
+			err = -ENOBUFS;
+			goto out;
 		}
+		__skb_put(skb, nm_len);
+		memcpy(skb->data, (void *)hdr + NL_MMAP_HDRLEN, nm_len);
+		netlink_set_status(hdr, NL_MMAP_STATUS_UNUSED);
 
 		netlink_increment_head(ring);
 
@@ -814,7 +794,7 @@ static void netlink_queue_mmaped_skb(str
 	hdr->nm_pid	= NETLINK_CB(skb).creds.pid;
 	hdr->nm_uid	= from_kuid(sk_user_ns(sk), NETLINK_CB(skb).creds.uid);
 	hdr->nm_gid	= from_kgid(sk_user_ns(sk), NETLINK_CB(skb).creds.gid);
-	netlink_frame_flush_dcache(hdr);
+	netlink_frame_flush_dcache(hdr, hdr->nm_len);
 	netlink_set_status(hdr, NL_MMAP_STATUS_VALID);
 
 	NETLINK_CB(skb).flags |= NETLINK_SKB_DELIVERED;
From foo@baz Sat Jan 17 18:12:21 PST 2015
From: Thomas Graf <tgraf@suug.ch>
Date: Thu, 18 Dec 2014 10:30:26 +0000
Subject: netlink: Don't reorder loads/stores before marking mmap netlink frame as available

From: Thomas Graf <tgraf@suug.ch>

[ Upstream commit a18e6a186f53af06937a2c268c72443336f4ab56 ]

Each mmap Netlink frame contains a status field which indicates
whether the frame is unused, reserved, contains data, or needs to
be skipped. Neither loads nor stores may be reordered; both must
complete before the status field is changed and another CPU might
pick up the frame for use. Use an smp_mb() to cover the needs of both
types of callers of netlink_set_status(): callers which have been
reading data from the frame, and callers which have been filling or
releasing it and thus writing to the frame.

- Example code path requiring a smp_rmb():
  memcpy(skb->data, (void *)hdr + NL_MMAP_HDRLEN, hdr->nm_len);
  netlink_set_status(hdr, NL_MMAP_STATUS_UNUSED);

- Example code path requiring a smp_wmb():
  hdr->nm_uid	= from_kuid(sk_user_ns(sk), NETLINK_CB(skb).creds.uid);
  hdr->nm_gid	= from_kgid(sk_user_ns(sk), NETLINK_CB(skb).creds.gid);
  netlink_frame_flush_dcache(hdr);
  netlink_set_status(hdr, NL_MMAP_STATUS_VALID);

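A userspace analogue of the required ordering, sketched with C11 atomics
(illustrative only; the kernel uses smp_mb(), and the struct and function
names here are hypothetical):

#include <stdatomic.h>
#include <stdio.h>
#include <string.h>

enum frame_status { FRAME_UNUSED, FRAME_VALID };

struct frame {
        char payload[64];
        _Atomic int status;
};

/* Writer: all payload stores must be visible before the status flips to
 * VALID, or another CPU could pick up a half-written frame. */
static void frame_publish(struct frame *f, const char *msg)
{
        snprintf(f->payload, sizeof(f->payload), "%s", msg);
        atomic_store_explicit(&f->status, FRAME_VALID, memory_order_release);
}

/* Reader: all payload loads must complete before the status goes back to
 * UNUSED, or the writer could start reusing the frame under us. */
static int frame_consume(struct frame *f, char *out, size_t outlen)
{
        if (atomic_load_explicit(&f->status, memory_order_acquire) != FRAME_VALID)
                return -1;
        snprintf(out, outlen, "%s", f->payload);
        atomic_store_explicit(&f->status, FRAME_UNUSED, memory_order_release);
        return 0;
}

int main(void)
{
        struct frame f = { .status = FRAME_UNUSED };
        char buf[64];

        frame_publish(&f, "hello");
        if (frame_consume(&f, buf, sizeof(buf)) == 0)
                printf("consumed: %s\n", buf);
        return 0;
}
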
Fixes: f9c228 ("netlink: implement memory mapped recvmsg()")
Reported-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/netlink/af_netlink.c |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -551,9 +551,9 @@ static enum nl_mmap_status netlink_get_s
 static void netlink_set_status(struct nl_mmap_hdr *hdr,
 			       enum nl_mmap_status status)
 {
+	smp_mb();
 	hdr->nm_status = status;
 	flush_dcache_page(pgvec_to_page(hdr));
-	smp_wmb();
 }
 
 static struct nl_mmap_hdr *
From foo@baz Sat Jan 17 18:12:21 PST 2015
From: Jesse Gross <jesse@nicira.com>
Date: Tue, 16 Dec 2014 18:25:31 -0800
Subject: geneve: Remove socket and offload handlers at destruction.

From: Jesse Gross <jesse@nicira.com>

[ Upstream commit 7ed767f73192d6daf673c6d885cd02d5f280ac1f ]

Sockets aren't currently removed from the global list when
they are destroyed. In addition, offload handlers need to be cleaned
up as well.

Fixes: 0b5e8b8e ("net: Add Geneve tunneling protocol driver")
CC: Andy Zhou <azhou@nicira.com>
Signed-off-by: Jesse Gross <jesse@nicira.com>
Acked-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv4/geneve.c |   17 +++++++++++++++++
 1 file changed, 17 insertions(+)

--- a/net/ipv4/geneve.c
+++ b/net/ipv4/geneve.c
@@ -165,6 +165,15 @@ static void geneve_notify_add_rx_port(st
 	}
 }
 
+static void geneve_notify_del_rx_port(struct geneve_sock *gs)
+{
+	struct sock *sk = gs->sock->sk;
+	sa_family_t sa_family = sk->sk_family;
+
+	if (sa_family == AF_INET)
+		udp_del_offload(&gs->udp_offloads);
+}
+
 /* Callback from net/ipv4/udp.c to receive packets */
 static int geneve_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
 {
@@ -318,9 +327,17 @@ EXPORT_SYMBOL_GPL(geneve_sock_add);
 
 void geneve_sock_release(struct geneve_sock *gs)
 {
+	struct net *net = sock_net(gs->sock->sk);
+	struct geneve_net *gn = net_generic(net, geneve_net_id);
+
 	if (!atomic_dec_and_test(&gs->refcnt))
 		return;
 
+	spin_lock(&gn->sock_lock);
+	hlist_del_rcu(&gs->hlist);
+	geneve_notify_del_rx_port(gs);
+	spin_unlock(&gn->sock_lock);
+
 	queue_work(geneve_wq, &gs->del_work);
 }
 EXPORT_SYMBOL_GPL(geneve_sock_release);
From foo@baz Sat Jan 17 18:12:21 PST 2015
From: Jesse Gross <jesse@nicira.com>
Date: Tue, 16 Dec 2014 18:25:32 -0800
Subject: geneve: Fix races between socket add and release.

From: Jesse Gross <jesse@nicira.com>

[ Upstream commit 12069401d895ff84076a50189ca842c0696b84b2 ]

Currently, searching for a socket to add a reference to is not
synchronized with deletion of sockets. This can result in use
after free if there is another operation that is removing a
socket at the same time. Solving this requires both holding the
appropriate lock and checking the refcount to ensure that it
has not already hit zero.

Inspired by a related (but not exactly the same) issue in the
VXLAN driver.

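The refcount half of the fix is the "increment unless already zero" idiom:
under gn->sock_lock a reference is taken only if the socket is still live, so
a socket whose last reference was just dropped is never resurrected. A
hypothetical userspace sketch of that idiom (mirroring
atomic_add_unless(&gs->refcnt, 1, 0), not the geneve code itself):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* Take a reference only while refcnt is non-zero. */
static bool get_ref_unless_zero(_Atomic int *refcnt)
{
        int old = atomic_load(refcnt);

        do {
                if (old == 0)
                        return false;   /* object is already being destroyed */
        } while (!atomic_compare_exchange_weak(refcnt, &old, old + 1));

        return true;
}

int main(void)
{
        _Atomic int live = 1, dying = 0;

        printf("live socket:  ref taken = %d\n", get_ref_unless_zero(&live));
        printf("dying socket: ref taken = %d\n", get_ref_unless_zero(&dying));
        return 0;
}

A plain atomic increment at this point could briefly revive an object whose
last reference had already been dropped, which is exactly the use-after-free
window described above.
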
Fixes: 0b5e8b8e ("net: Add Geneve tunneling protocol driver")
CC: Andy Zhou <azhou@nicira.com>
Signed-off-by: Jesse Gross <jesse@nicira.com>
Acked-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv4/geneve.c |   13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

--- a/net/ipv4/geneve.c
+++ b/net/ipv4/geneve.c
@@ -302,6 +302,7 @@ struct geneve_sock *geneve_sock_add(stru
 				    geneve_rcv_t *rcv, void *data,
 				    bool no_share, bool ipv6)
 {
+	struct geneve_net *gn = net_generic(net, geneve_net_id);
 	struct geneve_sock *gs;
 
 	gs = geneve_socket_create(net, port, rcv, data, ipv6);
@@ -311,15 +312,15 @@ struct geneve_sock *geneve_sock_add(stru
 	if (no_share)	/* Return error if sharing is not allowed. */
 		return ERR_PTR(-EINVAL);
 
+	spin_lock(&gn->sock_lock);
 	gs = geneve_find_sock(net, port);
-	if (gs) {
-		if (gs->rcv == rcv)
-			atomic_inc(&gs->refcnt);
-		else
+	if (gs && ((gs->rcv != rcv) ||
+		   !atomic_add_unless(&gs->refcnt, 1, 0)))
 			gs = ERR_PTR(-EBUSY);
-	} else {
+	spin_unlock(&gn->sock_lock);
+
+	if (!gs)
 		gs = ERR_PTR(-EINVAL);
-	}
 
 	return gs;
 }
From foo@baz Sat Jan 17 18:12:21 PST 2015
From: David Vrabel <david.vrabel@citrix.com>
Date: Thu, 18 Dec 2014 11:13:06 +0000
Subject: xen-netback: support frontends without feature-rx-notify again

From: David Vrabel <david.vrabel@citrix.com>

[ Upstream commit 26c0e102585d5a4d311f5d6eb7f524d288e7f6b7 ]

Commit bc96f648df1bbc2729abbb84513cf4f64273a1f1 (xen-netback: make
feature-rx-notify mandatory) incorrectly assumed that there were no
frontends in use that did not support this feature.  But the frontend
driver in MiniOS does not, and since it is used by (qemu) stubdoms,
these stopped working.

Netback sort of works as-is in this mode except:

- If there are no Rx requests and the internal Rx queue fills, only
  the drain timeout will wake the thread.  The default drain timeout
  of 10 s would give unacceptable pauses.

- If an Rx stall was detected and the internal Rx queue is drained,
  then the Rx thread would never wake.

Handle these two cases (when feature-rx-notify is disabled) by:

- Reducing the drain timeout to 30 ms.

- Disabling Rx stall detection.

Reported-by: John <jw@nuclearfallout.net>
Tested-by: John <jw@nuclearfallout.net>
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
Reviewed-by: Wei Liu <wei.liu2@citrix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/xen-netback/common.h    |    4 +++-
 drivers/net/xen-netback/interface.c |    4 +++-
 drivers/net/xen-netback/netback.c   |   27 ++++++++++++++-------------
 drivers/net/xen-netback/xenbus.c    |   12 +++++++++---
 4 files changed, 29 insertions(+), 18 deletions(-)

--- a/drivers/net/xen-netback/common.h
+++ b/drivers/net/xen-netback/common.h
@@ -230,6 +230,8 @@ struct xenvif {
 	 */
 	bool disabled;
 	unsigned long status;
+	unsigned long drain_timeout;
+	unsigned long stall_timeout;
 
 	/* Queues */
 	struct xenvif_queue *queues;
@@ -328,7 +330,7 @@ irqreturn_t xenvif_interrupt(int irq, vo
 extern bool separate_tx_rx_irq;
 
 extern unsigned int rx_drain_timeout_msecs;
-extern unsigned int rx_drain_timeout_jiffies;
+extern unsigned int rx_stall_timeout_msecs;
 extern unsigned int xenvif_max_queues;
 
 #ifdef CONFIG_DEBUG_FS
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c
@@ -166,7 +166,7 @@ static int xenvif_start_xmit(struct sk_b
 		goto drop;
 
 	cb = XENVIF_RX_CB(skb);
-	cb->expires = jiffies + rx_drain_timeout_jiffies;
+	cb->expires = jiffies + vif->drain_timeout;
 
 	xenvif_rx_queue_tail(queue, skb);
 	xenvif_kick_thread(queue);
@@ -414,6 +414,8 @@ struct xenvif *xenvif_alloc(struct devic
 	vif->ip_csum = 1;
 	vif->dev = dev;
 	vif->disabled = false;
+	vif->drain_timeout = msecs_to_jiffies(rx_drain_timeout_msecs);
+	vif->stall_timeout = msecs_to_jiffies(rx_stall_timeout_msecs);
 
 	/* Start out with no queues. */
 	vif->queues = NULL;
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -60,14 +60,12 @@ module_param(separate_tx_rx_irq, bool, 0
  */
 unsigned int rx_drain_timeout_msecs = 10000;
 module_param(rx_drain_timeout_msecs, uint, 0444);
-unsigned int rx_drain_timeout_jiffies;
 
 /* The length of time before the frontend is considered unresponsive
  * because it isn't providing Rx slots.
  */
-static unsigned int rx_stall_timeout_msecs = 60000;
+unsigned int rx_stall_timeout_msecs = 60000;
 module_param(rx_stall_timeout_msecs, uint, 0444);
-static unsigned int rx_stall_timeout_jiffies;
 
 unsigned int xenvif_max_queues;
 module_param_named(max_queues, xenvif_max_queues, uint, 0644);
@@ -2022,7 +2020,7 @@ static bool xenvif_rx_queue_stalled(stru
 	return !queue->stalled
 		&& prod - cons < XEN_NETBK_RX_SLOTS_MAX
 		&& time_after(jiffies,
-			      queue->last_rx_time + rx_stall_timeout_jiffies);
+			      queue->last_rx_time + queue->vif->stall_timeout);
 }
 
 static bool xenvif_rx_queue_ready(struct xenvif_queue *queue)
@@ -2040,8 +2038,9 @@ static bool xenvif_have_rx_work(struct x
 {
 	return (!skb_queue_empty(&queue->rx_queue)
 		&& xenvif_rx_ring_slots_available(queue, XEN_NETBK_RX_SLOTS_MAX))
-		|| xenvif_rx_queue_stalled(queue)
-		|| xenvif_rx_queue_ready(queue)
+		|| (queue->vif->stall_timeout &&
+		    (xenvif_rx_queue_stalled(queue)
+		     || xenvif_rx_queue_ready(queue)))
 		|| kthread_should_stop()
 		|| queue->vif->disabled;
 }
@@ -2094,6 +2093,9 @@ int xenvif_kthread_guest_rx(void *data)
 	struct xenvif_queue *queue = data;
 	struct xenvif *vif = queue->vif;
 
+	if (!vif->stall_timeout)
+		xenvif_queue_carrier_on(queue);
+
 	for (;;) {
 		xenvif_wait_for_rx_work(queue);
 
@@ -2120,10 +2122,12 @@ int xenvif_kthread_guest_rx(void *data)
 		 * while it's probably not responsive, drop the
 		 * carrier so packets are dropped earlier.
 		 */
-		if (xenvif_rx_queue_stalled(queue))
-			xenvif_queue_carrier_off(queue);
-		else if (xenvif_rx_queue_ready(queue))
-			xenvif_queue_carrier_on(queue);
+		if (vif->stall_timeout) {
+			if (xenvif_rx_queue_stalled(queue))
+				xenvif_queue_carrier_off(queue);
+			else if (xenvif_rx_queue_ready(queue))
+				xenvif_queue_carrier_on(queue);
+		}
 
 		/* Queued packets may have foreign pages from other
 		 * domains.  These cannot be queued indefinitely as
@@ -2194,9 +2198,6 @@ static int __init netback_init(void)
 	if (rc)
 		goto failed_init;
 
-	rx_drain_timeout_jiffies = msecs_to_jiffies(rx_drain_timeout_msecs);
-	rx_stall_timeout_jiffies = msecs_to_jiffies(rx_stall_timeout_msecs);
-
 #ifdef CONFIG_DEBUG_FS
 	xen_netback_dbg_root = debugfs_create_dir("xen-netback", NULL);
 	if (IS_ERR_OR_NULL(xen_netback_dbg_root))
--- a/drivers/net/xen-netback/xenbus.c
+++ b/drivers/net/xen-netback/xenbus.c
@@ -886,9 +886,15 @@ static int read_xenbus_vif_flags(struct
 		return -EOPNOTSUPP;
 
 	if (xenbus_scanf(XBT_NIL, dev->otherend,
-			 "feature-rx-notify", "%d", &val) < 0 || val == 0) {
-		xenbus_dev_fatal(dev, -EINVAL, "feature-rx-notify is mandatory");
-		return -EINVAL;
+			 "feature-rx-notify", "%d", &val) < 0)
+		val = 0;
+	if (!val) {
+		/* - Reduce drain timeout to poll more frequently for
+		 *   Rx requests.
+		 * - Disable Rx stall detection.
+		 */
+		be->vif->drain_timeout = msecs_to_jiffies(30);
+		be->vif->stall_timeout = 0;
 	}
 
 	if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-sg",
From foo@baz Sat Jan 17 18:12:21 PST 2015
From: Jason Wang <jasowang@redhat.com>
Date: Fri, 19 Dec 2014 11:09:13 +0800
Subject: net: drop the packet when fails to do software segmentation or header check

From: Jason Wang <jasowang@redhat.com>

[ Upstream commit af6dabc9c70ae3f307685b1f32f52d60b1bf0527 ]

Commit cecda693a969816bac5e470e1d9c9c0ef5567bca ("net: keep original skb
which only needs header checking during software GSO") keeps the original
skb for packets that only need a header check, but it doesn't drop the
packet if software segmentation or the header check fails.

Fixes: cecda693a9 ("net: keep original skb which only needs header checking during software GSO")
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/core/dev.c |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2680,7 +2680,7 @@ static struct sk_buff *validate_xmit_skb
 
 		segs = skb_gso_segment(skb, features);
 		if (IS_ERR(segs)) {
-			segs = NULL;
+			goto out_kfree_skb;
 		} else if (segs) {
 			consume_skb(skb);
 			skb = segs;
From foo@baz Sat Jan 17 18:12:21 PST 2015
From: stephen hemminger <stephen@networkplumber.org>
Date: Sat, 20 Dec 2014 12:15:49 -0800
Subject: in6: fix conflict with glibc

From: stephen hemminger <stephen@networkplumber.org>

[ Upstream commit 6d08acd2d32e3e877579315dc3202d7a5f336d98 ]

Resolve conflicts between the glibc definitions of IPv6 socket options
and those defined in Linux headers. It looks like earlier efforts to
solve this did not cover all the definitions.

This resolves warnings during the iproute2 build.
Please consider for stable as well.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/uapi/linux/in6.h         |    3 ++-
 include/uapi/linux/libc-compat.h |    3 +++
 2 files changed, 5 insertions(+), 1 deletion(-)

--- a/include/uapi/linux/in6.h
+++ b/include/uapi/linux/in6.h
@@ -149,7 +149,7 @@ struct in6_flowlabel_req {
 /*
  *	IPV6 socket options
  */
-
+#if __UAPI_DEF_IPV6_OPTIONS
 #define IPV6_ADDRFORM		1
 #define IPV6_2292PKTINFO	2
 #define IPV6_2292HOPOPTS	3
@@ -196,6 +196,7 @@ struct in6_flowlabel_req {
 
 #define IPV6_IPSEC_POLICY	34
 #define IPV6_XFRM_POLICY	35
+#endif
 
 /*
  * Multicast:
--- a/include/uapi/linux/libc-compat.h
+++ b/include/uapi/linux/libc-compat.h
@@ -69,6 +69,7 @@
 #define __UAPI_DEF_SOCKADDR_IN6		0
 #define __UAPI_DEF_IPV6_MREQ		0
 #define __UAPI_DEF_IPPROTO_V6		0
+#define __UAPI_DEF_IPV6_OPTIONS		0
 
 #else
 
@@ -82,6 +83,7 @@
 #define __UAPI_DEF_SOCKADDR_IN6		1
 #define __UAPI_DEF_IPV6_MREQ		1
 #define __UAPI_DEF_IPPROTO_V6		1
+#define __UAPI_DEF_IPV6_OPTIONS		1
 
 #endif /* _NETINET_IN_H */
 
@@ -103,6 +105,7 @@
 #define __UAPI_DEF_SOCKADDR_IN6		1
 #define __UAPI_DEF_IPV6_MREQ		1
 #define __UAPI_DEF_IPPROTO_V6		1
+#define __UAPI_DEF_IPV6_OPTIONS		1
 
 /* Definitions for xattr.h */
 #define __UAPI_DEF_XATTR		1
From foo@baz Sat Jan 17 18:12:21 PST 2015
From: Prashant Sreedharan <prashant@broadcom.com>
Date: Sat, 20 Dec 2014 12:16:17 -0800
Subject: tg3: tg3_disable_ints using uninitialized mailbox value to disable interrupts

From: Prashant Sreedharan <prashant@broadcom.com>

[ Upstream commit 05b0aa579397b734f127af58e401a30784a1e315 ]

During driver load in tg3_init_one, if the driver detects DMA activity before
initializing the chip, tg3_halt is called. As part of tg3_halt, interrupts are
disabled using the routine tg3_disable_ints. This routine was using a mailbox
value which was not initialized (default value is 0). As a result, the driver
was writing 0x00000001 to PCI config space register 0, which is the vendor id /
device id.

This driver bug was exposed by commit a7877b17a667 (PCI: Check only
the Vendor ID to identify Configuration Request Retry). Also, this issue is
only seen in older generation chipsets like the 5722, because a config space
write to offset 0 from the driver is possible there. The newer generation
chips ignore writes to offset 0. Also, without commit a7877b17a667, for these
older chips the Bootcode would reprogram the vendor id/device id when a GRC
reset is issued, which is the reason this bug was masked earlier.

Fixed by initializing the interrupt mailbox registers before calling tg3_halt.

Please queue for -stable.

Reported-by: Nils Holland <nholland@tisys.org>
Reported-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: Prashant Sreedharan <prashant@broadcom.com>
Signed-off-by: Michael Chan <mchan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/broadcom/tg3.c |   34 +++++++++++++++++-----------------
 1 file changed, 17 insertions(+), 17 deletions(-)

--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -17789,23 +17789,6 @@ static int tg3_init_one(struct pci_dev *
 		goto err_out_apeunmap;
 	}
 
-	/*
-	 * Reset chip in case UNDI or EFI driver did not shutdown
-	 * DMA self test will enable WDMAC and we'll see (spurious)
-	 * pending DMA on the PCI bus at that point.
-	 */
-	if ((tr32(HOSTCC_MODE) & HOSTCC_MODE_ENABLE) ||
-	    (tr32(WDMAC_MODE) & WDMAC_MODE_ENABLE)) {
-		tw32(MEMARB_MODE, MEMARB_MODE_ENABLE);
-		tg3_halt(tp, RESET_KIND_SHUTDOWN, 1);
-	}
-
-	err = tg3_test_dma(tp);
-	if (err) {
-		dev_err(&pdev->dev, "DMA engine test failed, aborting\n");
-		goto err_out_apeunmap;
-	}
-
 	intmbx = MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW;
 	rcvmbx = MAILBOX_RCVRET_CON_IDX_0 + TG3_64BIT_REG_LOW;
 	sndmbx = MAILBOX_SNDHOST_PROD_IDX_0 + TG3_64BIT_REG_LOW;
@@ -17850,6 +17833,23 @@ static int tg3_init_one(struct pci_dev *
 			sndmbx += 0xc;
 	}
 
+	/*
+	 * Reset chip in case UNDI or EFI driver did not shutdown
+	 * DMA self test will enable WDMAC and we'll see (spurious)
+	 * pending DMA on the PCI bus at that point.
+	 */
+	if ((tr32(HOSTCC_MODE) & HOSTCC_MODE_ENABLE) ||
+	    (tr32(WDMAC_MODE) & WDMAC_MODE_ENABLE)) {
+		tw32(MEMARB_MODE, MEMARB_MODE_ENABLE);
+		tg3_halt(tp, RESET_KIND_SHUTDOWN, 1);
+	}
+
+	err = tg3_test_dma(tp);
+	if (err) {
+		dev_err(&pdev->dev, "DMA engine test failed, aborting\n");
+		goto err_out_apeunmap;
+	}
+
 	tg3_init_coal(tp);
 
 	pci_set_drvdata(pdev, dev);
From foo@baz Sat Jan 17 18:12:21 PST 2015
From: Sven Eckelmann <sven@narfation.org>
Date: Sat, 20 Dec 2014 13:48:56 +0100
Subject: batman-adv: Unify fragment size calculation

From: Sven Eckelmann <sven@narfation.org>

[ Upstream commit 0402e444cd199389b7fe47be68a67b817e09e097 ]

The fragmentation code was replaced in 610bfc6bc99bc83680d190ebc69359a05fc7f605
("batman-adv: Receive fragmented packets and merge") by an implementation which
can handle up to 16 fragments of a packet. The packet is prepared for the split
into fragments by the function batadv_frag_send_packet, and the actual split is
done by batadv_frag_create.

Both functions calculate the size of a fragment themselves, but their
calculations differ because batadv_frag_send_packet also subtracts ETH_HLEN.
Therefore, the check in batadv_frag_send_packet "can a full fragment be
created?" may return true even when batadv_frag_create cannot create a full
fragment.

The function batadv_frag_create doesn't check the size of the skb before
splitting it and therefore might try to create a larger fragment than the
remaining buffer. This creates an integer underflow, and an invalid len is
given to skb_split.

Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/batman-adv/fragmentation.c |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

--- a/net/batman-adv/fragmentation.c
+++ b/net/batman-adv/fragmentation.c
@@ -434,7 +434,7 @@ bool batadv_frag_send_packet(struct sk_b
 	 * fragments larger than BATADV_FRAG_MAX_FRAG_SIZE
 	 */
 	mtu = min_t(unsigned, mtu, BATADV_FRAG_MAX_FRAG_SIZE);
-	max_fragment_size = (mtu - header_size - ETH_HLEN);
+	max_fragment_size = mtu - header_size;
 	max_packet_size = max_fragment_size * BATADV_FRAG_MAX_FRAGMENTS;
 
 	/* Don't even try to fragment, if we need more than 16 fragments */
From foo@baz Sat Jan 17 18:12:21 PST 2015
From: Antonio Quartulli <antonio@meshcoding.com>
Date: Sat, 20 Dec 2014 13:48:57 +0100
Subject: batman-adv: avoid NULL dereferences and fix if check

From: Antonio Quartulli <antonio@meshcoding.com>

[ Upstream commit 0d1644919578db525b9a7b6c8197ce02adbfce26 ]

Gateways having bandwidth_down equal to zero are not accepted
at all and so are never added to the Gateway list.
For this reason, checking the bandwidth_down member in
batadv_gw_out_of_range() is useless.

This is probably a copy/paste error and this check was supposed
to be "!gw_node" only. Moreover, the way the check is written
now may also lead to a NULL dereference.

Fix this by rewriting the if-condition properly.

Introduced by 414254e342a0d58144de40c3da777521ebaeeb07
("batman-adv: tvlv - gateway download/upload bandwidth container")

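The broken test is an operator-precedence trap: '!' binds tighter than '==',
so the old condition dereferenced the pointer before any NULL check and then
compared the negation against zero. A standalone illustration (hypothetical
struct, not the batman-adv one):

#include <stdio.h>

struct gw_node {
        unsigned int bandwidth_down;
};

int main(void)
{
        struct gw_node node = { .bandwidth_down = 100 };
        struct gw_node *gw_node = &node;

        /* Old check: parses as (!gw_node->bandwidth_down) == 0, so it is
         * true for every gateway with non-zero bandwidth, and it would
         * crash on a NULL gw_node before the comparison even runs. */
        if (!gw_node->bandwidth_down == 0)
                printf("old check: bails out for a perfectly valid gateway\n");

        /* Fixed check: bail out only when no gateway node was found. */
        if (!gw_node)
                printf("new check: no gateway\n");
        else
                printf("new check: gateway found, continue\n");

        return 0;
}
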
Signed-off-by: Antonio Quartulli <antonio@meshcoding.com>
Reported-by: David Binderman <dcb314@hotmail.com>
Signed-off-by: Marek Lindner <mareklindner@neomailbox.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/batman-adv/gateway_client.c |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

--- a/net/batman-adv/gateway_client.c
+++ b/net/batman-adv/gateway_client.c
@@ -810,7 +810,7 @@ bool batadv_gw_out_of_range(struct batad
 		goto out;
 
 	gw_node = batadv_gw_node_get(bat_priv, orig_dst_node);
-	if (!gw_node->bandwidth_down == 0)
+	if (!gw_node)
 		goto out;
 
 	switch (atomic_read(&bat_priv->gw_mode)) {
From foo@baz Sat Jan 17 18:12:21 PST 2015
From: Amir Vadai <amirv@mellanox.com>
Date: Mon, 22 Dec 2014 10:21:57 +0200
Subject: net/mlx4_en: Doorbell is byteswapped in Little Endian archs

From: Amir Vadai <amirv@mellanox.com>

[ Upstream commit 492f5add4be84652bbe13da8a250d60c6856a5c5 ]

iowrite32() will byteswap its argument on big endian archs.
iowrite32be() will byteswap on little endian archs.
Since we don't want to do this unnecessary byteswap on the fast path,
the doorbell is stored in the NIC's native endianness. Use the right
iowrite() according to the arch endianness.

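A hedged userspace sketch of the idea (names and values are illustrative, not
the mlx4 code): the doorbell value is kept pre-swapped in the device's byte
order, so the hot path must use the MMIO accessor that performs no additional
swap on the running architecture.

#include <endian.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Stand-ins for the kernel accessors: iowrite32() emits its argument as
 * little-endian bus bytes, iowrite32be() emits it as big-endian. */
static void iowrite32_sim(uint32_t val, unsigned char *bus)
{
        uint32_t v = htole32(val);

        memcpy(bus, &v, 4);
}

static void iowrite32be_sim(uint32_t val, unsigned char *bus)
{
        uint32_t v = htobe32(val);

        memcpy(bus, &v, 4);
}

int main(void)
{
        /* Stored pre-swapped in device (big-endian) byte order, like
         * ring->doorbell_qpn, so no swap is needed when ringing. */
        uint32_t doorbell_qpn = htobe32(0x123456);
        unsigned char bus[4];

        /* Pick the accessor that does NOT swap on this host, so the bytes
         * held in doorbell_qpn reach the device unchanged. */
#if __BYTE_ORDER == __LITTLE_ENDIAN
        iowrite32_sim(doorbell_qpn, bus);
#else
        iowrite32be_sim(doorbell_qpn, bus);
#endif
        printf("bus bytes: %02x %02x %02x %02x\n", bus[0], bus[1], bus[2], bus[3]);
        return 0;
}

Either way the device sees 00 12 34 56; what changes with the architecture is
only which accessor avoids the redundant swap.
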
CC: Wei Yang <weiyang@linux.vnet.ibm.com>
CC: David Laight <david.laight@aculab.com>
Fixes: 6a4e812 ("net/mlx4_en: Avoid calling bswap in tx fast path")
Signed-off-by: Amir Vadai <amirv@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/mellanox/mlx4/en_tx.c |   12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

--- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
@@ -954,7 +954,17 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff
 		tx_desc->ctrl.owner_opcode = op_own;
 		if (send_doorbell) {
 			wmb();
-			iowrite32(ring->doorbell_qpn,
+			/* Since there is no iowrite*_native() that writes the
+			 * value as is, without byteswapping - using the one
+			 * the doesn't do byteswapping in the relevant arch
+			 * endianness.
+			 */
+#if defined(__LITTLE_ENDIAN)
+			iowrite32(
+#else
+			iowrite32be(
+#endif
+				  ring->doorbell_qpn,
 				  ring->bf.uar->map + MLX4_SEND_DOORBELL);
 		} else {
 			ring->xmit_more++;
From foo@baz Sat Jan 17 18:12:21 PST 2015
From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Date: Mon, 22 Dec 2014 18:22:48 +0100
Subject: tcp6: don't move IP6CB before xfrm6_policy_check()

From: Nicolas Dichtel <nicolas.dichtel@6wind.com>

[ Upstream commit 2dc49d1680b534877fd20cce52557ea542bb06b6 ]

When xfrm6_policy_check() is used, _decode_session6() is called after some
intermediate functions. This function uses IP6CB(), so TCP_SKB_CB() must be
prepared after the call to xfrm6_policy_check().

Before this patch, scenarios with IPv6 + TCP + IPsec Transport were broken.

Fixes: 971f10eca186 ("tcp: better TCP_SKB_CB layout to reduce cache line misses")
Reported-by: Huaibin Wang <huaibin.wang@6wind.com>
Suggested-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv6/tcp_ipv6.c |   45 +++++++++++++++++++++++++++++----------------
 1 file changed, 29 insertions(+), 16 deletions(-)

--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1385,6 +1385,28 @@ ipv6_pktoptions:
 	return 0;
 }
 
+static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
+			   const struct tcphdr *th)
+{
+	/* This is tricky: we move IP6CB at its correct location into
+	 * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
+	 * _decode_session6() uses IP6CB().
+	 * barrier() makes sure compiler won't play aliasing games.
+	 */
+	memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
+		sizeof(struct inet6_skb_parm));
+	barrier();
+
+	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
+	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
+				    skb->len - th->doff*4);
+	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
+	TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
+	TCP_SKB_CB(skb)->tcp_tw_isn = 0;
+	TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
+	TCP_SKB_CB(skb)->sacked = 0;
+}
+
 static int tcp_v6_rcv(struct sk_buff *skb)
 {
 	const struct tcphdr *th;
@@ -1416,24 +1438,9 @@ static int tcp_v6_rcv(struct sk_buff *sk
 
 	th = tcp_hdr(skb);
 	hdr = ipv6_hdr(skb);
-	/* This is tricky : We move IPCB at its correct location into TCP_SKB_CB()
-	 * barrier() makes sure compiler wont play fool^Waliasing games.
-	 */
-	memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
-		sizeof(struct inet6_skb_parm));
-	barrier();
-
-	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
-	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
-				    skb->len - th->doff*4);
-	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
-	TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
-	TCP_SKB_CB(skb)->tcp_tw_isn = 0;
-	TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
-	TCP_SKB_CB(skb)->sacked = 0;
 
 	sk = __inet6_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest,
-				tcp_v6_iif(skb));
+				inet6_iif(skb));
 	if (!sk)
 		goto no_tcp_socket;
 
@@ -1449,6 +1456,8 @@ process:
 	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
 		goto discard_and_relse;
 
+	tcp_v6_fill_cb(skb, hdr, th);
+
 #ifdef CONFIG_TCP_MD5SIG
 	if (tcp_v6_inbound_md5_hash(sk, skb))
 		goto discard_and_relse;
@@ -1480,6 +1489,8 @@ no_tcp_socket:
 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
 		goto discard_it;
 
+	tcp_v6_fill_cb(skb, hdr, th);
+
 	if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
 csum_error:
 		TCP_INC_STATS_BH(net, TCP_MIB_CSUMERRORS);
@@ -1503,6 +1514,8 @@ do_time_wait:
 		goto discard_it;
 	}
 
+	tcp_v6_fill_cb(skb, hdr, th);
+
 	if (skb->len < (th->doff<<2)) {
 		inet_twsk_put(inet_twsk(sk));
 		goto bad_packet;
From foo@baz Sat Jan 17 18:12:21 PST 2015
From: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Date: Mon, 22 Dec 2014 19:04:14 +0900
Subject: net: Fix stacked vlan offload features computation

From: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>

[ Upstream commit 796f2da81bead71ffc91ef70912cd8d1827bf756 ]

When vlan tags are stacked, it is very likely that the outer tag is stored
in skb->vlan_tci and skb->protocol shows the inner tag's vlan_proto.
Currently netif_skb_features() first looks at skb->protocol even if the
outer tag is in vlan_tci, and thus it incorrectly retrieves the protocol
encapsulated by the inner vlan instead of the inner vlan protocol.
This allows GSO packets to be passed to the HW and they end up being
corrupted.

Fixes: 58e998c6d239 ("offloading: Force software GSO for multiple vlan tags.")
Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/core/dev.c |   13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2573,11 +2573,14 @@ netdev_features_t netif_skb_features(str
 	if (gso_segs > dev->gso_max_segs || gso_segs < dev->gso_min_segs)
 		features &= ~NETIF_F_GSO_MASK;
 
-	if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD)) {
-		struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
-		protocol = veh->h_vlan_encapsulated_proto;
-	} else if (!vlan_tx_tag_present(skb)) {
-		return harmonize_features(skb, features);
+	if (!vlan_tx_tag_present(skb)) {
+		if (unlikely(protocol == htons(ETH_P_8021Q) ||
+			     protocol == htons(ETH_P_8021AD))) {
+			struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
+			protocol = veh->h_vlan_encapsulated_proto;
+		} else {
+			return harmonize_features(skb, features);
+		}
 	}
 
 	features = netdev_intersect_features(features,
From foo@baz Sat Jan 17 18:12:21 PST 2015
From: Thomas Graf <tgraf@suug.ch>
Date: Tue, 23 Dec 2014 01:13:18 +0100
Subject: net: Reset secmark when scrubbing packet

From: Thomas Graf <tgraf@suug.ch>

[ Upstream commit b8fb4e0648a2ab3734140342002f68fb0c7d1602 ]

skb_scrub_packet() is called when a packet switches between a context
such as between underlay and overlay, between namespaces, or between
L3 subnets.

While we already scrub the packet mark, connection tracking entry,
and cached destination, the security mark/context is left intact.

It seems wrong to inherit the security context of a packet when going
from overlay to underlay or across forwarding paths.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Acked-by: Flavio Leitner <fbl@sysclose.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/core/skbuff.c |    1 +
 1 file changed, 1 insertion(+)

--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -4040,6 +4040,7 @@ void skb_scrub_packet(struct sk_buff *sk
 	skb->ignore_df = 0;
 	skb_dst_drop(skb);
 	skb->mark = 0;
+	skb_init_secmark(skb);
 	secpath_reset(skb);
 	nf_reset(skb);
 	nf_reset_trace(skb);
From foo@baz Sat Jan 17 18:12:21 PST 2015
From: Jay Vosburgh <jay.vosburgh@canonical.com>
Date: Fri, 19 Dec 2014 15:32:00 -0800
Subject: net/core: Handle csum for CHECKSUM_COMPLETE VXLAN forwarding

From: Jay Vosburgh <jay.vosburgh@canonical.com>

[ Upstream commit 2c26d34bbcc0b3f30385d5587aa232289e2eed8e ]

When using VXLAN tunnels and a sky2 device, I have experienced
checksum failures of the following type:

[ 4297.761899] eth0: hw csum failure
[...]
[ 4297.765223] Call Trace:
[ 4297.765224]  <IRQ>  [<ffffffff8172f026>] dump_stack+0x46/0x58
[ 4297.765235]  [<ffffffff8162ba52>] netdev_rx_csum_fault+0x42/0x50
[ 4297.765238]  [<ffffffff8161c1a0>] ? skb_push+0x40/0x40
[ 4297.765240]  [<ffffffff8162325c>] __skb_checksum_complete+0xbc/0xd0
[ 4297.765243]  [<ffffffff8168c602>] tcp_v4_rcv+0x2e2/0x950
[ 4297.765246]  [<ffffffff81666ca0>] ? ip_rcv_finish+0x360/0x360

	These are reliably reproduced in a network topology of:

container:eth0 == host(OVS VXLAN on VLAN) == bond0 == eth0 (sky2) -> switch

	When VXLAN encapsulated traffic is received from a similarly
configured peer, the above warning is generated in the receive
processing of the encapsulated packet.  Note that the warning is
associated with the container eth0.

        The skbs from sky2 have ip_summed set to CHECKSUM_COMPLETE, and
because the packet is an encapsulated Ethernet frame, the checksum
generated by the hardware includes the inner protocol and Ethernet
headers.

	The receive code is careful to update the skb->csum, except in
__dev_forward_skb, as called by dev_forward_skb.  __dev_forward_skb
calls eth_type_trans, which in turn calls skb_pull_inline(skb, ETH_HLEN)
to skip over the Ethernet header, but does not update skb->csum when
doing so.

	This patch resolves the problem by adding a call to
skb_postpull_rcsum to update the skb->csum after the call to
eth_type_trans.

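The helper's effect can be illustrated with a simplified ones'-complement sum
in userspace (a rough sketch of the arithmetic only, not the kernel's csum
implementation): after ETH_HLEN bytes are pulled, the header's contribution
has to be subtracted from the CHECKSUM_COMPLETE value so the remaining sum
covers exactly the remaining bytes.

#include <stdint.h>
#include <stdio.h>
#include <stddef.h>

/* Simplified 16-bit ones'-complement sum over a byte buffer. */
static uint32_t csum_partial_sim(const uint8_t *buf, size_t len)
{
        uint32_t sum = 0;
        size_t i;

        for (i = 0; i + 1 < len; i += 2)
                sum += (uint32_t)buf[i] << 8 | buf[i + 1];
        if (len & 1)
                sum += (uint32_t)buf[len - 1] << 8;
        while (sum >> 16)
                sum = (sum & 0xffff) + (sum >> 16);
        return sum;
}

int main(void)
{
        uint8_t frame[32];              /* 14-byte "Ethernet header" + payload */
        const size_t eth_hlen = 14;
        uint32_t full, hdr, payload, adjusted;
        size_t i;

        for (i = 0; i < sizeof(frame); i++)
                frame[i] = (uint8_t)(i * 7 + 3);

        full    = csum_partial_sim(frame, sizeof(frame));
        hdr     = csum_partial_sim(frame, eth_hlen);
        payload = csum_partial_sim(frame + eth_hlen, sizeof(frame) - eth_hlen);

        /* "Post-pull": subtract the header's contribution from the full sum
         * (ones'-complement subtraction); the result matches the sum over
         * the payload alone. */
        adjusted = full + (~hdr & 0xffff);
        while (adjusted >> 16)
                adjusted = (adjusted & 0xffff) + (adjusted >> 16);

        printf("payload sum 0x%04x, adjusted sum 0x%04x\n",
               (unsigned)payload, (unsigned)adjusted);
        return 0;
}
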
Signed-off-by: Jay Vosburgh <jay.vosburgh@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/core/dev.c |    1 +
 1 file changed, 1 insertion(+)

--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1697,6 +1697,7 @@ int __dev_forward_skb(struct net_device
 
 	skb_scrub_packet(skb, true);
 	skb->protocol = eth_type_trans(skb, dev);
+	skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
 
 	return 0;
 }
From foo@baz Sat Jan 17 18:12:21 PST 2015
From: Jesse Gross <jesse@nicira.com>
Date: Tue, 23 Dec 2014 22:37:26 -0800
Subject: net: Generalize ndo_gso_check to ndo_features_check

From: Jesse Gross <jesse@nicira.com>

[ Upstream commit 5f35227ea34bb616c436d9da47fc325866c428f3 ]

GSO isn't the only offload feature with restrictions that
potentially can't be expressed with the current features mechanism.
Checksum is another although it's a general issue that could in
theory apply to anything. Even if it may be possible to
implement these restrictions in other ways, it can result in
duplicate code or inefficient per-packet behavior.

This generalizes ndo_gso_check so that drivers can remove any
features that don't make sense for a given packet, similar to
netif_skb_features(). It also converts existing driver
restrictions to the new format, completing the work that was
done to support tunnel protocols since the issues apply to
checksums as well.

By actually removing features from the set that are used to do
offloading, it solves another problem with the existing
interface. In these cases, GSO would run with the original set
of features and not do anything because it appears that
segmentation is not required.

CC: Tom Herbert <therbert@google.com>
CC: Joe Stringer <joestringer@nicira.com>
CC: Eric Dumazet <edumazet@google.com>
CC: Hayes Wang <hayeswang@realtek.com>
Signed-off-by: Jesse Gross <jesse@nicira.com>
Acked-by:  Tom Herbert <therbert@google.com>
Fixes: 04ffcb255f22 ("net: Add ndo_gso_check")
Tested-by: Hayes Wang <hayeswang@realtek.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/emulex/benet/be_main.c      |    8 ++++--
 drivers/net/ethernet/mellanox/mlx4/en_netdev.c   |   10 ++++----
 drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c |    8 ++++--
 include/linux/netdevice.h                        |   20 +++++++++-------
 include/net/vxlan.h                              |   28 +++++++++++++++++++----
 net/core/dev.c                                   |   23 +++++++++++-------
 6 files changed, 65 insertions(+), 32 deletions(-)

--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -4427,9 +4427,11 @@ static void be_del_vxlan_port(struct net
 		 be16_to_cpu(port));
 }
 
-static bool be_gso_check(struct sk_buff *skb, struct net_device *dev)
+static netdev_features_t be_features_check(struct sk_buff *skb,
+					   struct net_device *dev,
+					   netdev_features_t features)
 {
-	return vxlan_gso_check(skb);
+	return vxlan_features_check(skb, features);
 }
 #endif
 
@@ -4460,7 +4462,7 @@ static const struct net_device_ops be_ne
 #ifdef CONFIG_BE2NET_VXLAN
 	.ndo_add_vxlan_port	= be_add_vxlan_port,
 	.ndo_del_vxlan_port	= be_del_vxlan_port,
-	.ndo_gso_check		= be_gso_check,
+	.ndo_features_check	= be_features_check,
 #endif
 };
 
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -2363,9 +2363,11 @@ static void mlx4_en_del_vxlan_port(struc
 	queue_work(priv->mdev->workqueue, &priv->vxlan_del_task);
 }
 
-static bool mlx4_en_gso_check(struct sk_buff *skb, struct net_device *dev)
+static netdev_features_t mlx4_en_features_check(struct sk_buff *skb,
+						struct net_device *dev,
+						netdev_features_t features)
 {
-	return vxlan_gso_check(skb);
+	return vxlan_features_check(skb, features);
 }
 #endif
 
@@ -2398,7 +2400,7 @@ static const struct net_device_ops mlx4_
 #ifdef CONFIG_MLX4_EN_VXLAN
 	.ndo_add_vxlan_port	= mlx4_en_add_vxlan_port,
 	.ndo_del_vxlan_port	= mlx4_en_del_vxlan_port,
-	.ndo_gso_check		= mlx4_en_gso_check,
+	.ndo_features_check	= mlx4_en_features_check,
 #endif
 };
 
@@ -2432,7 +2434,7 @@ static const struct net_device_ops mlx4_
 #ifdef CONFIG_MLX4_EN_VXLAN
 	.ndo_add_vxlan_port	= mlx4_en_add_vxlan_port,
 	.ndo_del_vxlan_port	= mlx4_en_del_vxlan_port,
-	.ndo_gso_check		= mlx4_en_gso_check,
+	.ndo_features_check	= mlx4_en_features_check,
 #endif
 };
 
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
@@ -504,9 +504,11 @@ static void qlcnic_del_vxlan_port(struct
 	adapter->flags |= QLCNIC_DEL_VXLAN_PORT;
 }
 
-static bool qlcnic_gso_check(struct sk_buff *skb, struct net_device *dev)
+static netdev_features_t qlcnic_features_check(struct sk_buff *skb,
+					       struct net_device *dev,
+					       netdev_features_t features)
 {
-	return vxlan_gso_check(skb);
+	return vxlan_features_check(skb, features);
 }
 #endif
 
@@ -531,7 +533,7 @@ static const struct net_device_ops qlcni
 #ifdef CONFIG_QLCNIC_VXLAN
 	.ndo_add_vxlan_port	= qlcnic_add_vxlan_port,
 	.ndo_del_vxlan_port	= qlcnic_del_vxlan_port,
-	.ndo_gso_check		= qlcnic_gso_check,
+	.ndo_features_check	= qlcnic_features_check,
 #endif
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	.ndo_poll_controller = qlcnic_poll_controller,
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -998,12 +998,15 @@ typedef u16 (*select_queue_fallback_t)(s
  *	Callback to use for xmit over the accelerated station. This
  *	is used in place of ndo_start_xmit on accelerated net
  *	devices.
- * bool	(*ndo_gso_check) (struct sk_buff *skb,
- *			  struct net_device *dev);
+ * netdev_features_t (*ndo_features_check) (struct sk_buff *skb,
+ *					    struct net_device *dev
+ *					    netdev_features_t features);
  *	Called by core transmit path to determine if device is capable of
- *	performing GSO on a packet. The device returns true if it is
- *	able to GSO the packet, false otherwise. If the return value is
- *	false the stack will do software GSO.
+ *	performing offload operations on a given packet. This is to give
+ *	the device an opportunity to implement any restrictions that cannot
+ *	be otherwise expressed by feature flags. The check is called with
+ *	the set of features that the stack has calculated and it returns
+ *	those the driver believes to be appropriate.
  */
 struct net_device_ops {
 	int			(*ndo_init)(struct net_device *dev);
@@ -1153,8 +1156,9 @@ struct net_device_ops {
 							struct net_device *dev,
 							void *priv);
 	int			(*ndo_get_lock_subclass)(struct net_device *dev);
-	bool			(*ndo_gso_check) (struct sk_buff *skb,
-						  struct net_device *dev);
+	netdev_features_t	(*ndo_features_check) (struct sk_buff *skb,
+						       struct net_device *dev,
+						       netdev_features_t features);
 };
 
 /**
@@ -3584,8 +3588,6 @@ static inline bool netif_needs_gso(struc
 				   netdev_features_t features)
 {
 	return skb_is_gso(skb) && (!skb_gso_ok(skb, features) ||
-		(dev->netdev_ops->ndo_gso_check &&
-		 !dev->netdev_ops->ndo_gso_check(skb, dev)) ||
 		unlikely((skb->ip_summed != CHECKSUM_PARTIAL) &&
 			 (skb->ip_summed != CHECKSUM_UNNECESSARY)));
 }
--- a/include/net/vxlan.h
+++ b/include/net/vxlan.h
@@ -1,6 +1,9 @@
 #ifndef __NET_VXLAN_H
 #define __NET_VXLAN_H 1
 
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/if_vlan.h>
 #include <linux/skbuff.h>
 #include <linux/netdevice.h>
 #include <linux/udp.h>
@@ -51,16 +54,33 @@ int vxlan_xmit_skb(struct vxlan_sock *vs
 		   __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df,
 		   __be16 src_port, __be16 dst_port, __be32 vni, bool xnet);
 
-static inline bool vxlan_gso_check(struct sk_buff *skb)
+static inline netdev_features_t vxlan_features_check(struct sk_buff *skb,
+						     netdev_features_t features)
 {
-	if ((skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL) &&
+	u8 l4_hdr = 0;
+
+	if (!skb->encapsulation)
+		return features;
+
+	switch (vlan_get_protocol(skb)) {
+	case htons(ETH_P_IP):
+		l4_hdr = ip_hdr(skb)->protocol;
+		break;
+	case htons(ETH_P_IPV6):
+		l4_hdr = ipv6_hdr(skb)->nexthdr;
+		break;
+	default:
+		return features;;
+	}
+
+	if ((l4_hdr == IPPROTO_UDP) &&
 	    (skb->inner_protocol_type != ENCAP_TYPE_ETHER ||
 	     skb->inner_protocol != htons(ETH_P_TEB) ||
 	     (skb_inner_mac_header(skb) - skb_transport_header(skb) !=
 	      sizeof(struct udphdr) + sizeof(struct vxlanhdr))))
-		return false;
+		return features & ~(NETIF_F_ALL_CSUM | NETIF_F_GSO_MASK);
 
-	return true;
+	return features;
 }
 
 /* IP header + UDP + VXLAN + Ethernet header */
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2566,7 +2566,7 @@ static netdev_features_t harmonize_featu
 
 netdev_features_t netif_skb_features(struct sk_buff *skb)
 {
-	const struct net_device *dev = skb->dev;
+	struct net_device *dev = skb->dev;
 	netdev_features_t features = dev->features;
 	u16 gso_segs = skb_shinfo(skb)->gso_segs;
 	__be16 protocol = skb->protocol;
@@ -2574,13 +2574,20 @@ netdev_features_t netif_skb_features(str
 	if (gso_segs > dev->gso_max_segs || gso_segs < dev->gso_min_segs)
 		features &= ~NETIF_F_GSO_MASK;
 
+	/* If encapsulation offload request, verify we are testing
+	 * hardware encapsulation features instead of standard
+	 * features for the netdev
+	 */
+	if (skb->encapsulation)
+		features &= dev->hw_enc_features;
+
 	if (!vlan_tx_tag_present(skb)) {
 		if (unlikely(protocol == htons(ETH_P_8021Q) ||
 			     protocol == htons(ETH_P_8021AD))) {
 			struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
 			protocol = veh->h_vlan_encapsulated_proto;
 		} else {
-			return harmonize_features(skb, features);
+			goto finalize;
 		}
 	}
 
@@ -2598,6 +2605,11 @@ netdev_features_t netif_skb_features(str
 						     NETIF_F_HW_VLAN_CTAG_TX |
 						     NETIF_F_HW_VLAN_STAG_TX);
 
+finalize:
+	if (dev->netdev_ops->ndo_features_check)
+		features &= dev->netdev_ops->ndo_features_check(skb, dev,
+								features);
+
 	return harmonize_features(skb, features);
 }
 EXPORT_SYMBOL(netif_skb_features);
@@ -2672,13 +2684,6 @@ static struct sk_buff *validate_xmit_skb
 	if (unlikely(!skb))
 		goto out_null;
 
-	/* If encapsulation offload request, verify we are testing
-	 * hardware encapsulation features instead of standard
-	 * features for the netdev
-	 */
-	if (skb->encapsulation)
-		features &= dev->hw_enc_features;
-
 	if (netif_needs_gso(dev, skb, features)) {
 		struct sk_buff *segs;
 
From foo@baz Sat Jan 17 18:12:21 PST 2015
From: Maor Gottlieb <maorg@mellanox.com>
Date: Tue, 30 Dec 2014 11:59:49 +0200
Subject: net/mlx4_core: Correctly update the mtt's offset in the MR re-reg flow

From: Maor Gottlieb <maorg@mellanox.com>

[ Upstream commit a51e0df4c1e06afd7aba84496c14238e6b363caa ]

Previously, mlx4_mr_rereg_mem_write filled the MPT's entity_size with the
old MTT's page shift, which could result in using an incorrect offset.
Fix the initialization to happen after we calculate the new MTT offset.

In addition, set the MTT order to -1 after calling mlx4_mtt_cleanup. This
is necessary in order to mark the MTT as invalid and avoid freeing it later.

Fixes: e630664 ('mlx4_core: Add helper functions to support MR re-registration')
Signed-off-by: Maor Gottlieb <maorg@mellanox.com>
Signed-off-by: Matan Barak <matanb@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/mellanox/mlx4/mr.c |    9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

--- a/drivers/net/ethernet/mellanox/mlx4/mr.c
+++ b/drivers/net/ethernet/mellanox/mlx4/mr.c
@@ -590,6 +590,7 @@ EXPORT_SYMBOL_GPL(mlx4_mr_free);
 void mlx4_mr_rereg_mem_cleanup(struct mlx4_dev *dev, struct mlx4_mr *mr)
 {
 	mlx4_mtt_cleanup(dev, &mr->mtt);
+	mr->mtt.order = -1;
 }
 EXPORT_SYMBOL_GPL(mlx4_mr_rereg_mem_cleanup);
 
@@ -599,14 +600,14 @@ int mlx4_mr_rereg_mem_write(struct mlx4_
 {
 	int err;
 
-	mpt_entry->start       = cpu_to_be64(iova);
-	mpt_entry->length      = cpu_to_be64(size);
-	mpt_entry->entity_size = cpu_to_be32(page_shift);
-
 	err = mlx4_mtt_init(dev, npages, page_shift, &mr->mtt);
 	if (err)
 		return err;
 
+	mpt_entry->start       = cpu_to_be64(mr->iova);
+	mpt_entry->length      = cpu_to_be64(mr->size);
+	mpt_entry->entity_size = cpu_to_be32(mr->mtt.page_shift);
+
 	mpt_entry->pd_flags &= cpu_to_be32(MLX4_MPT_PD_MASK |
 					   MLX4_MPT_PD_FLAG_EN_INV);
 	mpt_entry->flags    &= cpu_to_be32(MLX4_MPT_FLAG_FREE |
From foo@baz Sat Jan 17 18:12:21 PST 2015
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Thu, 1 Jan 2015 00:39:23 +1100
Subject: tcp: Do not apply TSO segment limit to non-TSO packets

From: Herbert Xu <herbert@gondor.apana.org.au>

[ Upstream commit 843925f33fcc293d80acf2c5c8a78adf3344d49b ]

Thomas Jarosch reported IPsec TCP stalls when a PMTU event occurs.

In fact the problem was completely unrelated to IPsec.  The bug is
also reproducible if you just disable TSO/GSO.

The problem is that when the MSS goes down, existing queued packets
on the TX queue that have not been transmitted yet all look like
TSO packets and get treated as such.

This then triggers a bug where tcp_mss_split_point tells us to
generate a zero-sized packet on the TX queue.  Once that happens
we're screwed because the zero-sized packet can never be removed
by ACKs.

Fixes: 1485348d242 ("tcp: Apply device TSO segment limit earlier")
Reported-by: Thomas Jarosch <thomas.jarosch@intra2net.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv4/tcp_output.c |    4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1984,7 +1984,7 @@ static bool tcp_write_xmit(struct sock *
 		if (unlikely(!tcp_snd_wnd_test(tp, skb, mss_now)))
 			break;
 
-		if (tso_segs == 1) {
+		if (tso_segs == 1 || !sk->sk_gso_max_segs) {
 			if (unlikely(!tcp_nagle_test(tp, skb, mss_now,
 						     (tcp_skb_is_last(sk, skb) ?
 						      nonagle : TCP_NAGLE_PUSH))))
@@ -2020,7 +2020,7 @@ static bool tcp_write_xmit(struct sock *
 		}
 
 		limit = mss_now;
-		if (tso_segs > 1 && !tcp_urg_mode(tp))
+		if (tso_segs > 1 && sk->sk_gso_max_segs && !tcp_urg_mode(tp))
 			limit = tcp_mss_split_point(sk, skb, mss_now,
 						    min_t(unsigned int,
 							  cwnd_quota,
From foo@baz Sat Jan 17 18:12:21 PST 2015
From: "Palik, Imre" <imrep@amazon.de>
Date: Tue, 6 Jan 2015 16:44:44 +0100
Subject: xen-netback: fixing the propagation of the transmit shaper timeout

From: "Palik, Imre" <imrep@amazon.de>

[ Upstream commit 07ff890daeda31cf23173865edf50bcb03e100c3 ]

Since e9ce7cb6b107 ("xen-netback: Factor queue-specific data into queue struct"),
the transmit shaper timeout is always set to 0.  The value the user sets via
xenbus is never propagated to the transmit shaper.

This patch fixes the issue.

Cc: Anthony Liguori <aliguori@amazon.com>
Signed-off-by: Imre Palik <imrep@amazon.de>
Acked-by: Ian Campbell <ian.campbell@citrix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/xen-netback/xenbus.c |    1 +
 1 file changed, 1 insertion(+)

--- a/drivers/net/xen-netback/xenbus.c
+++ b/drivers/net/xen-netback/xenbus.c
@@ -736,6 +736,7 @@ static void connect(struct backend_info
 		}
 
 		queue->remaining_credit = credit_bytes;
+		queue->credit_usec = credit_usec;
 
 		err = connect_rings(be, queue);
 		if (err) {
From foo@baz Sat Jan 17 18:12:21 PST 2015
From: Eric Dumazet <edumazet@google.com>
Date: Sun, 11 Jan 2015 10:32:18 -0800
Subject: alx: fix alx_poll()

From: Eric Dumazet <edumazet@google.com>

[ Upstream commit 7a05dc64e2e4c611d89007b125b20c0d2a4d31a5 ]

Commit d75b1ade567f ("net: less interrupt masking in NAPI") uncovered
wrong alx_poll() behavior.

A NAPI poll() handler is supposed to return exactly the budget when/if
napi_complete() has not been called.

It is also supposed to return the number of frames that were received, so
that netdev_budget can have a meaning.

Also, in case of TX pressure, we still have to dequeue received
packets: alx_clean_rx_irq() has to be called even if
alx_clean_tx_irq(alx) returns false, otherwise the device effectively
runs half duplex.
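
For readers unfamiliar with the contract, a minimal poll handler following the
rules above might look like the sketch below (illustrative only, not part of
this patch; the foo_* names are hypothetical):

	static int foo_poll(struct napi_struct *napi, int budget)
	{
		struct foo_priv *priv = container_of(napi, struct foo_priv, napi);
		bool tx_done = foo_clean_tx(priv);          /* reclaim TX descriptors */
		int work = foo_clean_rx(priv, budget);      /* frames received, <= budget */

		/* More work left (or budget exhausted): do not call
		 * napi_complete(), return exactly the budget. */
		if (!tx_done || work == budget)
			return budget;

		/* All done: complete NAPI, re-enable interrupts, and report how
		 * many frames were received so netdev_budget accounting works. */
		napi_complete(&priv->napi);
		foo_enable_irq(priv);
		return work;
	}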

Signed-off-by: Eric Dumazet <edumazet@google.com>
Fixes: d75b1ade567f ("net: less interrupt masking in NAPI")
Reported-by: Oded Gabbay <oded.gabbay@amd.com>
Bisected-by: Oded Gabbay <oded.gabbay@amd.com>
Tested-by: Oded Gabbay <oded.gabbay@amd.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/atheros/alx/main.c |   24 +++++++++++++-----------
 1 file changed, 13 insertions(+), 11 deletions(-)

--- a/drivers/net/ethernet/atheros/alx/main.c
+++ b/drivers/net/ethernet/atheros/alx/main.c
@@ -184,15 +184,16 @@ static void alx_schedule_reset(struct al
 	schedule_work(&alx->reset_wk);
 }
 
-static bool alx_clean_rx_irq(struct alx_priv *alx, int budget)
+static int alx_clean_rx_irq(struct alx_priv *alx, int budget)
 {
 	struct alx_rx_queue *rxq = &alx->rxq;
 	struct alx_rrd *rrd;
 	struct alx_buffer *rxb;
 	struct sk_buff *skb;
 	u16 length, rfd_cleaned = 0;
+	int work = 0;
 
-	while (budget > 0) {
+	while (work < budget) {
 		rrd = &rxq->rrd[rxq->rrd_read_idx];
 		if (!(rrd->word3 & cpu_to_le32(1 << RRD_UPDATED_SHIFT)))
 			break;
@@ -203,7 +204,7 @@ static bool alx_clean_rx_irq(struct alx_
 		    ALX_GET_FIELD(le32_to_cpu(rrd->word0),
 				  RRD_NOR) != 1) {
 			alx_schedule_reset(alx);
-			return 0;
+			return work;
 		}
 
 		rxb = &rxq->bufs[rxq->read_idx];
@@ -243,7 +244,7 @@ static bool alx_clean_rx_irq(struct alx_
 		}
 
 		napi_gro_receive(&alx->napi, skb);
-		budget--;
+		work++;
 
 next_pkt:
 		if (++rxq->read_idx == alx->rx_ringsz)
@@ -258,21 +259,22 @@ next_pkt:
 	if (rfd_cleaned)
 		alx_refill_rx_ring(alx, GFP_ATOMIC);
 
-	return budget > 0;
+	return work;
 }
 
 static int alx_poll(struct napi_struct *napi, int budget)
 {
 	struct alx_priv *alx = container_of(napi, struct alx_priv, napi);
 	struct alx_hw *hw = &alx->hw;
-	bool complete = true;
 	unsigned long flags;
+	bool tx_complete;
+	int work;
 
-	complete = alx_clean_tx_irq(alx) &&
-		   alx_clean_rx_irq(alx, budget);
+	tx_complete = alx_clean_tx_irq(alx);
+	work = alx_clean_rx_irq(alx, budget);
 
-	if (!complete)
-		return 1;
+	if (!tx_complete || work == budget)
+		return budget;
 
 	napi_complete(&alx->napi);
 
@@ -284,7 +286,7 @@ static int alx_poll(struct napi_struct *
 
 	alx_post_write(hw);
 
-	return 0;
+	return work;
 }
 
 static irqreturn_t alx_intr_handle(struct alx_priv *alx, u32 intr)
From foo@baz Sat Jan 17 18:12:21 PST 2015
From: Jiri Pirko <jiri@resnulli.us>
Date: Wed, 14 Jan 2015 18:15:30 +0100
Subject: team: avoid possible underflow of count_pending value for notify_peers and mcast_rejoin

From: Jiri Pirko <jiri@resnulli.us>

[ Upstream commit b0d11b42785b70e19bc6a3122eead3f7969a7589 ]

This patch fixes a race condition that may set count_pending to -1,
which results in an unwanted large burst of ARP messages (in the
"notify peers" case).

Consider the following scenario:

count_pending == 2
   CPU0                                           CPU1
					team_notify_peers_work
					  atomic_dec_and_test (dec count_pending to 1)
					  schedule_delayed_work
 team_notify_peers
   atomic_add (adding 1 to count_pending)
					team_notify_peers_work
					  atomic_dec_and_test (dec count_pending to 1)
					  schedule_delayed_work
					team_notify_peers_work
					  atomic_dec_and_test (dec count_pending to 0)
   schedule_delayed_work
					team_notify_peers_work
					  atomic_dec_and_test (dec count_pending to -1)

Fix this race by using atomic_dec_if_positive - that will prevent
count_pending from going below 0.
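
As a stand-alone illustration of the pattern (not the driver code itself; the
names below are made up), a worker built on atomic_dec_if_positive() simply
bails out instead of underflowing the counter:

	static void notify_work(struct work_struct *work)
	{
		/* Decrements only while the counter is > 0 and returns the
		 * resulting value; a negative result means another worker
		 * already consumed the last pending notification. */
		int val = atomic_dec_if_positive(&count_pending);

		if (val < 0)
			return;

		send_notification();	/* e.g. call_netdevice_notifiers() */

		if (val)		/* notifications still pending */
			schedule_delayed_work(&notify_dw,
					      msecs_to_jiffies(interval));
	}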

Fixes: fc423ff00df3a1955441 ("team: add peer notification")
Fixes: 492b200efdd20b8fcfd  ("team: add support for sending multicast rejoins")
Signed-off-by: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: Jiri Benc <jbenc@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/team/team.c |   16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -629,6 +629,7 @@ static int team_change_mode(struct team
 static void team_notify_peers_work(struct work_struct *work)
 {
 	struct team *team;
+	int val;
 
 	team = container_of(work, struct team, notify_peers.dw.work);
 
@@ -636,9 +637,14 @@ static void team_notify_peers_work(struc
 		schedule_delayed_work(&team->notify_peers.dw, 0);
 		return;
 	}
+	val = atomic_dec_if_positive(&team->notify_peers.count_pending);
+	if (val < 0) {
+		rtnl_unlock();
+		return;
+	}
 	call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, team->dev);
 	rtnl_unlock();
-	if (!atomic_dec_and_test(&team->notify_peers.count_pending))
+	if (val)
 		schedule_delayed_work(&team->notify_peers.dw,
 				      msecs_to_jiffies(team->notify_peers.interval));
 }
@@ -669,6 +675,7 @@ static void team_notify_peers_fini(struc
 static void team_mcast_rejoin_work(struct work_struct *work)
 {
 	struct team *team;
+	int val;
 
 	team = container_of(work, struct team, mcast_rejoin.dw.work);
 
@@ -676,9 +683,14 @@ static void team_mcast_rejoin_work(struc
 		schedule_delayed_work(&team->mcast_rejoin.dw, 0);
 		return;
 	}
+	val = atomic_dec_if_positive(&team->mcast_rejoin.count_pending);
+	if (val < 0) {
+		rtnl_unlock();
+		return;
+	}
 	call_netdevice_notifiers(NETDEV_RESEND_IGMP, team->dev);
 	rtnl_unlock();
-	if (!atomic_dec_and_test(&team->mcast_rejoin.count_pending))
+	if (val)
 		schedule_delayed_work(&team->mcast_rejoin.dw,
 				      msecs_to_jiffies(team->mcast_rejoin.interval));
 }
From foo@baz Sat Jan 17 18:12:21 PST 2015
From: Govindarajulu Varadarajan <_govind@gmx.com>
Date: Thu, 18 Dec 2014 15:58:42 +0530
Subject: enic: fix rx skb checksum

From: Govindarajulu Varadarajan <_govind@gmx.com>

[ Upstream commit 17e96834fd35997ca7cdfbf15413bcd5a36ad448 ]

Hardware always provides the complement of the IP pseudo checksum. The stack
expects the whole packet checksum without the pseudo checksum if
CHECKSUM_COMPLETE is set.

This causes checksum errors in nf & ovs.

kernel: qg-19546f09-f2: hw csum failure
kernel: CPU: 9 PID: 0 Comm: swapper/9 Tainted: GF          O--------------   3.10.0-123.8.1.el7.x86_64 #1
kernel: Hardware name: Cisco Systems Inc UCSB-B200-M3/UCSB-B200-M3, BIOS B200M3.2.2.3.0.080820141339 08/08/2014
kernel: ffff881218f40000 df68243feb35e3a8 ffff881237a43ab8 ffffffff815e237b
kernel: ffff881237a43ad0 ffffffff814cd4ca ffff8829ec71eb00 ffff881237a43af0
kernel: ffffffff814c6232 0000000000000286 ffff8829ec71eb00 ffff881237a43b00
kernel: Call Trace:
kernel: <IRQ>  [<ffffffff815e237b>] dump_stack+0x19/0x1b
kernel: [<ffffffff814cd4ca>] netdev_rx_csum_fault+0x3a/0x40
kernel: [<ffffffff814c6232>] __skb_checksum_complete_head+0x62/0x70
kernel: [<ffffffff814c6251>] __skb_checksum_complete+0x11/0x20
kernel: [<ffffffff8155a20c>] nf_ip_checksum+0xcc/0x100
kernel: [<ffffffffa049edc7>] icmp_error+0x1f7/0x35c [nf_conntrack_ipv4]
kernel: [<ffffffff814cf419>] ? netif_rx+0xb9/0x1d0
kernel: [<ffffffffa040eb7b>] ? internal_dev_recv+0xdb/0x130 [openvswitch]
kernel: [<ffffffffa04c8330>] nf_conntrack_in+0xf0/0xa80 [nf_conntrack]
kernel: [<ffffffff81509380>] ? inet_del_offload+0x40/0x40
kernel: [<ffffffffa049e302>] ipv4_conntrack_in+0x22/0x30 [nf_conntrack_ipv4]
kernel: [<ffffffff815005ca>] nf_iterate+0xaa/0xc0
kernel: [<ffffffff81509380>] ? inet_del_offload+0x40/0x40
kernel: [<ffffffff81500664>] nf_hook_slow+0x84/0x140
kernel: [<ffffffff81509380>] ? inet_del_offload+0x40/0x40
kernel: [<ffffffff81509dd4>] ip_rcv+0x344/0x380

Hardware verifies the IP & TCP/UDP header checksums but does not provide the
payload checksum, so use CHECKSUM_UNNECESSARY. Set it only if it is a valid
IP TCP/UDP packet.
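
For context, a sketch of the two RX checksum models as the stack sees them
(illustrative only; the hw_* variables are hypothetical):

	if (hw_provides_full_packet_csum) {
		/* CHECKSUM_COMPLETE: hand the checksum over the whole packet
		 * to the stack, which folds and verifies it itself. */
		skb->csum = hw_full_packet_csum;
		skb->ip_summed = CHECKSUM_COMPLETE;
	} else if (hw_verified_ip_and_l4_csums) {
		/* CHECKSUM_UNNECESSARY: hardware already validated the IP and
		 * TCP/UDP checksums; no checksum value is passed up. */
		skb->ip_summed = CHECKSUM_UNNECESSARY;
	}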

Cc: Jiri Benc <jbenc@redhat.com>
Cc: Stefan Assmann <sassmann@redhat.com>
Reported-by: Sunil Choudhary <schoudha@redhat.com>
Signed-off-by: Govindarajulu Varadarajan <_govind@gmx.com>
Reviewed-by: Jiri Benc <jbenc@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/cisco/enic/enic_main.c |   12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

--- a/drivers/net/ethernet/cisco/enic/enic_main.c
+++ b/drivers/net/ethernet/cisco/enic/enic_main.c
@@ -1059,10 +1059,14 @@ static void enic_rq_indicate_buf(struct
 				     PKT_HASH_TYPE_L4 : PKT_HASH_TYPE_L3);
 		}
 
-		if ((netdev->features & NETIF_F_RXCSUM) && !csum_not_calc) {
-			skb->csum = htons(checksum);
-			skb->ip_summed = CHECKSUM_COMPLETE;
-		}
+		/* Hardware does not provide whole packet checksum. It only
+		 * provides pseudo checksum. Since hw validates the packet
+		 * checksum but not provide us the checksum value. use
+		 * CHECSUM_UNNECESSARY.
+		 */
+		if ((netdev->features & NETIF_F_RXCSUM) && tcp_udp_csum_ok &&
+		    ipv4_csum_ok)
+			skb->ip_summed = CHECKSUM_UNNECESSARY;
 
 		if (vlan_stripped)
 			__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tci);
From 1f563a6a46544602183e7493b6ef69769d3d76d9 Mon Sep 17 00:00:00 2001
From: Thomas Hellstrom <thellstrom@vmware.com>
Date: Tue, 2 Dec 2014 03:32:24 -0800
Subject: drm/vmwgfx: Don't use memory accounting for kernel-side fence objects

From: Thomas Hellstrom <thellstrom@vmware.com>

commit 1f563a6a46544602183e7493b6ef69769d3d76d9 upstream.

Kernel side fence objects are used when unbinding resources and may thus be
created as part of a memory reclaim operation. This might trigger recursive
memory reclaims and result in the kernel running out of stack space.

So a simple way out is to avoid accounting of these fence objects.
In principle this is OK since while user-space can trigger the creation of
such objects, it can't really hold on to them. However, their lifetime is
quite long, so some form of accounting should perhaps be implemented in the
future.

Fixes kernel crashes when running, for example viewperf11 ensight-04 test 3
with low system memory settings.

Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
Reviewed-by: Jakob Bornecrantz <jakob@vmware.com>
Reviewed-by: Sinclair Yeh <syeh@vmware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

---
 drivers/gpu/drm/vmwgfx/vmwgfx_fence.c |   22 ++--------------------
 1 file changed, 2 insertions(+), 20 deletions(-)

--- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
@@ -545,35 +545,19 @@ void vmw_fence_obj_flush(struct vmw_fenc
 
 static void vmw_fence_destroy(struct vmw_fence_obj *fence)
 {
-	struct vmw_fence_manager *fman = fman_from_fence(fence);
-
 	fence_free(&fence->base);
-
-	/*
-	 * Free kernel space accounting.
-	 */
-	ttm_mem_global_free(vmw_mem_glob(fman->dev_priv),
-			    fman->fence_size);
 }
 
 int vmw_fence_create(struct vmw_fence_manager *fman,
 		     uint32_t seqno,
 		     struct vmw_fence_obj **p_fence)
 {
-	struct ttm_mem_global *mem_glob = vmw_mem_glob(fman->dev_priv);
 	struct vmw_fence_obj *fence;
 	int ret;
 
-	ret = ttm_mem_global_alloc(mem_glob, fman->fence_size,
-				   false, false);
-	if (unlikely(ret != 0))
-		return ret;
-
 	fence = kzalloc(sizeof(*fence), GFP_KERNEL);
-	if (unlikely(fence == NULL)) {
-		ret = -ENOMEM;
-		goto out_no_object;
-	}
+	if (unlikely(fence == NULL))
+		return -ENOMEM;
 
 	ret = vmw_fence_obj_init(fman, fence, seqno,
 				 vmw_fence_destroy);
@@ -585,8 +569,6 @@ int vmw_fence_create(struct vmw_fence_ma
 
 out_err_init:
 	kfree(fence);
-out_no_object:
-	ttm_mem_global_free(mem_glob, fman->fence_size);
 	return ret;
 }
 
From e338c4c2b620ba4e75fd3576f8142eb93be12ce3 Mon Sep 17 00:00:00 2001
From: Thomas Hellstrom <thellstrom@vmware.com>
Date: Tue, 25 Nov 2014 08:20:05 +0100
Subject: drm/vmwgfx: Fix error printout on signals pending

From: Thomas Hellstrom <thellstrom@vmware.com>

commit e338c4c2b620ba4e75fd3576f8142eb93be12ce3 upstream.

The function vmw_master_check() might return -ERESTARTSYS if there is a
signal pending, indicating that the IOCTL should be rerun, potentially from
user-space. At that point we shouldn't print out an error message since that
is not an error condition. In short, avoid bloating the kernel log when a
process refuses to die on SIGTERM.

Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
Reviewed-by: Jakob Bornecrantz <jakob@vmware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

---
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.c |    8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -1063,8 +1063,12 @@ static long vmw_generic_ioctl(struct fil
 
 	vmaster = vmw_master_check(dev, file_priv, flags);
 	if (unlikely(IS_ERR(vmaster))) {
-		DRM_INFO("IOCTL ERROR %d\n", nr);
-		return PTR_ERR(vmaster);
+		ret = PTR_ERR(vmaster);
+
+		if (ret != -ERESTARTSYS)
+			DRM_INFO("IOCTL ERROR Command %d, Error %ld.\n",
+				 nr, ret);
+		return ret;
 	}
 
 	ret = ioctl_func(filp, cmd, arg);
From 89669e7a7f96be3ee8d9a22a071d7c0d3b4428fc Mon Sep 17 00:00:00 2001
From: Thomas Hellstrom <thellstrom@vmware.com>
Date: Tue, 2 Dec 2014 03:36:57 -0800
Subject: drm/vmwgfx: Fix fence event code

From: Thomas Hellstrom <thellstrom@vmware.com>

commit 89669e7a7f96be3ee8d9a22a071d7c0d3b4428fc upstream.

The commit "vmwgfx: Rework fence event action" introduced a number of bugs
that are fixed with this commit:

a) A forgotten return statement.
b) An if statement with identical branches.

Reported-by: Rob Clark <robdclark@gmail.com>
Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
Reviewed-by: Jakob Bornecrantz <jakob@vmware.com>
Reviewed-by: Sinclair Yeh <syeh@vmware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

---
 drivers/gpu/drm/vmwgfx/vmwgfx_fence.c |   17 ++++++-----------
 1 file changed, 6 insertions(+), 11 deletions(-)

--- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
@@ -1087,6 +1087,8 @@ static int vmw_event_fence_action_create
 	if (ret != 0)
 		goto out_no_queue;
 
+	return 0;
+
 out_no_queue:
 	event->base.destroy(&event->base);
 out_no_event:
@@ -1162,17 +1164,10 @@ int vmw_fence_event_ioctl(struct drm_dev
 
 	BUG_ON(fence == NULL);
 
-	if (arg->flags & DRM_VMW_FE_FLAG_REQ_TIME)
-		ret = vmw_event_fence_action_create(file_priv, fence,
-						    arg->flags,
-						    arg->user_data,
-						    true);
-	else
-		ret = vmw_event_fence_action_create(file_priv, fence,
-						    arg->flags,
-						    arg->user_data,
-						    true);
-
+	ret = vmw_event_fence_action_create(file_priv, fence,
+					    arg->flags,
+					    arg->user_data,
+					    true);
 	if (unlikely(ret != 0)) {
 		if (ret != -ERESTARTSYS)
 			DRM_ERROR("Failed to attach event to fence.\n");
From 881fdaa5e4cb0d68e52acab0ad4e1820e2bfffa4 Mon Sep 17 00:00:00 2001
From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Date: Thu, 13 Nov 2014 22:43:23 +0900
Subject: drm/ttm: Avoid memory allocation from shrinker functions.

From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>

commit 881fdaa5e4cb0d68e52acab0ad4e1820e2bfffa4 upstream.

Andrew Morton wrote:
> On Wed, 12 Nov 2014 13:08:55 +0900 Tetsuo Handa <penguin-kernel@i-love.sakura.ne.jp> wrote:
>
> > Andrew Morton wrote:
> > > Poor ttm guys - this is a bit of a trap we set for them.
> >
> > Commit a91576d7916f6cce ("drm/ttm: Pass GFP flags in order to avoid deadlock.")
> > changed to use sc->gfp_mask rather than GFP_KERNEL.
> >
> > -       pages_to_free = kmalloc(npages_to_free * sizeof(struct page *),
> > -                       GFP_KERNEL);
> > +       pages_to_free = kmalloc(npages_to_free * sizeof(struct page *), gfp);
> >
> > But this bug is caused by sc->gfp_mask containing some flags which are not
> > in GFP_KERNEL, right? Then, I think
> >
> > -       pages_to_free = kmalloc(npages_to_free * sizeof(struct page *), gfp);
> > +       pages_to_free = kmalloc(npages_to_free * sizeof(struct page *), gfp & GFP_KERNEL);
> >
> > would hide this bug.
> >
> > But I think we should use GFP_ATOMIC (or drop __GFP_WAIT flag)
>
> Well no - ttm_page_pool_free() should stop calling kmalloc altogether.
> Just do
>
> 	struct page *pages_to_free[16];
>
> and rework the code to free 16 pages at a time.  Easy.

Well, the ttm code wants to process 512 pages at a time for performance.
The memory footprint added by a 512 * sizeof(struct page *) buffer is
only 4096 bytes (512 * 8 bytes on a 64-bit kernel). What about using a
static buffer like below?
----------
>From d3cb5393c9c8099d6b37e769f78c31af1541fe8c Mon Sep 17 00:00:00 2001
From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Date: Thu, 13 Nov 2014 22:21:54 +0900
Subject: drm/ttm: Avoid memory allocation from shrinker functions.

Commit a91576d7916f6cce ("drm/ttm: Pass GFP flags in order to avoid
deadlock.") caused BUG_ON() due to sc->gfp_mask containing flags
which are not in GFP_KERNEL.

  https://bugzilla.kernel.org/show_bug.cgi?id=87891

Changing from sc->gfp_mask to (sc->gfp_mask & GFP_KERNEL) would
avoid the BUG_ON(), but avoiding memory allocation from the shrinker
function is a better and more reliable fix.

The shrinker function is already serialized by a global lock, and the
clean up function is called after the shrinker is unregistered.
Thus, we can use a static buffer when called from the shrinker
function and the clean up function.

Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Signed-off-by: Dave Airlie <airlied@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

---
 drivers/gpu/drm/ttm/ttm_page_alloc.c     |   26 +++++++++++++++-----------
 drivers/gpu/drm/ttm/ttm_page_alloc_dma.c |   25 +++++++++++++++----------
 2 files changed, 30 insertions(+), 21 deletions(-)

--- a/drivers/gpu/drm/ttm/ttm_page_alloc.c
+++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c
@@ -297,11 +297,12 @@ static void ttm_pool_update_free_locked(
  *
  * @pool: to free the pages from
  * @free_all: If set to true will free all pages in pool
- * @gfp: GFP flags.
+ * @use_static: Safe to use static buffer
  **/
 static int ttm_page_pool_free(struct ttm_page_pool *pool, unsigned nr_free,
-			      gfp_t gfp)
+			      bool use_static)
 {
+	static struct page *static_buf[NUM_PAGES_TO_ALLOC];
 	unsigned long irq_flags;
 	struct page *p;
 	struct page **pages_to_free;
@@ -311,7 +312,11 @@ static int ttm_page_pool_free(struct ttm
 	if (NUM_PAGES_TO_ALLOC < nr_free)
 		npages_to_free = NUM_PAGES_TO_ALLOC;
 
-	pages_to_free = kmalloc(npages_to_free * sizeof(struct page *), gfp);
+	if (use_static)
+		pages_to_free = static_buf;
+	else
+		pages_to_free = kmalloc(npages_to_free * sizeof(struct page *),
+					GFP_KERNEL);
 	if (!pages_to_free) {
 		pr_err("Failed to allocate memory for pool free operation\n");
 		return 0;
@@ -374,7 +379,8 @@ restart:
 	if (freed_pages)
 		ttm_pages_put(pages_to_free, freed_pages);
 out:
-	kfree(pages_to_free);
+	if (pages_to_free != static_buf)
+		kfree(pages_to_free);
 	return nr_free;
 }
 
@@ -383,8 +389,6 @@ out:
  *
  * XXX: (dchinner) Deadlock warning!
  *
- * We need to pass sc->gfp_mask to ttm_page_pool_free().
- *
  * This code is crying out for a shrinker per pool....
  */
 static unsigned long
@@ -407,8 +411,8 @@ ttm_pool_shrink_scan(struct shrinker *sh
 		if (shrink_pages == 0)
 			break;
 		pool = &_manager->pools[(i + pool_offset)%NUM_POOLS];
-		shrink_pages = ttm_page_pool_free(pool, nr_free,
-						  sc->gfp_mask);
+		/* OK to use static buffer since global mutex is held. */
+		shrink_pages = ttm_page_pool_free(pool, nr_free, true);
 		freed += nr_free - shrink_pages;
 	}
 	mutex_unlock(&lock);
@@ -710,7 +714,7 @@ static void ttm_put_pages(struct page **
 	}
 	spin_unlock_irqrestore(&pool->lock, irq_flags);
 	if (npages)
-		ttm_page_pool_free(pool, npages, GFP_KERNEL);
+		ttm_page_pool_free(pool, npages, false);
 }
 
 /*
@@ -849,9 +853,9 @@ void ttm_page_alloc_fini(void)
 	pr_info("Finalizing pool allocator\n");
 	ttm_pool_mm_shrink_fini(_manager);
 
+	/* OK to use static buffer since global mutex is no longer used. */
 	for (i = 0; i < NUM_POOLS; ++i)
-		ttm_page_pool_free(&_manager->pools[i], FREE_ALL_PAGES,
-				   GFP_KERNEL);
+		ttm_page_pool_free(&_manager->pools[i], FREE_ALL_PAGES, true);
 
 	kobject_put(&_manager->kobj);
 	_manager = NULL;
--- a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
+++ b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
@@ -411,11 +411,12 @@ static void ttm_dma_page_put(struct dma_
  *
  * @pool: to free the pages from
  * @nr_free: If set to true will free all pages in pool
- * @gfp: GFP flags.
+ * @use_static: Safe to use static buffer
  **/
 static unsigned ttm_dma_page_pool_free(struct dma_pool *pool, unsigned nr_free,
-				       gfp_t gfp)
+				       bool use_static)
 {
+	static struct page *static_buf[NUM_PAGES_TO_ALLOC];
 	unsigned long irq_flags;
 	struct dma_page *dma_p, *tmp;
 	struct page **pages_to_free;
@@ -432,7 +433,11 @@ static unsigned ttm_dma_page_pool_free(s
 			 npages_to_free, nr_free);
 	}
 #endif
-	pages_to_free = kmalloc(npages_to_free * sizeof(struct page *), gfp);
+	if (use_static)
+		pages_to_free = static_buf;
+	else
+		pages_to_free = kmalloc(npages_to_free * sizeof(struct page *),
+					GFP_KERNEL);
 
 	if (!pages_to_free) {
 		pr_err("%s: Failed to allocate memory for pool free operation\n",
@@ -502,7 +507,8 @@ restart:
 	if (freed_pages)
 		ttm_dma_pages_put(pool, &d_pages, pages_to_free, freed_pages);
 out:
-	kfree(pages_to_free);
+	if (pages_to_free != static_buf)
+		kfree(pages_to_free);
 	return nr_free;
 }
 
@@ -531,7 +537,8 @@ static void ttm_dma_free_pool(struct dev
 		if (pool->type != type)
 			continue;
 		/* Takes a spinlock.. */
-		ttm_dma_page_pool_free(pool, FREE_ALL_PAGES, GFP_KERNEL);
+		/* OK to use static buffer since global mutex is held. */
+		ttm_dma_page_pool_free(pool, FREE_ALL_PAGES, true);
 		WARN_ON(((pool->npages_in_use + pool->npages_free) != 0));
 		/* This code path is called after _all_ references to the
 		 * struct device has been dropped - so nobody should be
@@ -986,7 +993,7 @@ void ttm_dma_unpopulate(struct ttm_dma_t
 
 	/* shrink pool if necessary (only on !is_cached pools)*/
 	if (npages)
-		ttm_dma_page_pool_free(pool, npages, GFP_KERNEL);
+		ttm_dma_page_pool_free(pool, npages, false);
 	ttm->state = tt_unpopulated;
 }
 EXPORT_SYMBOL_GPL(ttm_dma_unpopulate);
@@ -996,8 +1003,6 @@ EXPORT_SYMBOL_GPL(ttm_dma_unpopulate);
  *
  * XXX: (dchinner) Deadlock warning!
  *
- * We need to pass sc->gfp_mask to ttm_dma_page_pool_free().
- *
  * I'm getting sadder as I hear more pathetical whimpers about needing per-pool
  * shrinkers
  */
@@ -1030,8 +1035,8 @@ ttm_dma_pool_shrink_scan(struct shrinker
 		if (++idx < pool_offset)
 			continue;
 		nr_free = shrink_pages;
-		shrink_pages = ttm_dma_page_pool_free(p->pool, nr_free,
-						      sc->gfp_mask);
+		/* OK to use static buffer since global mutex is held. */
+		shrink_pages = ttm_dma_page_pool_free(p->pool, nr_free, true);
 		freed += nr_free - shrink_pages;
 
 		pr_debug("%s: (%s:%d) Asked to shrink %d, have %d more to go\n",
From e2809c7db818df6bbd0edf843e1beb2fbc9d8541 Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Wed, 26 Nov 2014 13:15:24 +1000
Subject: drm/fb_helper: move deferred fb checking into restore mode (v2)

From: Dave Airlie <airlied@redhat.com>

commit e2809c7db818df6bbd0edf843e1beb2fbc9d8541 upstream.

On MST systems the monitors don't appear when we set the fb up,
but plymouth opens the drm device and holds it open while they
come up. When plymouth finishes and lastclose gets called we
don't do the delayed fb probe, so the monitor never appears on the
console.

Fix this by moving the delayed checking into the mode restore.

v2: Daniel suggested that ->delayed_hotplug is set under
the mode_config mutex, so we should check it under that as
well, while we are in the area.

Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Dave Airlie <airlied@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

---
 drivers/gpu/drm/drm_fb_helper.c |   13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

--- a/drivers/gpu/drm/drm_fb_helper.c
+++ b/drivers/gpu/drm/drm_fb_helper.c
@@ -347,9 +347,18 @@ bool drm_fb_helper_restore_fbdev_mode_un
 {
 	struct drm_device *dev = fb_helper->dev;
 	bool ret;
+	bool do_delayed = false;
+
 	drm_modeset_lock_all(dev);
 	ret = restore_fbdev_mode(fb_helper);
+
+	do_delayed = fb_helper->delayed_hotplug;
+	if (do_delayed)
+		fb_helper->delayed_hotplug = false;
 	drm_modeset_unlock_all(dev);
+
+	if (do_delayed)
+		drm_fb_helper_hotplug_event(fb_helper);
 	return ret;
 }
 EXPORT_SYMBOL(drm_fb_helper_restore_fbdev_mode_unlocked);
@@ -888,10 +897,6 @@ int drm_fb_helper_set_par(struct fb_info
 
 	drm_fb_helper_restore_fbdev_mode_unlocked(fb_helper);
 
-	if (fb_helper->delayed_hotplug) {
-		fb_helper->delayed_hotplug = false;
-		drm_fb_helper_hotplug_event(fb_helper);
-	}
 	return 0;
 }
 EXPORT_SYMBOL(drm_fb_helper_set_par);
From 19a93f042fc241ecdf98543cedfe7c171f8cdf53 Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Wed, 26 Nov 2014 13:13:09 +1000
Subject: drm/dp: retry AUX transactions 32 times (v1.1)

From: Dave Airlie <airlied@redhat.com>

commit 19a93f042fc241ecdf98543cedfe7c171f8cdf53 upstream.

At least on two MST devices I've tested with, when
they are link training downstream, they are totally
unable to handle aux ch msgs, so they defer like nuts.
I tried 16, it wasn't enough, 32 seems better.

This fixes one Dell 4k monitor and one of the
MST hubs.

v1.1: fixup comment (Tom).

Acked-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

---
 drivers/gpu/drm/drm_dp_helper.c |    7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

--- a/drivers/gpu/drm/drm_dp_helper.c
+++ b/drivers/gpu/drm/drm_dp_helper.c
@@ -378,10 +378,11 @@ static int drm_dp_dpcd_access(struct drm
 
 	/*
 	 * The specification doesn't give any recommendation on how often to
-	 * retry native transactions, so retry 7 times like for I2C-over-AUX
-	 * transactions.
+	 * retry native transactions. We used to retry 7 times like for
+	 * aux i2c transactions but real world devices this wasn't
+	 * sufficient, bump to 32 which makes Dell 4k monitors happier.
 	 */
-	for (retry = 0; retry < 7; retry++) {
+	for (retry = 0; retry < 32; retry++) {
 
 		mutex_lock(&aux->hw_mutex);
 		err = aux->transfer(aux, &msg);
From 0391359ddf79b52bb7e7bb9ace08e34fb08b0e76 Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Mon, 8 Dec 2014 22:55:22 +0100
Subject: drm/dp-mst: Remove branches before dropping the reference

From: Daniel Vetter <daniel.vetter@ffwll.ch>

commit 0391359ddf79b52bb7e7bb9ace08e34fb08b0e76 upstream.

When we unplug a dp mst branch we unreference the entire tree from
the root towards the leaves. Which is ok, since that's the way the
pointers and so also the refcounts go.

But before we drop the reference we must make sure that we remove the
branches/ports from the lists/pointers. Otherwise the get_validated
functions will still return them instead of returning NULL (which
indicates a potentially on-going unplug).

The mst branch destroy gets this right for ports: first it deletes the
port from the ports list, then it unrefs. But the port destroy
function gets it wrong: first it drops the mstb reference, then it
clears the pointer. Which means a zombie mst branch can still be
validated with get_validated_mstb_ref when it shouldn't.

Fix this.

This should address a backtrace Dave dug out somewhere on unplug:

 [<ffffffffa00cc262>] drm_dp_mst_get_validated_mstb_ref_locked+0x92/0xa0 [drm_kms_helper]
 [<ffffffffa00cc211>] drm_dp_mst_get_validated_mstb_ref_locked+0x41/0xa0 [drm_kms_helper]
 [<ffffffffa00cc2aa>] drm_dp_get_validated_mstb_ref+0x3a/0x60 [drm_kms_helper]
 [<ffffffffa00cc2fb>] drm_dp_payload_send_msg.isra.14+0x2b/0x100 [drm_kms_helper]
 [<ffffffffa00cc547>] drm_dp_update_payload_part1+0x177/0x360 [drm_kms_helper]
 [<ffffffffa015c52e>] intel_mst_disable_dp+0x3e/0x80 [i915]
 [<ffffffffa013d60b>] haswell_crtc_disable+0x1cb/0x340 [i915]
 [<ffffffffa0136739>] intel_crtc_control+0x49/0x100 [i915]
 [<ffffffffa0136857>] intel_crtc_update_dpms+0x67/0x80 [i915]
 [<ffffffffa013fa59>] intel_connector_dpms+0x59/0x70 [i915]
 [<ffffffffa015c752>] intel_dp_destroy_mst_connector+0x32/0xc0 [i915]
 [<ffffffffa00cb44b>] drm_dp_destroy_port+0x6b/0xa0 [drm_kms_helper]
 [<ffffffffa00cb588>] drm_dp_destroy_mst_branch_device+0x108/0x130 [drm_kms_helper]
 [<ffffffffa00cb3cd>] drm_dp_port_teardown_pdt+0x3d/0x50 [drm_kms_helper]
 [<ffffffffa00cdb79>] drm_dp_mst_handle_up_req+0x499/0x540 [drm_kms_helper]
 [<ffffffff810d9ead>] ? trace_hardirqs_on_caller+0x15d/0x200
 [<ffffffffa00cdc73>] drm_dp_mst_hpd_irq+0x53/0xa00 [drm_kms_helper]
 [<ffffffffa00c7dfb>] ? drm_dp_dpcd_read+0x1b/0x20 [drm_kms_helper]
 [<ffffffffa0153ed8>] ? intel_dp_dpcd_read_wake+0x38/0x70 [i915]
 [<ffffffffa015a225>] intel_dp_check_mst_status+0xb5/0x250 [i915]
 [<ffffffffa015ac71>] intel_dp_hpd_pulse+0x181/0x210 [i915]
 [<ffffffffa01104f6>] i915_digport_work_func+0x96/0x120 [i915]

Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Dave Airlie <airlied@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

---
 drivers/gpu/drm/drm_dp_mst_topology.c |    5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

--- a/drivers/gpu/drm/drm_dp_mst_topology.c
+++ b/drivers/gpu/drm/drm_dp_mst_topology.c
@@ -839,6 +839,8 @@ static void drm_dp_put_mst_branch_device
 
 static void drm_dp_port_teardown_pdt(struct drm_dp_mst_port *port, int old_pdt)
 {
+	struct drm_dp_mst_branch *mstb;
+
 	switch (old_pdt) {
 	case DP_PEER_DEVICE_DP_LEGACY_CONV:
 	case DP_PEER_DEVICE_SST_SINK:
@@ -846,8 +848,9 @@ static void drm_dp_port_teardown_pdt(str
 		drm_dp_mst_unregister_i2c_bus(&port->aux);
 		break;
 	case DP_PEER_DEVICE_MST_BRANCHING:
-		drm_dp_put_mst_branch_device(port->mstb);
+		mstb = port->mstb;
 		port->mstb = NULL;
+		drm_dp_put_mst_branch_device(mstb);
 		break;
 	}
 }
From 129acb7c0b682512e89c4f65c33593d50f2f49a9 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Fri, 7 Nov 2014 11:05:04 -0500
Subject: drm/radeon: fix typo in CI dpm disable

From: Alex Deucher <alexander.deucher@amd.com>

commit 129acb7c0b682512e89c4f65c33593d50f2f49a9 upstream.

Need to disable DS, not enable it when disabling dpm.

Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

---
 drivers/gpu/drm/radeon/ci_dpm.c |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

--- a/drivers/gpu/drm/radeon/ci_dpm.c
+++ b/drivers/gpu/drm/radeon/ci_dpm.c
@@ -4729,7 +4729,7 @@ void ci_dpm_disable(struct radeon_device
 	ci_enable_spread_spectrum(rdev, false);
 	ci_enable_auto_throttle_source(rdev, RADEON_DPM_AUTO_THROTTLE_SRC_THERMAL, false);
 	ci_stop_dpm(rdev);
-	ci_enable_ds_master_switch(rdev, true);
+	ci_enable_ds_master_switch(rdev, false);
 	ci_enable_ulv(rdev, false);
 	ci_clear_vc(rdev);
 	ci_reset_to_default(rdev);
From 4bb62c95a7e781a238b2ab374f34b1bf91e01ddc Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Mon, 17 Nov 2014 15:08:17 -0500
Subject: drm/radeon: work around a hw bug in MGCG on CIK

From: Alex Deucher <alexander.deucher@amd.com>

commit 4bb62c95a7e781a238b2ab374f34b1bf91e01ddc upstream.

Always need to set bit 0 of RLC_CGTT_MGCG_OVERRIDE
to avoid unreliable doorbell updates in some cases.

Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

---
 drivers/gpu/drm/radeon/cik.c |    3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

--- a/drivers/gpu/drm/radeon/cik.c
+++ b/drivers/gpu/drm/radeon/cik.c
@@ -6314,6 +6314,7 @@ static void cik_enable_mgcg(struct radeo
 		}
 
 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
+		data |= 0x00000001;
 		data &= 0xfffffffd;
 		if (orig != data)
 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
@@ -6345,7 +6346,7 @@ static void cik_enable_mgcg(struct radeo
 		}
 	} else {
 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
-		data |= 0x00000002;
+		data |= 0x00000003;
 		if (orig != data)
 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
 
From 5e5c21cac1001089007260c48b0c89ebaace0e71 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Wed, 3 Dec 2014 00:03:49 -0500
Subject: drm/radeon: check the right ring in radeon_evict_flags()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

From: Alex Deucher <alexander.deucher@amd.com>

commit 5e5c21cac1001089007260c48b0c89ebaace0e71 upstream.

Check that the ring we are using for copies is functional
rather than the GFX ring.  On newer asics we use the DMA
ring for bo moves.

Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

---
 drivers/gpu/drm/radeon/radeon_ttm.c |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -196,7 +196,7 @@ static void radeon_evict_flags(struct tt
 	rbo = container_of(bo, struct radeon_bo, tbo);
 	switch (bo->mem.mem_type) {
 	case TTM_PL_VRAM:
-		if (rbo->rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready == false)
+		if (rbo->rdev->ring[radeon_copy_ring_index(rbo->rdev)].ready == false)
 			radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_CPU);
 		else
 			radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_GTT);
From fbedf1c3fc3a1e9f249c2efe2f4553d8df9d86d3 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Fri, 5 Dec 2014 13:46:07 -0500
Subject: drm/radeon: KV has three PPLLs (v2)

From: Alex Deucher <alexander.deucher@amd.com>

commit fbedf1c3fc3a1e9f249c2efe2f4553d8df9d86d3 upstream.

Enable all three in the driver.  Early documentation
indicated the 3rd one was used for something else, but
that is not the case.

v2: handle disable as well

Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

---
 drivers/gpu/drm/radeon/atombios_crtc.c |    8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

--- a/drivers/gpu/drm/radeon/atombios_crtc.c
+++ b/drivers/gpu/drm/radeon/atombios_crtc.c
@@ -1851,10 +1851,9 @@ static int radeon_atom_pick_pll(struct d
 				return pll;
 		}
 		/* otherwise, pick one of the plls */
-		if ((rdev->family == CHIP_KAVERI) ||
-		    (rdev->family == CHIP_KABINI) ||
+		if ((rdev->family == CHIP_KABINI) ||
 		    (rdev->family == CHIP_MULLINS)) {
-			/* KB/KV/ML has PPLL1 and PPLL2 */
+			/* KB/ML has PPLL1 and PPLL2 */
 			pll_in_use = radeon_get_pll_use_mask(crtc);
 			if (!(pll_in_use & (1 << ATOM_PPLL2)))
 				return ATOM_PPLL2;
@@ -1863,7 +1862,7 @@ static int radeon_atom_pick_pll(struct d
 			DRM_ERROR("unable to allocate a PPLL\n");
 			return ATOM_PPLL_INVALID;
 		} else {
-			/* CI has PPLL0, PPLL1, and PPLL2 */
+			/* CI/KV has PPLL0, PPLL1, and PPLL2 */
 			pll_in_use = radeon_get_pll_use_mask(crtc);
 			if (!(pll_in_use & (1 << ATOM_PPLL2)))
 				return ATOM_PPLL2;
@@ -2154,6 +2153,7 @@ static void atombios_crtc_disable(struct
 	case ATOM_PPLL0:
 		/* disable the ppll */
 		if ((rdev->family == CHIP_ARUBA) ||
+		    (rdev->family == CHIP_KAVERI) ||
 		    (rdev->family == CHIP_BONAIRE) ||
 		    (rdev->family == CHIP_HAWAII))
 			atombios_crtc_program_pll(crtc, radeon_crtc->crtc_id, radeon_crtc->pll_id,
From 5665c3ebe5ee8a2c516925461f7214ba59c2e6d7 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Tue, 9 Dec 2014 10:04:01 -0500
Subject: drm/radeon: fix sad_count check for dce3

From: Alex Deucher <alexander.deucher@amd.com>

commit 5665c3ebe5ee8a2c516925461f7214ba59c2e6d7 upstream.

Make it consistent with the sad code for other asics to deal
with monitors that don't report sads.

bug:
https://bugzilla.kernel.org/show_bug.cgi?id=89461

Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

---
 drivers/gpu/drm/radeon/dce3_1_afmt.c |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

--- a/drivers/gpu/drm/radeon/dce3_1_afmt.c
+++ b/drivers/gpu/drm/radeon/dce3_1_afmt.c
@@ -103,7 +103,7 @@ static void dce3_2_afmt_write_sad_regs(s
 	}
 
 	sad_count = drm_edid_to_sad(radeon_connector->edid, &sads);
-	if (sad_count < 0) {
+	if (sad_count <= 0) {
 		DRM_ERROR("Couldn't read SADs: %d\n", sad_count);
 		return;
 	}
From 02ae7af53a451a1b0a51022c4693f5b339133e79 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Mon, 15 Dec 2014 17:24:19 -0500
Subject: drm/radeon: adjust default bapm settings for KV

From: Alex Deucher <alexander.deucher@amd.com>

commit 02ae7af53a451a1b0a51022c4693f5b339133e79 upstream.

Enabling bapm seems to cause clocking problems on some
KV configurations.  Disable it by default for now.

Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

---
 drivers/gpu/drm/radeon/kv_dpm.c |   10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

--- a/drivers/gpu/drm/radeon/kv_dpm.c
+++ b/drivers/gpu/drm/radeon/kv_dpm.c
@@ -2745,13 +2745,11 @@ int kv_dpm_init(struct radeon_device *rd
 	pi->enable_auto_thermal_throttling = true;
 	pi->disable_nb_ps3_in_battery = false;
 	if (radeon_bapm == -1) {
-		/* There are stability issues reported on with
-		 * bapm enabled on an asrock system.
-		 */
-		if (rdev->pdev->subsystem_vendor == 0x1849)
-			pi->bapm_enable = false;
-		else
+		/* only enable bapm on KB, ML by default */
+		if (rdev->family == CHIP_KABINI || rdev->family == CHIP_MULLINS)
 			pi->bapm_enable = true;
+		else
+			pi->bapm_enable = false;
 	} else if (radeon_bapm == 0) {
 		pi->bapm_enable = false;
 	} else {
From 410cce2a6b82299b46ff316c6384e789ce275ecb Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Wed, 10 Dec 2014 09:42:10 -0500
Subject: drm/radeon: properly filter DP1.2 4k modes on non-DP1.2 hw

From: Alex Deucher <alexander.deucher@amd.com>

commit 410cce2a6b82299b46ff316c6384e789ce275ecb upstream.

The check was already in place in the dp mode_valid check, but
radeon_dp_get_dp_link_clock() never returned the high clock
mode_valid was checking for because that function clipped the
clock based on the hw capabilities.  Add an explicit check
in the mode_valid function.

bug:
https://bugs.freedesktop.org/show_bug.cgi?id=87172

Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

---
 drivers/gpu/drm/radeon/atombios_dp.c |    4 ++++
 1 file changed, 4 insertions(+)

--- a/drivers/gpu/drm/radeon/atombios_dp.c
+++ b/drivers/gpu/drm/radeon/atombios_dp.c
@@ -492,6 +492,10 @@ int radeon_dp_mode_valid_helper(struct d
 	struct radeon_connector_atom_dig *dig_connector;
 	int dp_clock;
 
+	if ((mode->clock > 340000) &&
+	    (!radeon_connector_is_dp12_capable(connector)))
+		return MODE_CLOCK_HIGH;
+
 	if (!radeon_connector->con_priv)
 		return MODE_CLOCK_HIGH;
 	dig_connector = radeon_connector->con_priv;
From 0b6d24c01932db99fc95304235e751e7f7625c41 Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Fri, 11 Apr 2014 15:55:17 +0200
Subject: drm/i915: Don't complain about stolen conflicts on gen3

From: Daniel Vetter <daniel.vetter@ffwll.ch>

commit 0b6d24c01932db99fc95304235e751e7f7625c41 upstream.

Apparently stuff works that way on those machines.

I agree with Chris' concern that this is a bit risky but imo worth a
shot in -next just for fun. Afaics all these machines have the pci
resources allocated like that by the BIOS, so I suspect that it's all
ok.

This regression goes back to

commit eaba1b8f3379b5d100bd146b9a41d28348bdfd09
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Thu Jul 4 12:28:35 2013 +0100

    drm/i915: Verify that our stolen memory doesn't conflict

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=76983
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=71031
Tested-by: lu hua <huax.lu@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Tested-by: Paul Menzel <paulepanter@users.sourceforge.net>
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

---
 drivers/gpu/drm/i915/i915_gem_stolen.c |    6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

--- a/drivers/gpu/drm/i915/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
@@ -137,7 +137,11 @@ static unsigned long i915_stolen_to_phys
 		r = devm_request_mem_region(dev->dev, base + 1,
 					    dev_priv->gtt.stolen_size - 1,
 					    "Graphics Stolen Memory");
-		if (r == NULL) {
+		/*
+		 * GEN3 firmware likes to smash pci bridges into the stolen
+		 * range. Apparently this works.
+		 */
+		if (r == NULL && !IS_GEN3(dev)) {
 			DRM_ERROR("conflict detected with stolen region: [0x%08x - 0x%08x]\n",
 				  base, base + (uint32_t)dev_priv->gtt.stolen_size);
 			base = 0;
From d472fcc8379c062bd56a3876fc6ef22258f14a91 Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Mon, 24 Nov 2014 11:12:42 +0100
Subject: drm/i915: Disallow pin ioctl completely for kms drivers

From: Daniel Vetter <daniel.vetter@ffwll.ch>

commit d472fcc8379c062bd56a3876fc6ef22258f14a91 upstream.

The problem here is that SNA pins batchbuffers to eke out a bit more
performance. Iirc it started out as a w/a for i830M (which we've
implemented in the kernel for a long time already). The problem is
that the pin ioctl wasn't added in

commit d23db88c3ab233daed18709e3a24d6c95344117f
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Fri May 23 08:48:08 2014 +0200

    drm/i915: Prevent negative relocation deltas from wrapping

Fix this by simply disallowing pinning from userspace so that the
kernel is in full control of batch placement again. Especially since
distros are moving towards running X as non-root, so most users won't
even be able to see any benefits.

UMS support is dead now, but we need this minimal patch for
backporting. Follow-up patch will remove the pin ioctl code
completely.

Note to backporters: You must have both

commit b45305fce5bb1abec263fcff9d81ebecd6306ede
Author: Daniel Vetter <daniel.vetter@ffwll.ch>
Date:   Mon Dec 17 16:21:27 2012 +0100

    drm/i915: Implement workaround for broken CS tlb on i830/845

which landed in 3.8 and

commit c4d69da167fa967749aeb70bc0e94a457e5d00c1
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Mon Sep 8 14:25:41 2014 +0100

    drm/i915: Evict CS TLBs between batches

which is also marked cc: stable. Otherwise this could introduce a
regression by disabling the userspace w/a without the kernel w/a being
fully functional on i830/45.

References: https://bugs.freedesktop.org/show_bug.cgi?id=76554#c116
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

---
 drivers/gpu/drm/i915/i915_gem.c |    5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4193,7 +4193,7 @@ i915_gem_pin_ioctl(struct drm_device *de
 	struct drm_i915_gem_object *obj;
 	int ret;
 
-	if (INTEL_INFO(dev)->gen >= 6)
+	if (drm_core_check_feature(dev, DRIVER_MODESET))
 		return -ENODEV;
 
 	ret = i915_mutex_lock_interruptible(dev);
@@ -4249,6 +4249,9 @@ i915_gem_unpin_ioctl(struct drm_device *
 	struct drm_i915_gem_object *obj;
 	int ret;
 
+	if (drm_core_check_feature(dev, DRIVER_MODESET))
+		return -ENODEV;
+
 	ret = i915_mutex_lock_interruptible(dev);
 	if (ret)
 		return ret;
From 2b387059817fd100cddc5a97118d63e3f3fade74 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Mon, 24 Nov 2014 08:03:12 +0000
Subject: drm/i915: Only warn the first time we attempt to mmio whilst suspended

From: Chris Wilson <chris@chris-wilson.co.uk>

commit 2b387059817fd100cddc5a97118d63e3f3fade74 upstream.

In all likelihood we will do a few hundred erroneous register
operations if we do a single invalid register access whilst the device
is suspended. As each instance causes a WARN, this floods the system
logs and can make the system unresponsive.

The warning was first introduced in
commit b2ec142cb0101f298f8e091c7d75b1ec5b809b65
Author: Paulo Zanoni <paulo.r.zanoni@intel.com>
Date:   Fri Feb 21 13:52:25 2014 -0300

    drm/i915: call assert_device_not_suspended at gen6_force_wake_work

and despite the claims the WARN is still encountered in the wild today.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Paulo Zanoni <paulo.r.zanoni@intel.com>
Cc: Imre Deak <imre.deak@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

---
 drivers/gpu/drm/i915/intel_uncore.c |    4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -43,8 +43,8 @@
 static void
 assert_device_not_suspended(struct drm_i915_private *dev_priv)
 {
-	WARN(HAS_RUNTIME_PM(dev_priv->dev) && dev_priv->pm.suspended,
-	     "Device suspended\n");
+	WARN_ONCE(HAS_RUNTIME_PM(dev_priv->dev) && dev_priv->pm.suspended,
+		  "Device suspended\n");
 }
 
 static void __gen6_gt_wait_for_thread_c0(struct drm_i915_private *dev_priv)
From e7d6f7d708290da1b7c92f533444b042c79412e0 Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Mon, 8 Dec 2014 13:23:37 +1000
Subject: drm/i915: resume MST after reading back hw state

From: Dave Airlie <airlied@redhat.com>

commit e7d6f7d708290da1b7c92f533444b042c79412e0 upstream.

Otherwise the MST resume paths can hit DPMS paths
which hit state checker paths, which hit WARN_ON,
because the state checker is inconsistent with the
hw.

This fixes a bunch of WARN_ON's on resume after
undocking.

Signed-off-by: Dave Airlie <airlied@redhat.com>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

---
 drivers/gpu/drm/i915/i915_drv.c |    3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -692,11 +692,12 @@ static int __i915_drm_thaw(struct drm_de
 			spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags);
 		}
 
-		intel_dp_mst_resume(dev);
 		drm_modeset_lock_all(dev);
 		intel_modeset_setup_hw_state(dev, true);
 		drm_modeset_unlock_all(dev);
 
+		intel_dp_mst_resume(dev);
+
 		/*
 		 * ... but also need to make sure that hotplug processing
 		 * doesn't cause havoc. Like in the driver load code we don't
From 9f49c37635d5c2a801f7670d5fbf0b25ec461f2c Mon Sep 17 00:00:00 2001
From: Jesse Barnes <jbarnes@virtuousgeek.org>
Date: Wed, 10 Dec 2014 12:16:05 -0800
Subject: drm/i915: save/restore GMBUS freq across suspend/resume on gen4
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

From: Jesse Barnes <jbarnes@virtuousgeek.org>

commit 9f49c37635d5c2a801f7670d5fbf0b25ec461f2c upstream.

Should probably just init this in the GMbus code all the time, based on
the cdclk and HPLL like we do on newer platforms.  Ville has code for
that in a rework branch, but until then we can fix this bug fairly
easily.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=76301
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Tested-by: Nikolay <mar.kolya@gmail.com>
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

---
 drivers/gpu/drm/i915/i915_drv.h     |    1 +
 drivers/gpu/drm/i915/i915_reg.h     |    1 +
 drivers/gpu/drm/i915/i915_suspend.c |    8 ++++++++
 3 files changed, 10 insertions(+)

--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -877,6 +877,7 @@ struct i915_suspend_saved_registers {
 	u32 savePIPEB_LINK_N1;
 	u32 saveMCHBAR_RENDER_STANDBY;
 	u32 savePCH_PORT_HOTPLUG;
+	u16 saveGCDGMBUS;
 };
 
 struct vlv_s0ix_state {
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -74,6 +74,7 @@
 #define   I915_GC_RENDER_CLOCK_166_MHZ	(0 << 0)
 #define   I915_GC_RENDER_CLOCK_200_MHZ	(1 << 0)
 #define   I915_GC_RENDER_CLOCK_333_MHZ	(4 << 0)
+#define GCDGMBUS 0xcc
 #define PCI_LBPC 0xf4 /* legacy/combination backlight modes, also called LBB */
 
 
--- a/drivers/gpu/drm/i915/i915_suspend.c
+++ b/drivers/gpu/drm/i915/i915_suspend.c
@@ -328,6 +328,10 @@ int i915_save_state(struct drm_device *d
 		}
 	}
 
+	if (IS_GEN4(dev))
+		pci_read_config_word(dev->pdev, GCDGMBUS,
+				     &dev_priv->regfile.saveGCDGMBUS);
+
 	/* Cache mode state */
 	if (INTEL_INFO(dev)->gen < 7)
 		dev_priv->regfile.saveCACHE_MODE_0 = I915_READ(CACHE_MODE_0);
@@ -356,6 +360,10 @@ int i915_restore_state(struct drm_device
 	mutex_lock(&dev->struct_mutex);
 
 	i915_gem_restore_fences(dev);
+
+	if (IS_GEN4(dev))
+		pci_write_config_word(dev->pdev, GCDGMBUS,
+				      dev_priv->regfile.saveGCDGMBUS);
 	i915_restore_display(dev);
 
 	if (!drm_core_check_feature(dev, DRIVER_MODESET)) {
From 4761703bd04bbdf56396d264903cc5a1fdcb3c01 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Tue, 16 Dec 2014 13:55:38 -0500
Subject: drm/nv4c/mc: disable msi

From: Ilia Mirkin <imirkin@alum.mit.edu>

commit 4761703bd04bbdf56396d264903cc5a1fdcb3c01 upstream.

Several users have, over time, reported issues with MSI on these IGPs.
They're old, rarely available, and MSI doesn't provide such huge
advantages on them. Just disable.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=87361
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=74492
Fixes: fa8c9ac72fe ("drm/nv4c/mc: nv4x igp's have a different msi rearm register")
Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

---
 drivers/gpu/drm/nouveau/core/subdev/mc/nv4c.c |    8 --------
 1 file changed, 8 deletions(-)

--- a/drivers/gpu/drm/nouveau/core/subdev/mc/nv4c.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/mc/nv4c.c
@@ -24,13 +24,6 @@
 
 #include "nv04.h"
 
-static void
-nv4c_mc_msi_rearm(struct nouveau_mc *pmc)
-{
-	struct nv04_mc_priv *priv = (void *)pmc;
-	nv_wr08(priv, 0x088050, 0xff);
-}
-
 struct nouveau_oclass *
 nv4c_mc_oclass = &(struct nouveau_mc_oclass) {
 	.base.handle = NV_SUBDEV(MC, 0x4c),
@@ -41,5 +34,4 @@ nv4c_mc_oclass = &(struct nouveau_mc_ocl
 		.fini = _nouveau_mc_fini,
 	},
 	.intr = nv04_mc_intr,
-	.msi_rearm = nv4c_mc_msi_rearm,
 }.base;
From 148b83d0815a3778c8949e6a97cb798cbaa0efb3 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Tue, 16 Dec 2014 08:44:31 +0000
Subject: drm/i915: Invalidate media caches on gen7
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

From: Chris Wilson <chris@chris-wilson.co.uk>

commit 148b83d0815a3778c8949e6a97cb798cbaa0efb3 upstream.

In the gen7 pipe control there is an extra bit to flush the media
caches, so let's set it during cache invalidation flushes.

v2: Rename to MEDIA_STATE_CLEAR to be more inline with spec.

Cc: Simon Farnsworth <simon@farnz.org.uk>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

---
 drivers/gpu/drm/i915/i915_reg.h         |    1 +
 drivers/gpu/drm/i915/intel_ringbuffer.c |    1 +
 2 files changed, 2 insertions(+)

--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -371,6 +371,7 @@
 #define   PIPE_CONTROL_STORE_DATA_INDEX			(1<<21)
 #define   PIPE_CONTROL_CS_STALL				(1<<20)
 #define   PIPE_CONTROL_TLB_INVALIDATE			(1<<18)
+#define   PIPE_CONTROL_MEDIA_STATE_CLEAR		(1<<16)
 #define   PIPE_CONTROL_QW_WRITE				(1<<14)
 #define   PIPE_CONTROL_POST_SYNC_OP_MASK                (3<<14)
 #define   PIPE_CONTROL_DEPTH_STALL			(1<<13)
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -362,6 +362,7 @@ gen7_render_ring_flush(struct intel_engi
 		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
 		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
 		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
+		flags |= PIPE_CONTROL_MEDIA_STATE_CLEAR;
 		/*
 		 * TLB invalidate requires a post-sync write.
 		 */
From add284a3a2481e759d6bec35f6444c32c8ddc383 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Tue, 16 Dec 2014 08:44:32 +0000
Subject: drm/i915: Force the CS stall for invalidate flushes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

From: Chris Wilson <chris@chris-wilson.co.uk>

commit add284a3a2481e759d6bec35f6444c32c8ddc383 upstream.

In order to act as a full command barrier by itself, we need to tell the
pipecontrol to actually stall the command streamer while the flush runs.
We require the full command barrier before operations like
MI_SET_CONTEXT, which currently rely on a prior invalidate flush.

References: https://bugs.freedesktop.org/show_bug.cgi?id=83677
Cc: Simon Farnsworth <simon@farnz.org.uk>
Cc: Daniel Vetter <daniel@ffwll.ch>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

---
 drivers/gpu/drm/i915/intel_ringbuffer.c |    2 ++
 1 file changed, 2 insertions(+)

--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -369,6 +369,8 @@ gen7_render_ring_flush(struct intel_engi
 		flags |= PIPE_CONTROL_QW_WRITE;
 		flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
 
+		flags |= PIPE_CONTROL_STALL_AT_SCOREBOARD;
+
 		/* Workaround: we must issue a pipe_control with CS-stall bit
 		 * set before a pipe_control command that has the state cache
 		 * invalidate bit set. */
From 2c550183476dfa25641309ae9a28d30feed14379 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Tue, 16 Dec 2014 10:02:27 +0000
Subject: drm/i915: Disable PSMI sleep messages on all rings around context switches
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

From: Chris Wilson <chris@chris-wilson.co.uk>

commit 2c550183476dfa25641309ae9a28d30feed14379 upstream.

There exists a current workaround to prevent a hang on context switch
should the ring go to sleep in the middle of the restore,
WaProgramMiArbOnOffAroundMiSetContext (applicable to all gen7+). In
spite of disabling arbitration (which prevents the ring from powering
down during the critical section) we were still hitting hangs that had
the hallmarks of the known erratum. That is we are still seeing hangs
"on the last instruction in the context restore". By comparing -nightly
(broken) with requests (working), we were able to deduce that it was the
semaphore LRI cross-talk that reproduced the original failure. The key
was that requests implemented deferred semaphore signalling, and
disabling that, i.e. emitting the semaphore signal to every other ring
after every batch restored the frequent hang.  Explicitly disabling PSMI
sleep on the RCS ring was insufficient, all the rings had to be awake to
prevent the hangs. Fortunately, we can reduce the wakelock to the
MI_SET_CONTEXT operation itself, and so should be able to limit the extra
power implications.

Since the MI_ARB_ON_OFF workaround is listed for all gen7 and above
products, we should apply this extra hammer for all of the same
platforms despite so far that we have only been able to reproduce the
hang on certain ivb and hsw models. The last question is whether we want
to always use the extra hammer or only when we know semaphores are in
operation. At the moment, we only use LRI on non-RCS rings for
semaphores, but that may change in the future with the possibility of
reintroducing this bug under subtle conditions.

v2: Make it explicit that the PSMI LRI are an extension to the original
workaround for the other rings.
v3: Bikeshedding variable names and whitespacing

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=80660
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=83677
Cc: Simon Farnsworth <simon@farnz.org.uk>
Cc: Daniel Vetter <daniel@ffwll.ch>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Tested-by: Peter Frühberger <fritsch@xbmc.org>
Reviewed-by: Daniel Vetter <daniel@ffwll.ch>
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

---
 drivers/gpu/drm/i915/i915_gem_context.c |   48 ++++++++++++++++++++++++++------
 drivers/gpu/drm/i915/i915_reg.h         |    2 +
 2 files changed, 42 insertions(+), 8 deletions(-)

--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -468,7 +468,12 @@ mi_set_context(struct intel_engine_cs *r
 	       u32 hw_flags)
 {
 	u32 flags = hw_flags | MI_MM_SPACE_GTT;
-	int ret;
+	const int num_rings =
+		/* Use an extended w/a on ivb+ if signalling from other rings */
+		i915_semaphore_is_enabled(ring->dev) ?
+		hweight32(INTEL_INFO(ring->dev)->ring_mask) - 1 :
+		0;
+	int len, i, ret;
 
 	/* w/a: If Flush TLB Invalidation Mode is enabled, driver must do a TLB
 	 * invalidation prior to MI_SET_CONTEXT. On GEN6 we don't set the value
@@ -485,15 +490,31 @@ mi_set_context(struct intel_engine_cs *r
 	if (!IS_HASWELL(ring->dev) && INTEL_INFO(ring->dev)->gen < 8)
 		flags |= (MI_SAVE_EXT_STATE_EN | MI_RESTORE_EXT_STATE_EN);
 
-	ret = intel_ring_begin(ring, 6);
+
+	len = 4;
+	if (INTEL_INFO(ring->dev)->gen >= 7)
+		len += 2 + (num_rings ? 4*num_rings + 2 : 0);
+
+	ret = intel_ring_begin(ring, len);
 	if (ret)
 		return ret;
 
 	/* WaProgramMiArbOnOffAroundMiSetContext:ivb,vlv,hsw,bdw,chv */
-	if (INTEL_INFO(ring->dev)->gen >= 7)
+	if (INTEL_INFO(ring->dev)->gen >= 7) {
 		intel_ring_emit(ring, MI_ARB_ON_OFF | MI_ARB_DISABLE);
-	else
-		intel_ring_emit(ring, MI_NOOP);
+		if (num_rings) {
+			struct intel_engine_cs *signaller;
+
+			intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(num_rings));
+			for_each_ring(signaller, to_i915(ring->dev), i) {
+				if (signaller == ring)
+					continue;
+
+				intel_ring_emit(ring, RING_PSMI_CTL(signaller->mmio_base));
+				intel_ring_emit(ring, _MASKED_BIT_ENABLE(GEN6_PSMI_SLEEP_MSG_DISABLE));
+			}
+		}
+	}
 
 	intel_ring_emit(ring, MI_NOOP);
 	intel_ring_emit(ring, MI_SET_CONTEXT);
@@ -505,10 +526,21 @@ mi_set_context(struct intel_engine_cs *r
 	 */
 	intel_ring_emit(ring, MI_NOOP);
 
-	if (INTEL_INFO(ring->dev)->gen >= 7)
+	if (INTEL_INFO(ring->dev)->gen >= 7) {
+		if (num_rings) {
+			struct intel_engine_cs *signaller;
+
+			intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(num_rings));
+			for_each_ring(signaller, to_i915(ring->dev), i) {
+				if (signaller == ring)
+					continue;
+
+				intel_ring_emit(ring, RING_PSMI_CTL(signaller->mmio_base));
+				intel_ring_emit(ring, _MASKED_BIT_DISABLE(GEN6_PSMI_SLEEP_MSG_DISABLE));
+			}
+		}
 		intel_ring_emit(ring, MI_ARB_ON_OFF | MI_ARB_ENABLE);
-	else
-		intel_ring_emit(ring, MI_NOOP);
+	}
 
 	intel_ring_advance(ring);
 
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -1073,6 +1073,7 @@ enum punit_power_well {
 #define GEN6_VERSYNC	(RING_SYNC_1(VEBOX_RING_BASE))
 #define GEN6_VEVSYNC	(RING_SYNC_2(VEBOX_RING_BASE))
 #define GEN6_NOSYNC 0
+#define RING_PSMI_CTL(base)	((base)+0x50)
 #define RING_MAX_IDLE(base)	((base)+0x54)
 #define RING_HWS_PGA(base)	((base)+0x80)
 #define RING_HWS_PGA_GEN6(base)	((base)+0x2080)
@@ -1403,6 +1404,7 @@ enum punit_power_well {
 #define   GEN6_BLITTER_FBC_NOTIFY			(1<<3)
 
 #define GEN6_RC_SLEEP_PSMI_CONTROL	0x2050
+#define   GEN6_PSMI_SLEEP_MSG_DISABLE	(1 << 0)
 #define   GEN8_RC_SEMA_IDLE_MSG_DISABLE	(1 << 12)
 #define   GEN8_FF_DOP_CLOCK_GATE_DISABLE	(1<<10)
 
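The ring-space arithmetic above is the subtle part of the change: mi_set_context() must reserve exactly as many dwords as it later emits, or the intel_ring_begin()/intel_ring_advance() bookkeeping will trip. The following is a standalone sanity check of that accounting, written as plain userspace C rather than driver code; the gen and ring counts in main() are illustrative assumptions, not values taken from the patch.

/* Mirrors the len computation in the hunk above.  Breakdown of the emits:
 * 4 base dwords (MI_NOOP, MI_SET_CONTEXT, context address, MI_NOOP);
 * on gen7+ add the MI_ARB_ON_OFF disable/enable pair (2 dwords); and when
 * other rings may signal semaphores, add an MI_LOAD_REGISTER_IMM header
 * plus one register/value pair per other ring on each side of the context
 * switch (2 + 4*num_rings dwords).
 */
#include <stdio.h>

static int mi_set_context_len(int gen, int num_rings)
{
	int len = 4;

	if (gen >= 7)
		len += 2 + (num_rings ? 4 * num_rings + 2 : 0);
	return len;
}

int main(void)
{
	printf("gen6:                     %d dwords\n", mi_set_context_len(6, 0));
	printf("gen7, semaphores off:     %d dwords\n", mi_set_context_len(7, 0));
	/* e.g. a part exposing RCS/VCS/BCS/VECS has 3 "other" rings,
	 * so 4 + 2 + (4*3 + 2) = 20 dwords */
	printf("gen7, 3 signalling rings: %d dwords\n", mi_set_context_len(7, 3));
	return 0;
}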
From 7d47559ee84b3ac206aa9e675606fafcd7c0b500 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Wed, 17 Dec 2014 23:08:03 +0200
Subject: drm/i915: Don't call intel_prepare_page_flip() multiple times on gen2-4
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>

commit 7d47559ee84b3ac206aa9e675606fafcd7c0b500 upstream.

The flip stall detector kicks in when pending>=INTEL_FLIP_COMPLETE. That
means if we first call intel_prepare_page_flip() but don't call
intel_finish_page_flip(), the next stall check will erroneously think
the page flip was somehow stuck.

With enough debug spew emitted from the interrupt handler my 830 hangs
when this happens. My theory is that the previous vblank interrupt gets
sufficiently delayed that the handler will see the pending bit set in
IIR, but ISR still has the bit set as well (ie. the flip was processed
by CS but didn't complete yet). In this case the handler will proceed
to call intel_check_page_flip() immediately after
intel_prepare_page_flip(). It then tries to print a backtrace for the
stuck flip WARN, which apparently results in way too much debug spew
delaying interrupt processing further. That then seems to cause an
endless loop in the interrupt handler, and the machine is dead until
the watchdog kicks in and reboots. At least limiting the number of
iterations of the loop in the interrupt handler also prevented the
hang.

So it seems better to not call intel_prepare_page_flip() without
immediately calling intel_finish_page_flip(). The IIR/ISR trickery
avoids races here so this is a perfectly safe thing to do.

v2: Fix typo in commit message (checkpatch)

Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=88381
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=85888
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

---
 drivers/gpu/drm/i915/i915_irq.c |    6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -4022,8 +4022,6 @@ static bool i8xx_handle_vblank(struct dr
 	if ((iir & flip_pending) == 0)
 		goto check_page_flip;
 
-	intel_prepare_page_flip(dev, plane);
-
 	/* We detect FlipDone by looking for the change in PendingFlip from '1'
 	 * to '0' on the following vblank, i.e. IIR has the Pendingflip
 	 * asserted following the MI_DISPLAY_FLIP, but ISR is deasserted, hence
@@ -4033,6 +4031,7 @@ static bool i8xx_handle_vblank(struct dr
 	if (I915_READ16(ISR) & flip_pending)
 		goto check_page_flip;
 
+	intel_prepare_page_flip(dev, plane);
 	intel_finish_page_flip(dev, pipe);
 	return true;
 
@@ -4210,8 +4209,6 @@ static bool i915_handle_vblank(struct dr
 	if ((iir & flip_pending) == 0)
 		goto check_page_flip;
 
-	intel_prepare_page_flip(dev, plane);
-
 	/* We detect FlipDone by looking for the change in PendingFlip from '1'
 	 * to '0' on the following vblank, i.e. IIR has the Pendingflip
 	 * asserted following the MI_DISPLAY_FLIP, but ISR is deasserted, hence
@@ -4221,6 +4218,7 @@ static bool i915_handle_vblank(struct dr
 	if (I915_READ(ISR) & flip_pending)
 		goto check_page_flip;
 
+	intel_prepare_page_flip(dev, plane);
 	intel_finish_page_flip(dev, pipe);
 	return true;
 
From 7f907bf284ba7bb8d271f094b226699d3fef2142 Mon Sep 17 00:00:00 2001
From: Rob Clark <robdclark@gmail.com>
Date: Sat, 8 Nov 2014 10:16:19 -0500
Subject: drm/irq: BUG_ON() -> WARN_ON()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

From: Rob Clark <robdclark@gmail.com>

commit 7f907bf284ba7bb8d271f094b226699d3fef2142 upstream.

Let's make things a bit easier to debug when things go bad (potentially
under console_lock).

Signed-off-by: Rob Clark <robdclark@gmail.com>
Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
Cc: Anand Moon <moon.linux@yahoo.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

---
 drivers/gpu/drm/drm_irq.c |    3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

--- a/drivers/gpu/drm/drm_irq.c
+++ b/drivers/gpu/drm/drm_irq.c
@@ -1029,7 +1029,8 @@ void drm_vblank_put(struct drm_device *d
 {
 	struct drm_vblank_crtc *vblank = &dev->vblank[crtc];
 
-	BUG_ON(atomic_read(&vblank->refcount) == 0);
+	if (WARN_ON(atomic_read(&vblank->refcount) == 0))
+		return;
 
 	if (WARN_ON(crtc >= dev->num_crtcs))
 		return;
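For readers less familiar with the two macros: BUG_ON() kills the task outright, while WARN_ON() logs a backtrace and evaluates to the condition, so the caller can bail out and keep the machine usable, which matters when the unbalanced put fires under console_lock. Below is a minimal userspace illustration of the control-flow difference; the MY_* macros and put_ref() are stand-ins defined here, not the kernel's.

/* Userspace sketch only: the real WARN_ON()/BUG_ON() live in the kernel. */
#include <stdio.h>
#include <stdlib.h>

#define MY_BUG_ON(cond)  do { if (cond) abort(); } while (0)
#define MY_WARN_ON(cond) ({ int __c = !!(cond); \
	if (__c) fprintf(stderr, "WARN: %s\n", #cond); __c; })

static void put_ref(int *refcount)
{
	/* old pattern: MY_BUG_ON(*refcount == 0); -- would abort the program */
	if (MY_WARN_ON(*refcount == 0))
		return;			/* new pattern: complain and recover */
	(*refcount)--;
}

int main(void)
{
	int refcount = 0;

	put_ref(&refcount);		/* warns, returns, execution continues */
	printf("still alive, refcount=%d\n", refcount);
	return 0;
}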
From e8ef060b37c2d3cc5fd0c0edbe4e42ec1cb9768b Mon Sep 17 00:00:00 2001
From: Vineet Gupta <vgupta@synopsys.com>
Date: Wed, 1 Oct 2014 14:28:36 +0530
Subject: ARC: [nsimosci] move peripherals to match model to FPGA

From: Vineet Gupta <vgupta@synopsys.com>

commit e8ef060b37c2d3cc5fd0c0edbe4e42ec1cb9768b upstream.

This allows the sdplite/Zebu images to run on the OSCI simulation platform

Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

---
 arch/arc/boot/dts/nsimosci.dts |   18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

--- a/arch/arc/boot/dts/nsimosci.dts
+++ b/arch/arc/boot/dts/nsimosci.dts
@@ -20,7 +20,7 @@
 		/* this is for console on PGU */
 		/* bootargs = "console=tty0 consoleblank=0"; */
 		/* this is for console on serial */
-		bootargs = "earlycon=uart8250,mmio32,0xc0000000,115200n8 console=tty0 console=ttyS0,115200n8 consoleblank=0 debug";
+		bootargs = "earlycon=uart8250,mmio32,0xf0000000,115200n8 console=tty0 console=ttyS0,115200n8 consoleblank=0 debug";
 	};
 
 	aliases {
@@ -41,9 +41,9 @@
 			#interrupt-cells = <1>;
 		};
 
-		uart0: serial@c0000000 {
+		uart0: serial@f0000000 {
 			compatible = "ns8250";
-			reg = <0xc0000000 0x2000>;
+			reg = <0xf0000000 0x2000>;
 			interrupts = <11>;
 			clock-frequency = <3686400>;
 			baud = <115200>;
@@ -52,21 +52,21 @@
 			no-loopback-test = <1>;
 		};
 
-		pgu0: pgu@c9000000 {
+		pgu0: pgu@f9000000 {
 			compatible = "snps,arcpgufb";
-			reg = <0xc9000000 0x400>;
+			reg = <0xf9000000 0x400>;
 		};
 
-		ps2: ps2@c9001000 {
+		ps2: ps2@f9001000 {
 			compatible = "snps,arc_ps2";
-			reg = <0xc9000400 0x14>;
+			reg = <0xf9000400 0x14>;
 			interrupts = <13>;
 			interrupt-names = "arc_ps2_irq";
 		};
 
-		eth0: ethernet@c0003000 {
+		eth0: ethernet@f0003000 {
 			compatible = "snps,oscilan";
-			reg = <0xc0003000 0x44>;
+			reg = <0xf0003000 0x44>;
 			interrupts = <7>, <8>;
 			interrupt-names = "rx", "tx";
 		};
From ee41d11d53c8fc4968f0816504651541d606cf40 Mon Sep 17 00:00:00 2001
From: Ian Munsie <imunsie@au1.ibm.com>
Date: Mon, 8 Dec 2014 19:17:55 +1100
Subject: cxl: Change contexts_lock to a mutex to fix sleep while atomic bug

From: Ian Munsie <imunsie@au1.ibm.com>

commit ee41d11d53c8fc4968f0816504651541d606cf40 upstream.

We had a known sleep while atomic bug if a CXL device was forcefully
unbound while it was in use. This could occur as a result of EEH, or
manually induced with something like this while the device was in use:

echo 0000:01:00.0 > /sys/bus/pci/drivers/cxl-pci/unbind

The issue was that in this code path we iterated over each context and
forcefully detached it with the contexts_lock spin lock held; however,
the detach also needed to take the spa_mutex and call schedule().

This patch changes the contexts_lock to a mutex so that we are not in
atomic context while doing the detach, thereby avoiding the sleep while
atomic.

Also delete the related TODO comment, which suggested an alternate
solution that turned out not to be workable.

Signed-off-by: Ian Munsie <imunsie@au1.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

---
 drivers/misc/cxl/context.c |   15 ++++++++-------
 drivers/misc/cxl/cxl.h     |    2 +-
 drivers/misc/cxl/native.c  |    7 -------
 drivers/misc/cxl/pci.c     |    2 +-
 drivers/misc/cxl/sysfs.c   |   10 +++++-----
 5 files changed, 15 insertions(+), 21 deletions(-)

--- a/drivers/misc/cxl/context.c
+++ b/drivers/misc/cxl/context.c
@@ -82,12 +82,12 @@ int cxl_context_init(struct cxl_context
 	 * Allocating IDR! We better make sure everything's setup that
 	 * dereferences from it.
 	 */
+	mutex_lock(&afu->contexts_lock);
 	idr_preload(GFP_KERNEL);
-	spin_lock(&afu->contexts_lock);
 	i = idr_alloc(&ctx->afu->contexts_idr, ctx, 0,
 		      ctx->afu->num_procs, GFP_NOWAIT);
-	spin_unlock(&afu->contexts_lock);
 	idr_preload_end();
+	mutex_unlock(&afu->contexts_lock);
 	if (i < 0)
 		return i;
 
@@ -168,21 +168,22 @@ void cxl_context_detach_all(struct cxl_a
 	struct cxl_context *ctx;
 	int tmp;
 
-	rcu_read_lock();
-	idr_for_each_entry(&afu->contexts_idr, ctx, tmp)
+	mutex_lock(&afu->contexts_lock);
+	idr_for_each_entry(&afu->contexts_idr, ctx, tmp) {
 		/*
 		 * Anything done in here needs to be setup before the IDR is
 		 * created and torn down after the IDR removed
 		 */
 		__detach_context(ctx);
-	rcu_read_unlock();
+	}
+	mutex_unlock(&afu->contexts_lock);
 }
 
 void cxl_context_free(struct cxl_context *ctx)
 {
-	spin_lock(&ctx->afu->contexts_lock);
+	mutex_lock(&ctx->afu->contexts_lock);
 	idr_remove(&ctx->afu->contexts_idr, ctx->pe);
-	spin_unlock(&ctx->afu->contexts_lock);
+	mutex_unlock(&ctx->afu->contexts_lock);
 	synchronize_rcu();
 
 	free_page((u64)ctx->sstp);
--- a/drivers/misc/cxl/cxl.h
+++ b/drivers/misc/cxl/cxl.h
@@ -349,7 +349,7 @@ struct cxl_afu {
 	struct device *chardev_s, *chardev_m, *chardev_d;
 	struct idr contexts_idr;
 	struct dentry *debugfs;
-	spinlock_t contexts_lock;
+	struct mutex contexts_lock;
 	struct mutex spa_mutex;
 	spinlock_t afu_cntl_lock;
 
--- a/drivers/misc/cxl/native.c
+++ b/drivers/misc/cxl/native.c
@@ -610,13 +610,6 @@ static inline int detach_process_native_
 	return 0;
 }
 
-/*
- * TODO: handle case when this is called inside a rcu_read_lock() which may
- * happen when we unbind the driver (ie. cxl_context_detach_all()) .  Terminate
- * & remove use a mutex lock and schedule which will not good with lock held.
- * May need to write do_process_element_cmd() that handles outstanding page
- * faults synchronously.
- */
 static inline int detach_process_native_afu_directed(struct cxl_context *ctx)
 {
 	if (!ctx->pe_inserted)
--- a/drivers/misc/cxl/pci.c
+++ b/drivers/misc/cxl/pci.c
@@ -502,7 +502,7 @@ static struct cxl_afu *cxl_alloc_afu(str
 	afu->dev.release = cxl_release_afu;
 	afu->slice = slice;
 	idr_init(&afu->contexts_idr);
-	spin_lock_init(&afu->contexts_lock);
+	mutex_init(&afu->contexts_lock);
 	spin_lock_init(&afu->afu_cntl_lock);
 	mutex_init(&afu->spa_mutex);
 
--- a/drivers/misc/cxl/sysfs.c
+++ b/drivers/misc/cxl/sysfs.c
@@ -121,7 +121,7 @@ static ssize_t reset_store_afu(struct de
 	int rc;
 
 	/* Not safe to reset if it is currently in use */
-	spin_lock(&afu->contexts_lock);
+	mutex_lock(&afu->contexts_lock);
 	if (!idr_is_empty(&afu->contexts_idr)) {
 		rc = -EBUSY;
 		goto err;
@@ -132,7 +132,7 @@ static ssize_t reset_store_afu(struct de
 
 	rc = count;
 err:
-	spin_unlock(&afu->contexts_lock);
+	mutex_unlock(&afu->contexts_lock);
 	return rc;
 }
 
@@ -247,7 +247,7 @@ static ssize_t mode_store(struct device
 	int rc = -EBUSY;
 
 	/* can't change this if we have a user */
-	spin_lock(&afu->contexts_lock);
+	mutex_lock(&afu->contexts_lock);
 	if (!idr_is_empty(&afu->contexts_idr))
 		goto err;
 
@@ -271,7 +271,7 @@ static ssize_t mode_store(struct device
 	afu->current_mode = 0;
 	afu->num_procs = 0;
 
-	spin_unlock(&afu->contexts_lock);
+	mutex_unlock(&afu->contexts_lock);
 
 	if ((rc = _cxl_afu_deactivate_mode(afu, old_mode)))
 		return rc;
@@ -280,7 +280,7 @@ static ssize_t mode_store(struct device
 
 	return count;
 err:
-	spin_unlock(&afu->contexts_lock);
+	mutex_unlock(&afu->contexts_lock);
 	return rc;
 }
 
From a98e6e9f4e0224d85b4d951edc44af16dfe6094a Mon Sep 17 00:00:00 2001
From: Ian Munsie <imunsie@au1.ibm.com>
Date: Mon, 8 Dec 2014 19:17:56 +1100
Subject: cxl: Add timeout to process element commands

From: Ian Munsie <imunsie@au1.ibm.com>

commit a98e6e9f4e0224d85b4d951edc44af16dfe6094a upstream.

In the event that something goes wrong in the hardware and it is unable
to complete a process element command, we would end up polling forever,
effectively making the associated process unkillable.

This patch adds a timeout to the process element command code path, so
that we will give up if the hardware does not respond in a reasonable
time.

Signed-off-by: Ian Munsie <imunsie@au1.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

---
 drivers/misc/cxl/native.c |    5 +++++
 1 file changed, 5 insertions(+)

--- a/drivers/misc/cxl/native.c
+++ b/drivers/misc/cxl/native.c
@@ -277,6 +277,7 @@ static int do_process_element_cmd(struct
 				  u64 cmd, u64 pe_state)
 {
 	u64 state;
+	unsigned long timeout = jiffies + (HZ * CXL_TIMEOUT);
 
 	WARN_ON(!ctx->afu->enabled);
 
@@ -286,6 +287,10 @@ static int do_process_element_cmd(struct
 	smp_mb();
 	cxl_p1n_write(ctx->afu, CXL_PSL_LLCMD_An, cmd | ctx->pe);
 	while (1) {
+		if (time_after_eq(jiffies, timeout)) {
+			dev_warn(&ctx->afu->dev, "WARNING: Process Element Command timed out!\n");
+			return -EBUSY;
+		}
 		state = be64_to_cpup(ctx->afu->sw_command_status);
 		if (state == ~0ULL) {
 			pr_err("cxl: Error adding process element to AFU\n");
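The fix is the standard jiffies deadline pattern: capture the deadline before entering the poll loop and compare against it with time_after_eq() on each iteration, which also handles jiffies wraparound. A generic sketch of that pattern follows, with a hypothetical read_status callback and timeout constant standing in for the cxl specifics.

/* Kernel-style sketch, hypothetical names; CXL_TIMEOUT and the real status
 * word layout live in the driver and are not reproduced here. */
#include <linux/jiffies.h>
#include <linux/types.h>
#include <linux/errno.h>
#include <asm/processor.h>	/* cpu_relax() */

#define MY_TIMEOUT_SECS	5	/* illustrative value */

static int wait_for_hw_status(u64 (*read_status)(void), u64 wanted)
{
	unsigned long timeout = jiffies + HZ * MY_TIMEOUT_SECS;

	while (read_status() != wanted) {
		if (time_after_eq(jiffies, timeout))
			return -EBUSY;	/* give up instead of hanging the task */
		cpu_relax();
	}
	return 0;
}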
From b123429e6a9e8d03aacf888d23262835f0081448 Mon Sep 17 00:00:00 2001
From: Ian Munsie <imunsie@au1.ibm.com>
Date: Mon, 8 Dec 2014 19:18:01 +1100
Subject: cxl: Unmap MMIO regions when detaching a context

From: Ian Munsie <imunsie@au1.ibm.com>

commit b123429e6a9e8d03aacf888d23262835f0081448 upstream.

If we need to force detach a context (e.g. due to EEH or simply force
unbinding the driver), we should prevent the userspace contexts from
further accessing the Problem State Area MMIO region, which they may
have mapped with mmap().

This patch unmaps any mapped MMIO regions when detaching a userspace
context.

Signed-off-by: Ian Munsie <imunsie@au1.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

---
 drivers/misc/cxl/context.c |   11 ++++++++++-
 drivers/misc/cxl/cxl.h     |    7 ++++++-
 drivers/misc/cxl/file.c    |    6 +++++-
 3 files changed, 21 insertions(+), 3 deletions(-)

--- a/drivers/misc/cxl/context.c
+++ b/drivers/misc/cxl/context.c
@@ -34,7 +34,8 @@ struct cxl_context *cxl_context_alloc(vo
 /*
  * Initialises a CXL context.
  */
-int cxl_context_init(struct cxl_context *ctx, struct cxl_afu *afu, bool master)
+int cxl_context_init(struct cxl_context *ctx, struct cxl_afu *afu, bool master,
+		     struct address_space *mapping)
 {
 	int i;
 
@@ -42,6 +43,8 @@ int cxl_context_init(struct cxl_context
 	ctx->afu = afu;
 	ctx->master = master;
 	ctx->pid = NULL; /* Set in start work ioctl */
+	mutex_init(&ctx->mapping_lock);
+	ctx->mapping = mapping;
 
 	/*
 	 * Allocate the segment table before we put it in the IDR so that we
@@ -147,6 +150,12 @@ static void __detach_context(struct cxl_
 	afu_release_irqs(ctx);
 	flush_work(&ctx->fault_work); /* Only needed for dedicated process */
 	wake_up_all(&ctx->wq);
+
+	/* Release Problem State Area mapping */
+	mutex_lock(&ctx->mapping_lock);
+	if (ctx->mapping)
+		unmap_mapping_range(ctx->mapping, 0, 0, 1);
+	mutex_unlock(&ctx->mapping_lock);
 }
 
 /*
--- a/drivers/misc/cxl/cxl.h
+++ b/drivers/misc/cxl/cxl.h
@@ -390,6 +390,10 @@ struct cxl_context {
 	phys_addr_t psn_phys;
 	u64 psn_size;
 
+	/* Used to unmap any mmaps when force detaching */
+	struct address_space *mapping;
+	struct mutex mapping_lock;
+
 	spinlock_t sste_lock; /* Protects segment table entries */
 	struct cxl_sste *sstp;
 	u64 sstp0, sstp1;
@@ -592,7 +596,8 @@ int cxl_alloc_sst(struct cxl_context *ct
 void init_cxl_native(void);
 
 struct cxl_context *cxl_context_alloc(void);
-int cxl_context_init(struct cxl_context *ctx, struct cxl_afu *afu, bool master);
+int cxl_context_init(struct cxl_context *ctx, struct cxl_afu *afu, bool master,
+		     struct address_space *mapping);
 void cxl_context_free(struct cxl_context *ctx);
 int cxl_context_iomap(struct cxl_context *ctx, struct vm_area_struct *vma);
 
--- a/drivers/misc/cxl/file.c
+++ b/drivers/misc/cxl/file.c
@@ -77,7 +77,7 @@ static int __afu_open(struct inode *inod
 		goto err_put_afu;
 	}
 
-	if ((rc = cxl_context_init(ctx, afu, master)))
+	if ((rc = cxl_context_init(ctx, afu, master, inode->i_mapping)))
 		goto err_put_afu;
 
 	pr_devel("afu_open pe: %i\n", ctx->pe);
@@ -113,6 +113,10 @@ static int afu_release(struct inode *ino
 		 __func__, ctx->pe);
 	cxl_context_detach(ctx);
 
+	mutex_lock(&ctx->mapping_lock);
+	ctx->mapping = NULL;
+	mutex_unlock(&ctx->mapping_lock);
+
 	put_device(&ctx->afu->dev);
 
 	/*