Blob Blame History Raw
From 14b602561ad208203fb04a8eb3df052ad2c6551a Mon Sep 17 00:00:00 2001
From: Guillaume Nault <g.nault@alphalink.fr>
Date: Thu, 3 Dec 2015 16:49:32 +0100
Subject: [PATCH 01/34] pppoe: fix memory corruption in padt work structure

[ Upstream commit fe53985aaac83d516b38358d4f39921d9942a0e2 ]

pppoe_connect() mustn't touch the padt_work field of pppoe sockets
because that work could be already pending.

[   21.473147] BUG: unable to handle kernel NULL pointer dereference at 00000004
[   21.474523] IP: [<c1043177>] process_one_work+0x29/0x31c
[   21.475164] *pde = 00000000
[   21.475513] Oops: 0000 [#1] SMP
[   21.475910] Modules linked in: pppoe pppox ppp_generic slhc crc32c_intel aesni_intel virtio_net xts aes_i586 lrw gf128mul ablk_helper cryptd evdev acpi_cpufreq processor serio_raw button ext4 crc16 mbcache jbd2 virtio_blk virtio_pci virtio_ring virtio
[   21.476168] CPU: 2 PID: 164 Comm: kworker/2:2 Not tainted 4.4.0-rc1 #1
[   21.476168] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Debian-1.8.2-1 04/01/2014
[   21.476168] task: f5f83c00 ti: f5e28000 task.ti: f5e28000
[   21.476168] EIP: 0060:[<c1043177>] EFLAGS: 00010046 CPU: 2
[   21.476168] EIP is at process_one_work+0x29/0x31c
[   21.484082] EAX: 00000000 EBX: f678b2a0 ECX: 00000004 EDX: 00000000
[   21.484082] ESI: f6c69940 EDI: f5e29ef0 EBP: f5e29f0c ESP: f5e29edc
[   21.484082]  DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0068
[   21.484082] CR0: 80050033 CR2: 000000a4 CR3: 317ad000 CR4: 00040690
[   21.484082] Stack:
[   21.484082]  00000000 f6c69950 00000000 f6c69940 c0042338 f5e29f0c c1327945 00000000
[   21.484082]  00000008 f678b2a0 f6c69940 f678b2b8 f5e29f30 c1043984 f5f83c00 f6c69970
[   21.484082]  f678b2a0 c10437d3 f6775e80 f678b2a0 c10437d3 f5e29fac c1047059 f5e29f74
[   21.484082] Call Trace:
[   21.484082]  [<c1327945>] ? _raw_spin_lock_irq+0x28/0x30
[   21.484082]  [<c1043984>] worker_thread+0x1b1/0x244
[   21.484082]  [<c10437d3>] ? rescuer_thread+0x229/0x229
[   21.484082]  [<c10437d3>] ? rescuer_thread+0x229/0x229
[   21.484082]  [<c1047059>] kthread+0x8f/0x94
[   21.484082]  [<c1327a32>] ? _raw_spin_unlock_irq+0x22/0x26
[   21.484082]  [<c1327ee9>] ret_from_kernel_thread+0x21/0x38
[   21.484082]  [<c1046fca>] ? kthread_parkme+0x19/0x19
[   21.496082] Code: 5d c3 55 89 e5 57 56 53 89 c3 83 ec 24 89 d0 89 55 e0 8d 7d e4 e8 6c d8 ff ff b9 04 00 00 00 89 45 d8 8b 43 24 89 45 dc 8b 45 d8 <8b> 40 04 8b 80 e0 00 00 00 c1 e8 05 24 01 88 45 d7 8b 45 e0 8d
[   21.496082] EIP: [<c1043177>] process_one_work+0x29/0x31c SS:ESP 0068:f5e29edc
[   21.496082] CR2: 0000000000000004
[   21.496082] ---[ end trace e362cc9cf10dae89 ]---

Reported-by: Andrew <nitr0@seti.kr.ua>
Fixes: 287f3a943fef ("pppoe: Use workqueue to die properly when a PADT is received")
Signed-off-by: Guillaume Nault <g.nault@alphalink.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ppp/pppoe.c | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c
index 5e0b432..0a37f84 100644
--- a/drivers/net/ppp/pppoe.c
+++ b/drivers/net/ppp/pppoe.c
@@ -568,6 +568,9 @@ static int pppoe_create(struct net *net, struct socket *sock, int kern)
 	sk->sk_family		= PF_PPPOX;
 	sk->sk_protocol		= PX_PROTO_OE;
 
+	INIT_WORK(&pppox_sk(sk)->proto.pppoe.padt_work,
+		  pppoe_unbind_sock_work);
+
 	return 0;
 }
 
@@ -632,8 +635,6 @@ static int pppoe_connect(struct socket *sock, struct sockaddr *uservaddr,
 
 	lock_sock(sk);
 
-	INIT_WORK(&po->proto.pppoe.padt_work, pppoe_unbind_sock_work);
-
 	error = -EINVAL;
 	if (sp->sa_protocol != PX_PROTO_OE)
 		goto end;
@@ -663,8 +664,13 @@ static int pppoe_connect(struct socket *sock, struct sockaddr *uservaddr,
 			po->pppoe_dev = NULL;
 		}
 
-		memset(sk_pppox(po) + 1, 0,
-		       sizeof(struct pppox_sock) - sizeof(struct sock));
+		po->pppoe_ifindex = 0;
+		memset(&po->pppoe_pa, 0, sizeof(po->pppoe_pa));
+		memset(&po->pppoe_relay, 0, sizeof(po->pppoe_relay));
+		memset(&po->chan, 0, sizeof(po->chan));
+		po->next = NULL;
+		po->num = 0;
+
 		sk->sk_state = PPPOX_NONE;
 	}
 
-- 
2.4.1


From 2d5925b5a6011084d1fac6b8d8625ddbcb7d95a6 Mon Sep 17 00:00:00 2001
From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Date: Thu, 3 Dec 2015 17:21:50 +0100
Subject: [PATCH 02/34] gre6: allow to update all parameters via rtnl

[ Upstream commit 6a61d4dbf4f54b5683e0f1e58d873cecca7cb977 ]

Parameters were updated only if the kernel was unable to find the tunnel
with the new parameters, ie only if core pamareters were updated (keys,
addr, link, type).
Now it's possible to update ttl, hoplimit, flowinfo and flags.

Fixes: c12b395a4664 ("gre: Support GRE over IPv6")
Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_gre.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 3c7b931..e5ea177 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -1571,13 +1571,11 @@ static int ip6gre_changelink(struct net_device *dev, struct nlattr *tb[],
 			return -EEXIST;
 	} else {
 		t = nt;
-
-		ip6gre_tunnel_unlink(ign, t);
-		ip6gre_tnl_change(t, &p, !tb[IFLA_MTU]);
-		ip6gre_tunnel_link(ign, t);
-		netdev_state_change(dev);
 	}
 
+	ip6gre_tunnel_unlink(ign, t);
+	ip6gre_tnl_change(t, &p, !tb[IFLA_MTU]);
+	ip6gre_tunnel_link(ign, t);
 	return 0;
 }
 
-- 
2.4.1


From 3960bc911e092832dac3f9a42b435d2ec566e412 Mon Sep 17 00:00:00 2001
From: Pavel Machek <pavel@ucw.cz>
Date: Fri, 4 Dec 2015 09:50:00 +0100
Subject: [PATCH 03/34] atl1c: Improve driver not to do order 4 GFP_ATOMIC
 allocation

[ Upstream commit f2a3771ae8aca879c32336c76ad05a017629bae2 ]

atl1c driver is doing order-4 allocation with GFP_ATOMIC
priority. That often breaks  networking after resume. Switch to
GFP_KERNEL. Still not ideal, but should be significantly better.

atl1c_setup_ring_resources() is called from .open() function, and
already uses GFP_KERNEL, so this change is safe.

Signed-off-by: Pavel Machek <pavel@ucw.cz>
Acked-by: Michal Hocko <mhocko@suse.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/atheros/atl1c/atl1c_main.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
index 2795d6d..8b5988e 100644
--- a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
+++ b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
@@ -1016,13 +1016,12 @@ static int atl1c_setup_ring_resources(struct atl1c_adapter *adapter)
 		sizeof(struct atl1c_recv_ret_status) * rx_desc_count +
 		8 * 4;
 
-	ring_header->desc = pci_alloc_consistent(pdev, ring_header->size,
-				&ring_header->dma);
+	ring_header->desc = dma_zalloc_coherent(&pdev->dev, ring_header->size,
+						&ring_header->dma, GFP_KERNEL);
 	if (unlikely(!ring_header->desc)) {
-		dev_err(&pdev->dev, "pci_alloc_consistend failed\n");
+		dev_err(&pdev->dev, "could not get memory for DMA buffer\n");
 		goto err_nomem;
 	}
-	memset(ring_header->desc, 0, ring_header->size);
 	/* init TPD ring */
 
 	tpd_ring[0].dma = roundup(ring_header->dma, 8);
-- 
2.4.1


From cf2265157f68424a83d74a70962781c0470d3e83 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= <bjorn@mork.no>
Date: Fri, 4 Dec 2015 14:15:08 +0100
Subject: [PATCH 04/34] ipv6: keep existing flags when setting IFA_F_OPTIMISTIC
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

[ Upstream commit 9a1ec4612c9bfc94d4185e3459055a37a685e575 ]

Commit 64236f3f3d74 ("ipv6: introduce IFA_F_STABLE_PRIVACY flag")
failed to update the setting of the IFA_F_OPTIMISTIC flag, causing
the IFA_F_STABLE_PRIVACY flag to be lost if IFA_F_OPTIMISTIC is set.

Cc: Erik Kline <ek@google.com>
Cc: Fernando Gont <fgont@si6networks.com>
Cc: Lorenzo Colitti <lorenzo@google.com>
Cc: YOSHIFUJI Hideaki/吉藤英明 <hideaki.yoshifuji@miraclelinux.com>
Cc: Hannes Frederic Sowa <hannes@stressinduktion.org>
Fixes: 64236f3f3d74 ("ipv6: introduce IFA_F_STABLE_PRIVACY flag")
Signed-off-by: Bjørn Mork <bjorn@mork.no>
Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 3939dd2..ff873c8 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2453,7 +2453,7 @@ ok:
 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
 			if (in6_dev->cnf.optimistic_dad &&
 			    !net->ipv6.devconf_all->forwarding && sllao)
-				addr_flags = IFA_F_OPTIMISTIC;
+				addr_flags |= IFA_F_OPTIMISTIC;
 #endif
 
 			/* Do not allow to create too much of autoconfigured
-- 
2.4.1


From e37caf36e65b943cb28ce6ce2d7bfb3c406ec277 Mon Sep 17 00:00:00 2001
From: Jiri Benc <jbenc@redhat.com>
Date: Fri, 4 Dec 2015 13:54:03 +0100
Subject: [PATCH 05/34] vxlan: fix incorrect RCO bit in VXLAN header

[ Upstream commit c5fb8caaf91ea6a92920cf24db10cfc94d58de0f ]

Commit 3511494ce2f3d ("vxlan: Group Policy extension") changed definition of
VXLAN_HF_RCO from 0x00200000 to BIT(24). This is obviously incorrect. It's
also in violation with the RFC draft.

Fixes: 3511494ce2f3d ("vxlan: Group Policy extension")
Cc: Thomas Graf <tgraf@suug.ch>
Cc: Tom Herbert <therbert@google.com>
Signed-off-by: Jiri Benc <jbenc@redhat.com>
Acked-by: Tom Herbert <tom@herbertland.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/vxlan.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/net/vxlan.h b/include/net/vxlan.h
index 480a319..f4a4972 100644
--- a/include/net/vxlan.h
+++ b/include/net/vxlan.h
@@ -79,7 +79,7 @@ struct vxlanhdr {
 };
 
 /* VXLAN header flags. */
-#define VXLAN_HF_RCO BIT(24)
+#define VXLAN_HF_RCO BIT(21)
 #define VXLAN_HF_VNI BIT(27)
 #define VXLAN_HF_GBP BIT(31)
 
-- 
2.4.1


From f7804be938a80839063eae490106d196281c763b Mon Sep 17 00:00:00 2001
From: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Date: Fri, 4 Dec 2015 15:14:03 -0200
Subject: [PATCH 06/34] sctp: use the same clock as if sock source timestamps
 were on

[ Upstream commit cb5e173ed7c03a0d4630ce68a95a186cce3cc872 ]

SCTP echoes a cookie o INIT ACK chunks that contains a timestamp, for
detecting stale cookies. This cookie is echoed back to the server by the
client and then that timestamp is checked.

Thing is, if the listening socket is using packet timestamping, the
cookie is encoded with ktime_get() value and checked against
ktime_get_real(), as done by __net_timestamp().

The fix is to sctp also use ktime_get_real(), so we can compare bananas
with bananas later no matter if packet timestamping was enabled or not.

Fixes: 52db882f3fc2 ("net: sctp: migrate cookie life from timeval to ktime")
Signed-off-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Acked-by: Vlad Yasevich <vyasevich@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/sm_make_chunk.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 7954c52..8d67d72 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -1652,7 +1652,7 @@ static sctp_cookie_param_t *sctp_pack_cookie(const struct sctp_endpoint *ep,
 
 	/* Set an expiration time for the cookie.  */
 	cookie->c.expiration = ktime_add(asoc->cookie_life,
-					 ktime_get());
+					 ktime_get_real());
 
 	/* Copy the peer's init packet.  */
 	memcpy(&cookie->c.peer_init[0], init_chunk->chunk_hdr,
@@ -1780,7 +1780,7 @@ no_hmac:
 	if (sock_flag(ep->base.sk, SOCK_TIMESTAMP))
 		kt = skb_get_ktime(skb);
 	else
-		kt = ktime_get();
+		kt = ktime_get_real();
 
 	if (!asoc && ktime_before(bear_cookie->expiration, kt)) {
 		/*
-- 
2.4.1


From 46ad18aad09a087729289c5e5f57c86d1aab8d56 Mon Sep 17 00:00:00 2001
From: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Date: Fri, 4 Dec 2015 15:14:04 -0200
Subject: [PATCH 07/34] sctp: update the netstamp_needed counter when copying
 sockets

[ Upstream commit 01ce63c90170283a9855d1db4fe81934dddce648 ]

Dmitry Vyukov reported that SCTP was triggering a WARN on socket destroy
related to disabling sock timestamp.

When SCTP accepts an association or peel one off, it copies sock flags
but forgot to call net_enable_timestamp() if a packet timestamping flag
was copied, leading to extra calls to net_disable_timestamp() whenever
such clones were closed.

The fix is to call net_enable_timestamp() whenever we copy a sock with
that flag on, like tcp does.

Reported-by: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Acked-by: Vlad Yasevich <vyasevich@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h | 2 ++
 net/core/sock.c    | 2 --
 net/sctp/socket.c  | 3 +++
 3 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/include/net/sock.h b/include/net/sock.h
index e237170..492855d 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -724,6 +724,8 @@ enum sock_flags {
 	SOCK_SELECT_ERR_QUEUE, /* Wake select on error queue */
 };
 
+#define SK_FLAGS_TIMESTAMP ((1UL << SOCK_TIMESTAMP) | (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE))
+
 static inline void sock_copy_flags(struct sock *nsk, struct sock *osk)
 {
 	nsk->sk_flags = osk->sk_flags;
diff --git a/net/core/sock.c b/net/core/sock.c
index 3307c02..d7a7fc5 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -422,8 +422,6 @@ static void sock_warn_obsolete_bsdism(const char *name)
 	}
 }
 
-#define SK_FLAGS_TIMESTAMP ((1UL << SOCK_TIMESTAMP) | (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE))
-
 static void sock_disable_timestamp(struct sock *sk, unsigned long flags)
 {
 	if (sk->sk_flags & flags) {
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 3ec88be..f19a67c 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -7195,6 +7195,9 @@ void sctp_copy_sock(struct sock *newsk, struct sock *sk,
 	newinet->mc_ttl = 1;
 	newinet->mc_index = 0;
 	newinet->mc_list = NULL;
+
+	if (newsk->sk_flags & SK_FLAGS_TIMESTAMP)
+		net_enable_timestamp();
 }
 
 static inline void sctp_copy_descendant(struct sock *sk_to,
-- 
2.4.1


From 0081745cc115ec4147644b9ed464efc1bff5846e Mon Sep 17 00:00:00 2001
From: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Date: Fri, 4 Dec 2015 15:14:05 -0200
Subject: [PATCH 08/34] sctp: also copy sk_tsflags when copying the socket

[ Upstream commit 50a5ffb1ef535e3c6989711c51b5d61b543a3b45 ]

As we are keeping timestamps on when copying the socket, we also have to
copy sk_tsflags.

This is needed since b9f40e21ef42 ("net-timestamp: move timestamp flags
out of sk_flags").

Signed-off-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Acked-by: Vlad Yasevich <vyasevich@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/socket.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index f19a67c..84b1b50 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -7163,6 +7163,7 @@ void sctp_copy_sock(struct sock *newsk, struct sock *sk,
 	newsk->sk_type = sk->sk_type;
 	newsk->sk_bound_dev_if = sk->sk_bound_dev_if;
 	newsk->sk_flags = sk->sk_flags;
+	newsk->sk_tsflags = sk->sk_tsflags;
 	newsk->sk_no_check_tx = sk->sk_no_check_tx;
 	newsk->sk_no_check_rx = sk->sk_no_check_rx;
 	newsk->sk_reuse = sk->sk_reuse;
-- 
2.4.1


From f1cf5767d87c24f3e9c7a780651230cc34485c39 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= <bjorn@mork.no>
Date: Sat, 5 Dec 2015 13:01:50 +0100
Subject: [PATCH 09/34] net: cdc_mbim: add "NDP to end" quirk for Huawei E3372
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

[ Upstream commit f8c0cfa5eca902d388c0b57c7ca29a1ff2e6d8c6 ]

The Huawei E3372 (12d1:157d) needs this quirk in MBIM mode
as well. Allow this by forcing the NTB to contain only a
single NDP, and add a device specific entry for this ID.

Due to the way Huawei use device IDs, this might be applied
to other modems as well.  It is assumed that those modems
will be based on the same firmware and will need this quirk
too.  If not, it will still not harm normal usage, although
multiplexing performance could be impacted.

Cc: Enrico Mioso <mrkiko.rs@gmail.com>
Reported-by: Sami Farin <hvtaifwkbgefbaei@gmail.com>
Signed-off-by: Bjørn Mork <bjorn@mork.no>
Acked-By: Enrico Mioso <mrkiko.rs@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/cdc_mbim.c | 26 +++++++++++++++++++++++++-
 drivers/net/usb/cdc_ncm.c  | 10 +++++++++-
 2 files changed, 34 insertions(+), 2 deletions(-)

diff --git a/drivers/net/usb/cdc_mbim.c b/drivers/net/usb/cdc_mbim.c
index efc18e0..b6ea6ff 100644
--- a/drivers/net/usb/cdc_mbim.c
+++ b/drivers/net/usb/cdc_mbim.c
@@ -158,7 +158,7 @@ static int cdc_mbim_bind(struct usbnet *dev, struct usb_interface *intf)
 	if (!cdc_ncm_comm_intf_is_mbim(intf->cur_altsetting))
 		goto err;
 
-	ret = cdc_ncm_bind_common(dev, intf, data_altsetting, 0);
+	ret = cdc_ncm_bind_common(dev, intf, data_altsetting, dev->driver_info->data);
 	if (ret)
 		goto err;
 
@@ -582,6 +582,26 @@ static const struct driver_info cdc_mbim_info_zlp = {
 	.tx_fixup = cdc_mbim_tx_fixup,
 };
 
+/* The spefication explicitly allows NDPs to be placed anywhere in the
+ * frame, but some devices fail unless the NDP is placed after the IP
+ * packets.  Using the CDC_NCM_FLAG_NDP_TO_END flags to force this
+ * behaviour.
+ *
+ * Note: The current implementation of this feature restricts each NTB
+ * to a single NDP, implying that multiplexed sessions cannot share an
+ * NTB. This might affect performace for multiplexed sessions.
+ */
+static const struct driver_info cdc_mbim_info_ndp_to_end = {
+	.description = "CDC MBIM",
+	.flags = FLAG_NO_SETINT | FLAG_MULTI_PACKET | FLAG_WWAN,
+	.bind = cdc_mbim_bind,
+	.unbind = cdc_mbim_unbind,
+	.manage_power = cdc_mbim_manage_power,
+	.rx_fixup = cdc_mbim_rx_fixup,
+	.tx_fixup = cdc_mbim_tx_fixup,
+	.data = CDC_NCM_FLAG_NDP_TO_END,
+};
+
 static const struct usb_device_id mbim_devs[] = {
 	/* This duplicate NCM entry is intentional. MBIM devices can
 	 * be disguised as NCM by default, and this is necessary to
@@ -597,6 +617,10 @@ static const struct usb_device_id mbim_devs[] = {
 	{ USB_VENDOR_AND_INTERFACE_INFO(0x0bdb, USB_CLASS_COMM, USB_CDC_SUBCLASS_MBIM, USB_CDC_PROTO_NONE),
 	  .driver_info = (unsigned long)&cdc_mbim_info,
 	},
+	/* Huawei E3372 fails unless NDP comes after the IP packets */
+	{ USB_DEVICE_AND_INTERFACE_INFO(0x12d1, 0x157d, USB_CLASS_COMM, USB_CDC_SUBCLASS_MBIM, USB_CDC_PROTO_NONE),
+	  .driver_info = (unsigned long)&cdc_mbim_info_ndp_to_end,
+	},
 	/* default entry */
 	{ USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_MBIM, USB_CDC_PROTO_NONE),
 	  .driver_info = (unsigned long)&cdc_mbim_info_zlp,
diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c
index db40175..fa41a6d 100644
--- a/drivers/net/usb/cdc_ncm.c
+++ b/drivers/net/usb/cdc_ncm.c
@@ -1006,10 +1006,18 @@ static struct usb_cdc_ncm_ndp16 *cdc_ncm_ndp(struct cdc_ncm_ctx *ctx, struct sk_
 	* NTH16 header as we would normally do. NDP isn't written to the SKB yet, and
 	* the wNdpIndex field in the header is actually not consistent with reality. It will be later.
 	*/
-	if (ctx->drvflags & CDC_NCM_FLAG_NDP_TO_END)
+	if (ctx->drvflags & CDC_NCM_FLAG_NDP_TO_END) {
 		if (ctx->delayed_ndp16->dwSignature == sign)
 			return ctx->delayed_ndp16;
 
+		/* We can only push a single NDP to the end. Return
+		 * NULL to send what we've already got and queue this
+		 * skb for later.
+		 */
+		else if (ctx->delayed_ndp16->dwSignature)
+			return NULL;
+	}
+
 	/* follow the chain of NDPs, looking for a match */
 	while (ndpoffset) {
 		ndp16 = (struct usb_cdc_ncm_ndp16 *)(skb->data + ndpoffset);
-- 
2.4.1


From 9ae2d6c090172e8f1782af19b10679f15bd42350 Mon Sep 17 00:00:00 2001
From: Stefan Wahren <stefan.wahren@i2se.com>
Date: Fri, 4 Dec 2015 16:29:10 +0100
Subject: [PATCH 10/34] net: qca_spi: fix transmit queue timeout handling

[ Upstream commit ed7d42e24effbd3681e909711a7a2119a85e9217 ]

In case of a tx queue timeout every transmit is blocked until the
QCA7000 resets himself and triggers a sync which makes the driver
flushs the tx ring. So avoid this blocking situation by triggering
the sync immediately after the timeout. Waking the queue doesn't
make sense in this situation.

Signed-off-by: Stefan Wahren <stefan.wahren@i2se.com>
Fixes: 291ab06ecf67 ("net: qualcomm: new Ethernet over SPI driver for QCA7000")
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qualcomm/qca_spi.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/qualcomm/qca_spi.c b/drivers/net/ethernet/qualcomm/qca_spi.c
index 2f87909..60ccc29 100644
--- a/drivers/net/ethernet/qualcomm/qca_spi.c
+++ b/drivers/net/ethernet/qualcomm/qca_spi.c
@@ -736,9 +736,8 @@ qcaspi_netdev_tx_timeout(struct net_device *dev)
 	netdev_info(qca->net_dev, "Transmit timeout at %ld, latency %ld\n",
 		    jiffies, jiffies - dev->trans_start);
 	qca->net_dev->stats.tx_errors++;
-	/* wake the queue if there is room */
-	if (qcaspi_tx_ring_has_space(&qca->txr))
-		netif_wake_queue(dev);
+	/* Trigger tx queue flush and QCA7000 reset */
+	qca->sync = QCASPI_SYNC_UNKNOWN;
 }
 
 static int
-- 
2.4.1


From 43979766bb4c6f2e2c549793e0e659b22fb3c6de Mon Sep 17 00:00:00 2001
From: Peter Wu <peter@lekensteyn.nl>
Date: Tue, 8 Dec 2015 12:17:42 +0100
Subject: [PATCH 11/34] r8152: fix lockup when runtime PM is enabled

[ Upstream commit 90186af404ada5a47b875bf3c16d0b02bb023ea0 ]

When an interface is brought up which was previously suspended (via
runtime PM), it would hang. This happens because napi_disable is called
before napi_enable.

Solve this by avoiding napi_enable in the resume during open function
(netif_running is true when open is called, IFF_UP is set after a
successful open; netif_running is false when close is called, but IFF_UP
is then still set).

While at it, remove WORK_ENABLE check from rtl8152_open (introduced with
the original change) because it cannot happen:

 - After this patch, runtime resume will not set it during rtl8152_open.
 - When link is up, rtl8152_open is not called.
 - When link is down during system/auto suspend/resume, it is not set.

Fixes: 41cec84cf285 ("r8152: don't enable napi before rx ready")
Link: https://lkml.kernel.org/r/20151205105912.GA1766@al
Signed-off-by: Peter Wu <peter@lekensteyn.nl>
Acked-by: Hayes Wang <hayeswang@realtek.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/r8152.c | 21 +++------------------
 1 file changed, 3 insertions(+), 18 deletions(-)

diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c
index d9427ca..2e32c41 100644
--- a/drivers/net/usb/r8152.c
+++ b/drivers/net/usb/r8152.c
@@ -3067,17 +3067,6 @@ static int rtl8152_open(struct net_device *netdev)
 
 	mutex_lock(&tp->control);
 
-	/* The WORK_ENABLE may be set when autoresume occurs */
-	if (test_bit(WORK_ENABLE, &tp->flags)) {
-		clear_bit(WORK_ENABLE, &tp->flags);
-		usb_kill_urb(tp->intr_urb);
-		cancel_delayed_work_sync(&tp->schedule);
-
-		/* disable the tx/rx, if the workqueue has enabled them. */
-		if (netif_carrier_ok(netdev))
-			tp->rtl_ops.disable(tp);
-	}
-
 	tp->rtl_ops.up(tp);
 
 	rtl8152_set_speed(tp, AUTONEG_ENABLE,
@@ -3124,12 +3113,6 @@ static int rtl8152_close(struct net_device *netdev)
 	} else {
 		mutex_lock(&tp->control);
 
-		/* The autosuspend may have been enabled and wouldn't
-		 * be disable when autoresume occurs, because the
-		 * netif_running() would be false.
-		 */
-		rtl_runtime_suspend_enable(tp, false);
-
 		tp->rtl_ops.down(tp);
 
 		mutex_unlock(&tp->control);
@@ -3512,7 +3495,7 @@ static int rtl8152_resume(struct usb_interface *intf)
 		netif_device_attach(tp->netdev);
 	}
 
-	if (netif_running(tp->netdev)) {
+	if (netif_running(tp->netdev) && tp->netdev->flags & IFF_UP) {
 		if (test_bit(SELECTIVE_SUSPEND, &tp->flags)) {
 			rtl_runtime_suspend_enable(tp, false);
 			clear_bit(SELECTIVE_SUSPEND, &tp->flags);
@@ -3532,6 +3515,8 @@ static int rtl8152_resume(struct usb_interface *intf)
 		}
 		usb_submit_urb(tp->intr_urb, GFP_KERNEL);
 	} else if (test_bit(SELECTIVE_SUSPEND, &tp->flags)) {
+		if (tp->netdev->flags & IFF_UP)
+			rtl_runtime_suspend_enable(tp, false);
 		clear_bit(SELECTIVE_SUSPEND, &tp->flags);
 	}
 
-- 
2.4.1


From bc386e51daba6547cb14ac06baf1f56c9274abde Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 9 Dec 2015 07:25:06 -0800
Subject: [PATCH 12/34] ipv6: sctp: clone options to avoid use after free

[ Upstream commit 9470e24f35ab81574da54e69df90c1eb4a96b43f ]

SCTP is lacking proper np->opt cloning at accept() time.

TCP and DCCP use ipv6_dup_options() helper, do the same
in SCTP.

We might later factorize this code in a common helper to avoid
future mistakes.

Reported-by: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Vlad Yasevich <vyasevich@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/ipv6.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index e917d27..40677cf 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -635,6 +635,7 @@ static struct sock *sctp_v6_create_accept_sk(struct sock *sk,
 	struct sock *newsk;
 	struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
 	struct sctp6_sock *newsctp6sk;
+	struct ipv6_txoptions *opt;
 
 	newsk = sk_alloc(sock_net(sk), PF_INET6, GFP_KERNEL, sk->sk_prot, 0);
 	if (!newsk)
@@ -654,6 +655,13 @@ static struct sock *sctp_v6_create_accept_sk(struct sock *sk,
 
 	memcpy(newnp, np, sizeof(struct ipv6_pinfo));
 
+	rcu_read_lock();
+	opt = rcu_dereference(np->opt);
+	if (opt)
+		opt = ipv6_dup_options(newsk, opt);
+	RCU_INIT_POINTER(newnp->opt, opt);
+	rcu_read_unlock();
+
 	/* Initialize sk's sport, dport, rcv_saddr and daddr for getsockname()
 	 * and getpeername().
 	 */
-- 
2.4.1


From 15287a6a3cbca306a66640e371b2684e50a22565 Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Wed, 9 Dec 2015 19:56:31 +0100
Subject: [PATCH 13/34] phy: micrel: Fix finding PHY properties in MAC node.

[ Upstream commit 651df2183543bc92f5dbcf99cd9e236ead0bc4c5 ]

commit 8b63ec1837fa ("phylib: Make PHYs children of their MDIO bus,
not the bus' parent.")  changed the parenting of PHY devices, making
them a child of the MDIO bus, instead of the MAC device. This broken
the Micrel PHY driver which has a deprecated feature of allowing PHY
properties to be placed into the MAC node.

In order to find the MAC node, we need to walk up the tree of devices
until we find one with an OF node attached.

Reported-by: Dinh Nguyen <dinguyen@opensource.altera.com>
Suggested-by: David Daney <david.daney@cavium.com>
Acked-by: David Daney <david.daney@cavium.com>
Fixes: 8b63ec1837fa ("phylib: Make PHYs children of their MDIO bus, not the bus' parent.")
Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Tested-by: Dinh Nguyen <dinguyen@opensource.altera.com>
Acked-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/micrel.c | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c
index cf6312f..e13ad6c 100644
--- a/drivers/net/phy/micrel.c
+++ b/drivers/net/phy/micrel.c
@@ -339,9 +339,18 @@ static int ksz9021_config_init(struct phy_device *phydev)
 {
 	const struct device *dev = &phydev->dev;
 	const struct device_node *of_node = dev->of_node;
+	const struct device *dev_walker;
 
-	if (!of_node && dev->parent->of_node)
-		of_node = dev->parent->of_node;
+	/* The Micrel driver has a deprecated option to place phy OF
+	 * properties in the MAC node. Walk up the tree of devices to
+	 * find a device with an OF node.
+	 */
+	dev_walker = &phydev->dev;
+	do {
+		of_node = dev_walker->of_node;
+		dev_walker = dev_walker->parent;
+
+	} while (!of_node && dev_walker);
 
 	if (of_node) {
 		ksz9021_load_values_from_of(phydev, of_node,
-- 
2.4.1


From 73e71dcb1df404661314ec7cb9aa27d209407d70 Mon Sep 17 00:00:00 2001
From: Joe Stringer <joe@ovn.org>
Date: Wed, 9 Dec 2015 14:07:39 -0800
Subject: [PATCH 14/34] openvswitch: Fix helper reference leak

[ Upstream commit 2f3ab9f9fc23811188b9d07d86e4d99ffee887f4 ]

If the actions (re)allocation fails, or the actions list is larger than the
maximum size, and the conntrack action is the last action when these
problems are hit, then references to helper modules may be leaked. Fix
the issue.

Fixes: cae3a2627520 ("openvswitch: Allow attaching helpers to ct action")
Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Pravin B Shelar <pshelar@nicira.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/openvswitch/conntrack.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index 5009582..a808b0f 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -53,6 +53,8 @@ struct ovs_conntrack_info {
 	struct md_labels labels;
 };
 
+static void __ovs_ct_free_action(struct ovs_conntrack_info *ct_info);
+
 static u16 key_to_nfproto(const struct sw_flow_key *key)
 {
 	switch (ntohs(key->eth.type)) {
@@ -708,7 +710,7 @@ int ovs_ct_copy_action(struct net *net, const struct nlattr *attr,
 	nf_conntrack_get(&ct_info.ct->ct_general);
 	return 0;
 err_free_ct:
-	nf_conntrack_free(ct_info.ct);
+	__ovs_ct_free_action(&ct_info);
 	return err;
 }
 
@@ -750,6 +752,11 @@ void ovs_ct_free_action(const struct nlattr *a)
 {
 	struct ovs_conntrack_info *ct_info = nla_data(a);
 
+	__ovs_ct_free_action(ct_info);
+}
+
+static void __ovs_ct_free_action(struct ovs_conntrack_info *ct_info)
+{
 	if (ct_info->helper)
 		module_put(ct_info->helper->me);
 	if (ct_info->ct)
-- 
2.4.1


From 7539fb022a74bb9b69fe5ad9125b899b83001a6d Mon Sep 17 00:00:00 2001
From: Joe Stringer <joe@ovn.org>
Date: Wed, 9 Dec 2015 14:07:40 -0800
Subject: [PATCH 15/34] openvswitch: Respect conntrack zone even if invalid

[ Upstream commit d110986c5ddb1caf576e8576044c0c831e3e7fa4 ]

If userspace executes ct(zone=1), and the connection tracker determines
that the packet is invalid, then the ct_zone flow key field is populated
with the default zone rather than the zone that was specified. Even
though connection tracking failed, this field should be updated with the
value that the action specified. Fix the issue.

Fixes: 7f8a436eaa2c ("openvswitch: Add conntrack action")
Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Pravin B Shelar <pshelar@nicira.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/openvswitch/conntrack.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index a808b0f..cad8c4b 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -143,6 +143,7 @@ static void __ovs_ct_update_key(struct sw_flow_key *key, u8 state,
  * previously sent the packet to conntrack via the ct action.
  */
 static void ovs_ct_update_key(const struct sk_buff *skb,
+			      const struct ovs_conntrack_info *info,
 			      struct sw_flow_key *key, bool post_ct)
 {
 	const struct nf_conntrack_zone *zone = &nf_ct_zone_dflt;
@@ -160,13 +161,15 @@ static void ovs_ct_update_key(const struct sk_buff *skb,
 		zone = nf_ct_zone(ct);
 	} else if (post_ct) {
 		state = OVS_CS_F_TRACKED | OVS_CS_F_INVALID;
+		if (info)
+			zone = &info->zone;
 	}
 	__ovs_ct_update_key(key, state, zone, ct);
 }
 
 void ovs_ct_fill_key(const struct sk_buff *skb, struct sw_flow_key *key)
 {
-	ovs_ct_update_key(skb, key, false);
+	ovs_ct_update_key(skb, NULL, key, false);
 }
 
 int ovs_ct_put_key(const struct sw_flow_key *key, struct sk_buff *skb)
@@ -420,7 +423,7 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
 		}
 	}
 
-	ovs_ct_update_key(skb, key, true);
+	ovs_ct_update_key(skb, info, key, true);
 
 	return 0;
 }
-- 
2.4.1


From 9226d3bc36e0da4f99f5fe9138f46e34b631fda8 Mon Sep 17 00:00:00 2001
From: stephen hemminger <stephen@networkplumber.org>
Date: Thu, 10 Dec 2015 09:14:20 -0800
Subject: [PATCH 16/34] uapi: export ila.h

[ Upstream commit f7fc6bc414121954c45c5f18b70e2a8717d0d5b4 ]

The file ila.h used for lightweight tunnels is being used by iproute2
but is not exported yet.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/Kbuild | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild
index f7b2db4..7fc5733 100644
--- a/include/uapi/linux/Kbuild
+++ b/include/uapi/linux/Kbuild
@@ -186,6 +186,7 @@ header-y += if_tunnel.h
 header-y += if_vlan.h
 header-y += if_x25.h
 header-y += igmp.h
+header-y += ila.h
 header-y += in6.h
 header-y += inet_diag.h
 header-y += in.h
-- 
2.4.1


From 204ce70323cdcff523324a2dd02f3a4fa2c01754 Mon Sep 17 00:00:00 2001
From: Hannes Frederic Sowa <hannes@stressinduktion.org>
Date: Mon, 14 Dec 2015 22:03:39 +0100
Subject: [PATCH 17/34] net: add validation for the socket syscall protocol
 argument
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

[ Upstream commit 79462ad02e861803b3840cc782248c7359451cd9 ]

郭永刚 reported that one could simply crash the kernel as root by
using a simple program:

	int socket_fd;
	struct sockaddr_in addr;
	addr.sin_port = 0;
	addr.sin_addr.s_addr = INADDR_ANY;
	addr.sin_family = 10;

	socket_fd = socket(10,3,0x40000000);
	connect(socket_fd , &addr,16);

AF_INET, AF_INET6 sockets actually only support 8-bit protocol
identifiers. inet_sock's skc_protocol field thus is sized accordingly,
thus larger protocol identifiers simply cut off the higher bits and
store a zero in the protocol fields.

This could lead to e.g. NULL function pointer because as a result of
the cut off inet_num is zero and we call down to inet_autobind, which
is NULL for raw sockets.

kernel: Call Trace:
kernel:  [<ffffffff816db90e>] ? inet_autobind+0x2e/0x70
kernel:  [<ffffffff816db9a4>] inet_dgram_connect+0x54/0x80
kernel:  [<ffffffff81645069>] SYSC_connect+0xd9/0x110
kernel:  [<ffffffff810ac51b>] ? ptrace_notify+0x5b/0x80
kernel:  [<ffffffff810236d8>] ? syscall_trace_enter_phase2+0x108/0x200
kernel:  [<ffffffff81645e0e>] SyS_connect+0xe/0x10
kernel:  [<ffffffff81779515>] tracesys_phase2+0x84/0x89

I found no particular commit which introduced this problem.

CVE: CVE-2015-8543
Cc: Cong Wang <cwang@twopensource.com>
Reported-by: 郭永刚 <guoyonggang@360.cn>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h     | 1 +
 net/ax25/af_ax25.c     | 3 +++
 net/decnet/af_decnet.c | 3 +++
 net/ipv4/af_inet.c     | 3 +++
 net/ipv6/af_inet6.c    | 3 +++
 net/irda/af_irda.c     | 3 +++
 6 files changed, 16 insertions(+)

diff --git a/include/net/sock.h b/include/net/sock.h
index 492855d..7ae032e 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -387,6 +387,7 @@ struct sock {
 				sk_no_check_rx : 1,
 				sk_userlocks : 4,
 				sk_protocol  : 8,
+#define SK_PROTOCOL_MAX U8_MAX
 				sk_type      : 16;
 	kmemcheck_bitfield_end(flags);
 	int			sk_wmem_queued;
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index ae3a47f..fbd0acf 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -805,6 +805,9 @@ static int ax25_create(struct net *net, struct socket *sock, int protocol,
 	struct sock *sk;
 	ax25_cb *ax25;
 
+	if (protocol < 0 || protocol > SK_PROTOCOL_MAX)
+		return -EINVAL;
+
 	if (!net_eq(net, &init_net))
 		return -EAFNOSUPPORT;
 
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index 675cf94..6feddca 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -678,6 +678,9 @@ static int dn_create(struct net *net, struct socket *sock, int protocol,
 {
 	struct sock *sk;
 
+	if (protocol < 0 || protocol > SK_PROTOCOL_MAX)
+		return -EINVAL;
+
 	if (!net_eq(net, &init_net))
 		return -EAFNOSUPPORT;
 
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 1d0c3ad..4b16cf3 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -261,6 +261,9 @@ static int inet_create(struct net *net, struct socket *sock, int protocol,
 	int try_loading_module = 0;
 	int err;
 
+	if (protocol < 0 || protocol >= IPPROTO_MAX)
+		return -EINVAL;
+
 	sock->state = SS_UNCONNECTED;
 
 	/* Look for the requested type/protocol pair. */
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 38d66dd..df095ee 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -109,6 +109,9 @@ static int inet6_create(struct net *net, struct socket *sock, int protocol,
 	int try_loading_module = 0;
 	int err;
 
+	if (protocol < 0 || protocol >= IPPROTO_MAX)
+		return -EINVAL;
+
 	/* Look for the requested type/protocol pair. */
 lookup_protocol:
 	err = -ESOCKTNOSUPPORT;
diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c
index fae6822..25f63a8 100644
--- a/net/irda/af_irda.c
+++ b/net/irda/af_irda.c
@@ -1086,6 +1086,9 @@ static int irda_create(struct net *net, struct socket *sock, int protocol,
 	struct sock *sk;
 	struct irda_sock *self;
 
+	if (protocol < 0 || protocol > SK_PROTOCOL_MAX)
+		return -EINVAL;
+
 	if (net != &init_net)
 		return -EAFNOSUPPORT;
 
-- 
2.4.1


From 3b1d8cc00ea00bb6451a2db42b98179e109ac291 Mon Sep 17 00:00:00 2001
From: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Date: Fri, 4 Dec 2015 01:45:40 +0300
Subject: [PATCH 18/34] sh_eth: fix kernel oops in skb_put()

[ Upstream commit 248be83dcb3feb3f6332eb3d010a016402138484 ]

In a low memory situation the following kernel oops occurs:

Unable to handle kernel NULL pointer dereference at virtual address 00000050
pgd = 8490c000
[00000050] *pgd=4651e831, *pte=00000000, *ppte=00000000
Internal error: Oops: 17 [#1] PREEMPT ARM
Modules linked in:
CPU: 0    Not tainted  (3.4-at16 #9)
PC is at skb_put+0x10/0x98
LR is at sh_eth_poll+0x2c8/0xa10
pc : [<8035f780>]    lr : [<8028bf50>]    psr: 60000113
sp : 84eb1a90  ip : 84eb1ac8  fp : 84eb1ac4
r10: 0000003f  r9 : 000005ea  r8 : 00000000
r7 : 00000000  r6 : 940453b0  r5 : 00030000  r4 : 9381b180
r3 : 00000000  r2 : 00000000  r1 : 000005ea  r0 : 00000000
Flags: nZCv  IRQs on  FIQs on  Mode SVC_32  ISA ARM  Segment user
Control: 10c53c7d  Table: 4248c059  DAC: 00000015
Process klogd (pid: 2046, stack limit = 0x84eb02e8)
[...]

This is  because netdev_alloc_skb() fails and 'mdp->rx_skbuff[entry]' is left
NULL but sh_eth_rx() later  uses it without checking.  Add such check...

Reported-by: Yasushi SHOJI <yashi@atmark-techno.com>
Signed-off-by: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/renesas/sh_eth.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c
index a484d8b..f3cbf90c 100644
--- a/drivers/net/ethernet/renesas/sh_eth.c
+++ b/drivers/net/ethernet/renesas/sh_eth.c
@@ -1481,6 +1481,7 @@ static int sh_eth_rx(struct net_device *ndev, u32 intr_status, int *quota)
 		if (mdp->cd->shift_rd0)
 			desc_status >>= 16;
 
+		skb = mdp->rx_skbuff[entry];
 		if (desc_status & (RD_RFS1 | RD_RFS2 | RD_RFS3 | RD_RFS4 |
 				   RD_RFS5 | RD_RFS6 | RD_RFS10)) {
 			ndev->stats.rx_errors++;
@@ -1496,12 +1497,11 @@ static int sh_eth_rx(struct net_device *ndev, u32 intr_status, int *quota)
 				ndev->stats.rx_missed_errors++;
 			if (desc_status & RD_RFS10)
 				ndev->stats.rx_over_errors++;
-		} else {
+		} else	if (skb) {
 			if (!mdp->cd->hw_swap)
 				sh_eth_soft_swap(
 					phys_to_virt(ALIGN(rxdesc->addr, 4)),
 					pkt_len + 2);
-			skb = mdp->rx_skbuff[entry];
 			mdp->rx_skbuff[entry] = NULL;
 			if (mdp->cd->rpadir)
 				skb_reserve(skb, NET_IP_ALIGN);
-- 
2.4.1


From 864f5d3880ba7e9e3d11f8ba725f29b7f45ae508 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 14 Dec 2015 14:08:53 -0800
Subject: [PATCH 19/34] net: fix IP early demux races

[ Upstream commit 5037e9ef9454917b047f9f3a19b4dd179fbf7cd4 ]

David Wilder reported crashes caused by dst reuse.

<quote David>
  I am seeing a crash on a distro V4.2.3 kernel caused by a double
  release of a dst_entry.  In ipv4_dst_destroy() the call to
  list_empty() finds a poisoned next pointer, indicating the dst_entry
  has already been removed from the list and freed. The crash occurs
  18 to 24 hours into a run of a network stress exerciser.
</quote>

Thanks to his detailed report and analysis, we were able to understand
the core issue.

IP early demux can associate a dst to skb, after a lookup in TCP/UDP
sockets.

When socket cache is not properly set, we want to store into
sk->sk_dst_cache the dst for future IP early demux lookups,
by acquiring a stable refcount on the dst.

Problem is this acquisition is simply using an atomic_inc(),
which works well, unless the dst was queued for destruction from
dst_release() noticing dst refcount went to zero, if DST_NOCACHE
was set on dst.

We need to make sure current refcount is not zero before incrementing
it, or risk double free as David reported.

This patch, being a stable candidate, adds two new helpers, and use
them only from IP early demux problematic paths.

It might be possible to merge in net-next skb_dst_force() and
skb_dst_force_safe(), but I prefer having the smallest patch for stable
kernels : Maybe some skb_dst_force() callers do not expect skb->dst
can suddenly be cleared.

Can probably be backported back to linux-3.6 kernels

Reported-by: David J. Wilder <dwilder@us.ibm.com>
Tested-by: David J. Wilder <dwilder@us.ibm.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dst.h   | 33 +++++++++++++++++++++++++++++++++
 include/net/sock.h  |  2 +-
 net/ipv4/tcp_ipv4.c |  5 ++---
 net/ipv6/tcp_ipv6.c |  3 +--
 4 files changed, 37 insertions(+), 6 deletions(-)

diff --git a/include/net/dst.h b/include/net/dst.h
index 9261d92..e7fa2e2 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -322,6 +322,39 @@ static inline void skb_dst_force(struct sk_buff *skb)
 	}
 }
 
+/**
+ * dst_hold_safe - Take a reference on a dst if possible
+ * @dst: pointer to dst entry
+ *
+ * This helper returns false if it could not safely
+ * take a reference on a dst.
+ */
+static inline bool dst_hold_safe(struct dst_entry *dst)
+{
+	if (dst->flags & DST_NOCACHE)
+		return atomic_inc_not_zero(&dst->__refcnt);
+	dst_hold(dst);
+	return true;
+}
+
+/**
+ * skb_dst_force_safe - makes sure skb dst is refcounted
+ * @skb: buffer
+ *
+ * If dst is not yet refcounted and not destroyed, grab a ref on it.
+ */
+static inline void skb_dst_force_safe(struct sk_buff *skb)
+{
+	if (skb_dst_is_noref(skb)) {
+		struct dst_entry *dst = skb_dst(skb);
+
+		if (!dst_hold_safe(dst))
+			dst = NULL;
+
+		skb->_skb_refdst = (unsigned long)dst;
+	}
+}
+
 
 /**
  *	__skb_tunnel_rx - prepare skb for rx reinsert
diff --git a/include/net/sock.h b/include/net/sock.h
index 7ae032e..bca709a 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -801,7 +801,7 @@ void sk_stream_write_space(struct sock *sk);
 static inline void __sk_add_backlog(struct sock *sk, struct sk_buff *skb)
 {
 	/* dont let skb dst not refcounted, we are going to leave rcu lock */
-	skb_dst_force(skb);
+	skb_dst_force_safe(skb);
 
 	if (!sk->sk_backlog.tail)
 		sk->sk_backlog.head = skb;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index a7739c8..d77be28 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1509,7 +1509,7 @@ bool tcp_prequeue(struct sock *sk, struct sk_buff *skb)
 	if (likely(sk->sk_rx_dst))
 		skb_dst_drop(skb);
 	else
-		skb_dst_force(skb);
+		skb_dst_force_safe(skb);
 
 	__skb_queue_tail(&tp->ucopy.prequeue, skb);
 	tp->ucopy.memory += skb->truesize;
@@ -1710,8 +1710,7 @@ void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
 {
 	struct dst_entry *dst = skb_dst(skb);
 
-	if (dst) {
-		dst_hold(dst);
+	if (dst && dst_hold_safe(dst)) {
 		sk->sk_rx_dst = dst;
 		inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
 	}
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 9e9b77b..8935dc1 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -93,10 +93,9 @@ static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
 {
 	struct dst_entry *dst = skb_dst(skb);
 
-	if (dst) {
+	if (dst && dst_hold_safe(dst)) {
 		const struct rt6_info *rt = (const struct rt6_info *)dst;
 
-		dst_hold(dst);
 		sk->sk_rx_dst = dst;
 		inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
 		inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt);
-- 
2.4.1


From 68743b8922086a882bf9ea135221a3a2229ca6be Mon Sep 17 00:00:00 2001
From: WANG Cong <xiyou.wangcong@gmail.com>
Date: Mon, 14 Dec 2015 13:48:36 -0800
Subject: [PATCH 20/34] pptp: verify sockaddr_len in pptp_bind() and
 pptp_connect()

[ Upstream commit 09ccfd238e5a0e670d8178cf50180ea81ae09ae1 ]

Reported-by: Dmitry Vyukov <dvyukov@gmail.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ppp/pptp.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/net/ppp/pptp.c b/drivers/net/ppp/pptp.c
index 686f37d..b910cae 100644
--- a/drivers/net/ppp/pptp.c
+++ b/drivers/net/ppp/pptp.c
@@ -418,6 +418,9 @@ static int pptp_bind(struct socket *sock, struct sockaddr *uservaddr,
 	struct pptp_opt *opt = &po->proto.pptp;
 	int error = 0;
 
+	if (sockaddr_len < sizeof(struct sockaddr_pppox))
+		return -EINVAL;
+
 	lock_sock(sk);
 
 	opt->src_addr = sp->sa_addr.pptp;
@@ -439,6 +442,9 @@ static int pptp_connect(struct socket *sock, struct sockaddr *uservaddr,
 	struct flowi4 fl4;
 	int error = 0;
 
+	if (sockaddr_len < sizeof(struct sockaddr_pppox))
+		return -EINVAL;
+
 	if (sp->sa_protocol != PX_PROTO_PPTP)
 		return -EINVAL;
 
-- 
2.4.1


From a745f3ea43700db573910f2231d87a0d0f82e03b Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vyasevich@gmail.com>
Date: Mon, 16 Nov 2015 15:43:44 -0500
Subject: [PATCH 21/34] vlan: Fix untag operations of stacked vlans with
 REORDER_HEADER off

[ Upstream commit a6e18ff111701b4ff6947605bfbe9594ec42a6e8 ]

When we have multiple stacked vlan devices all of which have
turned off REORDER_HEADER flag, the untag operation does not
locate the ethernet addresses correctly for nested vlans.
The reason is that in case of REORDER_HEADER flag being off,
the outer vlan headers are put back and the mac_len is adjusted
to account for the presense of the header.  Then, the subsequent
untag operation, for the next level vlan, always use VLAN_ETH_HLEN
to locate the begining of the ethernet header and that ends up
being a multiple of 4 bytes short of the actuall beginning
of the mac header (the multiple depending on the how many vlan
encapsulations ethere are).

As a reslult, if there are multiple levles of vlan devices
with REODER_HEADER being off, the recevied packets end up
being dropped.

To solve this, we use skb->mac_len as the offset.  The value
is always set on receive path and starts out as a ETH_HLEN.
The value is also updated when the vlan header manupations occur
so we know it will be correct.

Signed-off-by: Vladislav Yasevich <vyasevic@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/skbuff.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index fab4599..160193f 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -4268,7 +4268,8 @@ static struct sk_buff *skb_reorder_vlan_header(struct sk_buff *skb)
 		return NULL;
 	}
 
-	memmove(skb->data - ETH_HLEN, skb->data - VLAN_ETH_HLEN, 2 * ETH_ALEN);
+	memmove(skb->data - ETH_HLEN, skb->data - skb->mac_len,
+		2 * ETH_ALEN);
 	skb->mac_header += VLAN_HLEN;
 	return skb;
 }
-- 
2.4.1


From 3555f4b7a44fb052205049353ebf858291ae722d Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vyasevich@gmail.com>
Date: Mon, 14 Dec 2015 17:44:10 -0500
Subject: [PATCH 22/34] skbuff: Fix offset error in skb_reorder_vlan_header

[ Upstream commit f654861569872d10dcb79d9d7ca219b316f94ff0 ]

skb_reorder_vlan_header is called after the vlan header has
been pulled.  As a result the offset of the begining of
the mac header has been incrased by 4 bytes (VLAN_HLEN).
When moving the mac addresses, include this incrase in
the offset calcualation so that the mac addresses are
copied correctly.

Fixes: a6e18ff1117 (vlan: Fix untag operations of stacked vlans with REORDER_HEADER off)
CC: Nicolas Dichtel <nicolas.dichtel@6wind.com>
CC: Patrick McHardy <kaber@trash.net>
Signed-off-by: Vladislav Yasevich <vyasevich@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/skbuff.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 160193f..1883d28 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -4268,7 +4268,7 @@ static struct sk_buff *skb_reorder_vlan_header(struct sk_buff *skb)
 		return NULL;
 	}
 
-	memmove(skb->data - ETH_HLEN, skb->data - skb->mac_len,
+	memmove(skb->data - ETH_HLEN, skb->data - skb->mac_len - VLAN_HLEN,
 		2 * ETH_ALEN);
 	skb->mac_header += VLAN_HLEN;
 	return skb;
-- 
2.4.1


From ed74e375974b73498576c5e5305f0e1c0445e4f9 Mon Sep 17 00:00:00 2001
From: WANG Cong <xiyou.wangcong@gmail.com>
Date: Wed, 16 Dec 2015 23:39:04 -0800
Subject: [PATCH 23/34] net: check both type and procotol for tcp sockets

[ Upstream commit ac5cc977991d2dce85fc734a6c71ddb33f6fe3c1 ]

Dmitry reported the following out-of-bound access:

Call Trace:
 [<ffffffff816cec2e>] __asan_report_load4_noabort+0x3e/0x40
mm/kasan/report.c:294
 [<ffffffff84affb14>] sock_setsockopt+0x1284/0x13d0 net/core/sock.c:880
 [<     inline     >] SYSC_setsockopt net/socket.c:1746
 [<ffffffff84aed7ee>] SyS_setsockopt+0x1fe/0x240 net/socket.c:1729
 [<ffffffff85c18c76>] entry_SYSCALL_64_fastpath+0x16/0x7a
arch/x86/entry/entry_64.S:185

This is because we mistake a raw socket as a tcp socket.
We should check both sk->sk_type and sk->sk_protocol to ensure
it is a tcp socket.

Willem points out __skb_complete_tx_timestamp() needs to fix as well.

Reported-by: Dmitry Vyukov <dvyukov@google.com>
Cc: Willem de Bruijn <willemdebruijn.kernel@gmail.com>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Acked-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/skbuff.c | 3 ++-
 net/core/sock.c   | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 1883d28..1c1f87c 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3643,7 +3643,8 @@ static void __skb_complete_tx_timestamp(struct sk_buff *skb,
 	serr->ee.ee_info = tstype;
 	if (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID) {
 		serr->ee.ee_data = skb_shinfo(skb)->tskey;
-		if (sk->sk_protocol == IPPROTO_TCP)
+		if (sk->sk_protocol == IPPROTO_TCP &&
+		    sk->sk_type == SOCK_STREAM)
 			serr->ee.ee_data -= sk->sk_tskey;
 	}
 
diff --git a/net/core/sock.c b/net/core/sock.c
index d7a7fc5..dbbda99 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -860,7 +860,8 @@ set_rcvbuf:
 
 		if (val & SOF_TIMESTAMPING_OPT_ID &&
 		    !(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)) {
-			if (sk->sk_protocol == IPPROTO_TCP) {
+			if (sk->sk_protocol == IPPROTO_TCP &&
+			    sk->sk_type == SOCK_STREAM) {
 				if (sk->sk_state != TCP_ESTABLISHED) {
 					ret = -EINVAL;
 					break;
-- 
2.4.1


From b5b6dd8ecd8cbbc029b0eaf3acc077c61d2a5611 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 15 Dec 2015 09:43:12 -0800
Subject: [PATCH 24/34] net_sched: make qdisc_tree_decrease_qlen() work for non
 mq

[ Upstream commit 225734de70cd0a9e0b978f3583a4a87939271d5e ]

Stas Nichiporovich reported a regression in his HFSC qdisc setup
on a non multi queue device.

It turns out I mistakenly added a TCQ_F_NOPARENT flag on all qdisc
allocated in qdisc_create() for non multi queue devices, which was
rather buggy. I was clearly mislead by the TCQ_F_ONETXQUEUE that is
also set here for no good reason, since it only matters for the root
qdisc.

Fixes: 4eaf3b84f288 ("net_sched: fix qdisc_tree_decrease_qlen() races")
Reported-by: Stas Nichiporovich <stasn77@gmail.com>
Tested-by: Stas Nichiporovich <stasn77@gmail.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_api.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 7ec667d..b5c2cf2 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -950,7 +950,7 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
 		}
 		lockdep_set_class(qdisc_lock(sch), &qdisc_tx_lock);
 		if (!netif_is_multiqueue(dev))
-			sch->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
+			sch->flags |= TCQ_F_ONETXQUEUE;
 	}
 
 	sch->handle = handle;
-- 
2.4.1


From a7ace68f6ce8e50a6c828595e1672c037f40c36a Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 15 Dec 2015 15:39:08 -0500
Subject: [PATCH 25/34] bluetooth: Validate socket address length in
 sco_sock_bind().

[ Upstream commit 5233252fce714053f0151680933571a2da9cbfb4 ]

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bluetooth/sco.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index f315c8d..15cb6c5 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -519,6 +519,9 @@ static int sco_sock_bind(struct socket *sock, struct sockaddr *addr, int addr_le
 	if (!addr || addr->sa_family != AF_BLUETOOTH)
 		return -EINVAL;
 
+	if (addr_len < sizeof(struct sockaddr_sco))
+		return -EINVAL;
+
 	lock_sock(sk);
 
 	if (sk->sk_state != BT_OPEN) {
-- 
2.4.1


From 0d4975246467f450ce902e9dc442d647eb7ad516 Mon Sep 17 00:00:00 2001
From: "tadeusz.struk@intel.com" <tadeusz.struk@intel.com>
Date: Tue, 15 Dec 2015 10:46:17 -0800
Subject: [PATCH 26/34] net: fix uninitialized variable issue

[ Upstream commit 130ed5d105dde141e7fe60d5440aa53e0a84f13b ]

msg_iocb needs to be initialized on the recv/recvfrom path.
Otherwise afalg will wrongly interpret it as an async call.

Cc: stable@vger.kernel.org
Reported-by: Harald Freudenberger <freude@linux.vnet.ibm.com>
Signed-off-by: Tadeusz Struk <tadeusz.struk@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/socket.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/socket.c b/net/socket.c
index 9963a0b..f3fbe17 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -1702,6 +1702,7 @@ SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
 	msg.msg_name = addr ? (struct sockaddr *)&address : NULL;
 	/* We assume all kernel code knows the size of sockaddr_storage */
 	msg.msg_namelen = 0;
+	msg.msg_iocb = NULL;
 	if (sock->file->f_flags & O_NONBLOCK)
 		flags |= MSG_DONTWAIT;
 	err = sock_recvmsg(sock, &msg, iov_iter_count(&msg.msg_iter), flags);
-- 
2.4.1


From 055a98ed38bb76ad811feec778f287e47d20bf41 Mon Sep 17 00:00:00 2001
From: Hannes Frederic Sowa <hannes@stressinduktion.org>
Date: Tue, 15 Dec 2015 22:59:12 +0100
Subject: [PATCH 27/34] ipv6: automatically enable stable privacy mode if
 stable_secret set
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

[ Upstream commit 9b29c6962b70f232cde4076b1020191e1be0889d ]

Bjørn reported that while we switch all interfaces to privacy stable mode
when setting the secret, we don't set this mode for new interfaces. This
does not make sense, so change this behaviour.

Fixes: 622c81d57b392cc ("ipv6: generation of stable privacy addresses for link-local and autoconf")
Reported-by: Bjørn Mork <bjorn@mork.no>
Cc: Bjørn Mork <bjorn@mork.no>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index ff873c8..ddd3511 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -349,6 +349,12 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
 	setup_timer(&ndev->rs_timer, addrconf_rs_timer,
 		    (unsigned long)ndev);
 	memcpy(&ndev->cnf, dev_net(dev)->ipv6.devconf_dflt, sizeof(ndev->cnf));
+
+	if (ndev->cnf.stable_secret.initialized)
+		ndev->addr_gen_mode = IN6_ADDR_GEN_MODE_STABLE_PRIVACY;
+	else
+		ndev->addr_gen_mode = IN6_ADDR_GEN_MODE_EUI64;
+
 	ndev->cnf.mtu6 = dev->mtu;
 	ndev->cnf.sysctl = NULL;
 	ndev->nd_parms = neigh_parms_alloc(dev, &nd_tbl);
-- 
2.4.1


From e7763d6d443faf1a4a4026d3e8e98843784606c2 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 15 Dec 2015 20:56:44 -0800
Subject: [PATCH 28/34] inet: tcp: fix inetpeer_set_addr_v4()

[ Upstream commit 887dc9f2cef6e98dcccf807da5e6faf4f60ba483 ]

David Ahern added a vif field in the a4 part of inetpeer_addr struct.

This broke IPv4 TCP fast open client side and more generally tcp metrics
cache, because inetpeer_addr_cmp() is now comparing two u32 instead of
one.

inetpeer_set_addr_v4() needs to properly init vif field, otherwise
the comparison result depends on uninitialized data.

Fixes: 192132b9a034 ("net: Add support for VRFs to inetpeer cache")
Reported-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inetpeer.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h
index 4a6009d..235c781 100644
--- a/include/net/inetpeer.h
+++ b/include/net/inetpeer.h
@@ -78,6 +78,7 @@ void inet_initpeers(void) __init;
 static inline void inetpeer_set_addr_v4(struct inetpeer_addr *iaddr, __be32 ip)
 {
 	iaddr->a4.addr = ip;
+	iaddr->a4.vif = 0;
 	iaddr->family = AF_INET;
 }
 
-- 
2.4.1


From dc6f25bdfbf1027db505506d8795fef1234f342c Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Wed, 16 Dec 2015 18:13:14 +0800
Subject: [PATCH 29/34] rhashtable: Enforce minimum size on initial hash table

[ Upstream commit 3a324606bbabfc30084ce9d08169910773ba9a92 ]

William Hua <william.hua@canonical.com> wrote:
>
> I wasn't aware there was an enforced minimum size. I simply set the
> nelem_hint in the rhastable_params struct to 1, expecting it to grow as
> needed. This caused a segfault afterwards when trying to insert an
> element.

OK we're doing the size computation before we enforce the limit
on min_size.

---8<---
We need to do the initial hash table size computation after we
have obtained the correct min_size/max_size parameters.  Otherwise
we may end up with a hash table whose size is outside the allowed
envelope.

Fixes: a998f712f77e ("rhashtable: Round up/down min/max_size to...")
Reported-by: William Hua <william.hua@canonical.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 lib/rhashtable.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index a54ff89..d2daf67e 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -723,9 +723,6 @@ int rhashtable_init(struct rhashtable *ht,
 	if (params->nulls_base && params->nulls_base < (1U << RHT_BASE_SHIFT))
 		return -EINVAL;
 
-	if (params->nelem_hint)
-		size = rounded_hashtable_size(params);
-
 	memset(ht, 0, sizeof(*ht));
 	mutex_init(&ht->mutex);
 	spin_lock_init(&ht->lock);
@@ -745,6 +742,9 @@ int rhashtable_init(struct rhashtable *ht,
 
 	ht->p.min_size = max(ht->p.min_size, HASH_MIN_SIZE);
 
+	if (params->nelem_hint)
+		size = rounded_hashtable_size(&ht->p);
+
 	/* The maximum (not average) chain length grows with the
 	 * size of the hash table, at a rate of (log N)/(log log N).
 	 * The value of 16 is selected so that even if the hash
-- 
2.4.1


From b91eef0e59aebcdaceb9bfeb61aafe0a345d2d81 Mon Sep 17 00:00:00 2001
From: Hamish Martin <hamish.martin@alliedtelesis.co.nz>
Date: Tue, 15 Dec 2015 14:14:50 +1300
Subject: [PATCH 30/34] gianfar: Don't enable RX Filer if not supported

[ Upstream commit 7bff47da1ee23d00d1257905f2944c29594f799d ]

After commit 15bf176db1fb ("gianfar: Don't enable the Filer w/o the
Parser"), 'TSEC' model controllers (for example as seen on MPC8541E)
always have 8 bytes stripped from the front of received frames.
Only 'eTSEC' gianfar controllers have the RX Filer capability (amongst
other enhancements). Previously this was treated as always enabled
for both 'TSEC' and 'eTSEC' controllers.
In commit 15bf176db1fb ("gianfar: Don't enable the Filer w/o the Parser")
a subtle change was made to the setting of 'uses_rxfcb' to effectively
always set it (since 'rx_filer_enable' was always true). This had the
side-effect of always stripping 8 bytes from the front of received frames
on 'TSEC' type controllers.

We now only enable the RX Filer capability on controller types that
support it, thereby avoiding the issue for 'TSEC' type controllers.

Reviewed-by: Chris Packham <chris.packham@alliedtelesis.co.nz>
Reviewed-by: Mark Tomlinson <mark.tomlinson@alliedtelesis.co.nz>
Signed-off-by: Hamish Martin <hamish.martin@alliedtelesis.co.nz>
Reviewed-by: Claudiu Manoil <claudiu.manoil@freescale.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/gianfar.c | 8 +++++---
 drivers/net/ethernet/freescale/gianfar.h | 1 +
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c
index ce38d26..bcb933e 100644
--- a/drivers/net/ethernet/freescale/gianfar.c
+++ b/drivers/net/ethernet/freescale/gianfar.c
@@ -894,7 +894,8 @@ static int gfar_of_init(struct platform_device *ofdev, struct net_device **pdev)
 				     FSL_GIANFAR_DEV_HAS_VLAN |
 				     FSL_GIANFAR_DEV_HAS_MAGIC_PACKET |
 				     FSL_GIANFAR_DEV_HAS_EXTENDED_HASH |
-				     FSL_GIANFAR_DEV_HAS_TIMER;
+				     FSL_GIANFAR_DEV_HAS_TIMER |
+				     FSL_GIANFAR_DEV_HAS_RX_FILER;
 
 	err = of_property_read_string(np, "phy-connection-type", &ctype);
 
@@ -1393,8 +1394,9 @@ static int gfar_probe(struct platform_device *ofdev)
 		priv->rx_queue[i]->rxic = DEFAULT_RXIC;
 	}
 
-	/* always enable rx filer */
-	priv->rx_filer_enable = 1;
+	/* Always enable rx filer if available */
+	priv->rx_filer_enable =
+	    (priv->device_flags & FSL_GIANFAR_DEV_HAS_RX_FILER) ? 1 : 0;
 	/* Enable most messages by default */
 	priv->msg_enable = (NETIF_MSG_IFUP << 1 ) - 1;
 	/* use pritority h/w tx queue scheduling for single queue devices */
diff --git a/drivers/net/ethernet/freescale/gianfar.h b/drivers/net/ethernet/freescale/gianfar.h
index 8c19948..3755372 100644
--- a/drivers/net/ethernet/freescale/gianfar.h
+++ b/drivers/net/ethernet/freescale/gianfar.h
@@ -917,6 +917,7 @@ struct gfar {
 #define FSL_GIANFAR_DEV_HAS_BD_STASHING		0x00000200
 #define FSL_GIANFAR_DEV_HAS_BUF_STASHING	0x00000400
 #define FSL_GIANFAR_DEV_HAS_TIMER		0x00000800
+#define FSL_GIANFAR_DEV_HAS_RX_FILER		0x00002000
 
 #if (MAXGROUPS == 2)
 #define DEFAULT_MAPPING 	0xAA
-- 
2.4.1


From e8fcabcaa1cc5d503b1dd7d94d9bb83e13610e96 Mon Sep 17 00:00:00 2001
From: Hannes Frederic Sowa <hannes@stressinduktion.org>
Date: Tue, 15 Dec 2015 21:01:53 +0100
Subject: [PATCH 31/34] fou: clean up socket with kfree_rcu

[ Upstream commit 3036facbb7be3a169e35be3b271162b0fa564a2d ]

fou->udp_offloads is managed by RCU. As it is actually included inside
the fou sockets, we cannot let the memory go out of scope before a grace
period. We either can synchronize_rcu or switch over to kfree_rcu to
manage the sockets. kfree_rcu seems appropriate as it is used by vxlan
and geneve.

Fixes: 23461551c00628c ("fou: Support for foo-over-udp RX path")
Cc: Tom Herbert <tom@herbertland.com>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fou.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
index e0fcbbb..bd903fe 100644
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou.c
@@ -24,6 +24,7 @@ struct fou {
 	u16 type;
 	struct udp_offload udp_offloads;
 	struct list_head list;
+	struct rcu_head rcu;
 };
 
 #define FOU_F_REMCSUM_NOPARTIAL BIT(0)
@@ -417,7 +418,7 @@ static void fou_release(struct fou *fou)
 	list_del(&fou->list);
 	udp_tunnel_sock_release(sock);
 
-	kfree(fou);
+	kfree_rcu(fou, rcu);
 }
 
 static int fou_encap_init(struct sock *sk, struct fou *fou, struct fou_cfg *cfg)
-- 
2.4.1


From 99450d55526ae8c64f343738542527c42e30ff78 Mon Sep 17 00:00:00 2001
From: Rainer Weikusat <rweikusat@mobileactivedefense.com>
Date: Wed, 16 Dec 2015 20:09:25 +0000
Subject: [PATCH 32/34] af_unix: Revert 'lock_interruptible' in stream receive
 code

[ Upstream commit 3822b5c2fc62e3de8a0f33806ff279fb7df92432 ]

With b3ca9b02b00704053a38bfe4c31dbbb9c13595d0, the AF_UNIX SOCK_STREAM
receive code was changed from using mutex_lock(&u->readlock) to
mutex_lock_interruptible(&u->readlock) to prevent signals from being
delayed for an indefinite time if a thread sleeping on the mutex
happened to be selected for handling the signal. But this was never a
problem with the stream receive code (as opposed to its datagram
counterpart) as that never went to sleep waiting for new messages with the
mutex held and thus, wouldn't cause secondary readers to block on the
mutex waiting for the sleeping primary reader. As the interruptible
locking makes the code more complicated in exchange for no benefit,
change it back to using mutex_lock.

Signed-off-by: Rainer Weikusat <rweikusat@mobileactivedefense.com>
Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/unix/af_unix.c | 13 +++----------
 1 file changed, 3 insertions(+), 10 deletions(-)

diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 128b098..0fc6dba 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -2255,14 +2255,7 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state)
 	/* Lock the socket to prevent queue disordering
 	 * while sleeps in memcpy_tomsg
 	 */
-	err = mutex_lock_interruptible(&u->readlock);
-	if (unlikely(err)) {
-		/* recvmsg() in non blocking mode is supposed to return -EAGAIN
-		 * sk_rcvtimeo is not honored by mutex_lock_interruptible()
-		 */
-		err = noblock ? -EAGAIN : -ERESTARTSYS;
-		goto out;
-	}
+	mutex_lock(&u->readlock);
 
 	if (flags & MSG_PEEK)
 		skip = sk_peek_offset(sk, flags);
@@ -2306,12 +2299,12 @@ again:
 			timeo = unix_stream_data_wait(sk, timeo, last,
 						      last_len);
 
-			if (signal_pending(current) ||
-			    mutex_lock_interruptible(&u->readlock)) {
+			if (signal_pending(current)) {
 				err = sock_intr_errno(timeo);
 				goto out;
 			}
 
+			mutex_lock(&u->readlock);
 			continue;
 unlock:
 			unix_state_unlock(sk);
-- 
2.4.1


From dc9c6c2fb77bda57cc2b06d2a2b1d1befd3819fc Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 16 Dec 2015 13:53:10 -0800
Subject: [PATCH 33/34] tcp: restore fastopen with no data in SYN packet

[ Upstream commit 07e100f984975cb0417a7d5e626d0409efbad478 ]

Yuchung tracked a regression caused by commit 57be5bdad759 ("ip: convert
tcp_sendmsg() to iov_iter primitives") for TCP Fast Open.

Some Fast Open users do not actually add any data in the SYN packet.

Fixes: 57be5bdad759 ("ip: convert tcp_sendmsg() to iov_iter primitives")
Reported-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Acked-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_output.c | 23 ++++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 3dbee0d..c958596 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -3147,7 +3147,7 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct tcp_fastopen_request *fo = tp->fastopen_req;
-	int syn_loss = 0, space, err = 0, copied;
+	int syn_loss = 0, space, err = 0;
 	unsigned long last_syn_loss = 0;
 	struct sk_buff *syn_data;
 
@@ -3185,17 +3185,18 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
 		goto fallback;
 	syn_data->ip_summed = CHECKSUM_PARTIAL;
 	memcpy(syn_data->cb, syn->cb, sizeof(syn->cb));
-	copied = copy_from_iter(skb_put(syn_data, space), space,
-				&fo->data->msg_iter);
-	if (unlikely(!copied)) {
-		kfree_skb(syn_data);
-		goto fallback;
-	}
-	if (copied != space) {
-		skb_trim(syn_data, copied);
-		space = copied;
+	if (space) {
+		int copied = copy_from_iter(skb_put(syn_data, space), space,
+					    &fo->data->msg_iter);
+		if (unlikely(!copied)) {
+			kfree_skb(syn_data);
+			goto fallback;
+		}
+		if (copied != space) {
+			skb_trim(syn_data, copied);
+			space = copied;
+		}
 	}
-
 	/* No more data pending in inet_wait_for_connect() */
 	if (space == fo->size)
 		fo->data = NULL;
-- 
2.4.1


From 51a41cd14356285e5e69c7657c55061a92e7ed79 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Wed, 16 Dec 2015 16:45:54 +0800
Subject: [PATCH 34/34] rhashtable: Fix walker list corruption

[ Upstream commit c6ff5268293ef98e48a99597e765ffc417e39fa5 ]

The commit ba7c95ea3870fe7b847466d39a049ab6f156aa2c ("rhashtable:
Fix sleeping inside RCU critical section in walk_stop") introduced
a new spinlock for the walker list.  However, it did not convert
all existing users of the list over to the new spin lock.  Some
continued to use the old mutext for this purpose.  This obviously
led to corruption of the list.

The fix is to use the spin lock everywhere where we touch the list.

This also allows us to do rcu_rad_lock before we take the lock in
rhashtable_walk_start.  With the old mutex this would've deadlocked
but it's safe with the new spin lock.

Fixes: ba7c95ea3870 ("rhashtable: Fix sleeping inside RCU...")
Reported-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 lib/rhashtable.c | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index d2daf67e..aa388a7 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -503,10 +503,11 @@ int rhashtable_walk_init(struct rhashtable *ht, struct rhashtable_iter *iter)
 	if (!iter->walker)
 		return -ENOMEM;
 
-	mutex_lock(&ht->mutex);
-	iter->walker->tbl = rht_dereference(ht->tbl, ht);
+	spin_lock(&ht->lock);
+	iter->walker->tbl =
+		rcu_dereference_protected(ht->tbl, lockdep_is_held(&ht->lock));
 	list_add(&iter->walker->list, &iter->walker->tbl->walkers);
-	mutex_unlock(&ht->mutex);
+	spin_unlock(&ht->lock);
 
 	return 0;
 }
@@ -520,10 +521,10 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_init);
  */
 void rhashtable_walk_exit(struct rhashtable_iter *iter)
 {
-	mutex_lock(&iter->ht->mutex);
+	spin_lock(&iter->ht->lock);
 	if (iter->walker->tbl)
 		list_del(&iter->walker->list);
-	mutex_unlock(&iter->ht->mutex);
+	spin_unlock(&iter->ht->lock);
 	kfree(iter->walker);
 }
 EXPORT_SYMBOL_GPL(rhashtable_walk_exit);
@@ -547,14 +548,12 @@ int rhashtable_walk_start(struct rhashtable_iter *iter)
 {
 	struct rhashtable *ht = iter->ht;
 
-	mutex_lock(&ht->mutex);
+	rcu_read_lock();
 
+	spin_lock(&ht->lock);
 	if (iter->walker->tbl)
 		list_del(&iter->walker->list);
-
-	rcu_read_lock();
-
-	mutex_unlock(&ht->mutex);
+	spin_unlock(&ht->lock);
 
 	if (!iter->walker->tbl) {
 		iter->walker->tbl = rht_dereference_rcu(ht->tbl, ht);
-- 
2.4.1