From 62c169cbc376249a4e1994067edc62c7b64d4c47 Mon Sep 17 00:00:00 2001 From: Justin M. Forbes Date: Mar 27 2012 01:22:58 +0000 Subject: Linux v3.3-6972-ge22057c --- diff --git a/fix-dentry-hash.patch b/fix-dentry-hash.patch deleted file mode 100644 index c5b2f47..0000000 --- a/fix-dentry-hash.patch +++ /dev/null @@ -1,113 +0,0 @@ -From davej Thu Mar 22 16:38:38 2012 -Return-Path: linux-kernel-owner@vger.kernel.org -X-Spam-Checker-Version: SpamAssassin 3.3.2 (2011-06-06) on - gelk.kernelslacker.org -X-Spam-Level: -X-Spam-Status: No, score=-1.2 required=5.0 tests=KB_DATE_CONTAINS_TAB, - RCVD_IN_DNSWL_HI,T_RP_MATCHES_RCVD,UNPARSEABLE_RELAY autolearn=unavailable - version=3.3.2 -Received: from mail.corp.redhat.com [10.5.5.51] - by gelk.kernelslacker.org with IMAP (fetchmail-6.3.21) - for (single-drop); Thu, 22 Mar 2012 16:38:38 -0400 (EDT) -Received: from zmta02.collab.prod.int.phx2.redhat.com (LHLO - zmta02.collab.prod.int.phx2.redhat.com) (10.5.5.32) by - zmail11.collab.prod.int.phx2.redhat.com with LMTP; Thu, 22 Mar 2012 - 16:37:12 -0400 (EDT) -Received: from localhost (localhost.localdomain [127.0.0.1]) - by zmta02.collab.prod.int.phx2.redhat.com (Postfix) with ESMTP id BE4B31280F5; - Thu, 22 Mar 2012 16:37:12 -0400 (EDT) -X-Quarantine-ID: -Received: from zmta02.collab.prod.int.phx2.redhat.com ([127.0.0.1]) - by localhost (zmta02.collab.prod.int.phx2.redhat.com [127.0.0.1]) (amavisd-new, port 10024) - with ESMTP id rVyHUDnYJs0w; Thu, 22 Mar 2012 16:37:12 -0400 (EDT) -Received: from int-mx11.intmail.prod.int.phx2.redhat.com (int-mx11.intmail.prod.int.phx2.redhat.com [10.5.11.24]) - by zmta02.collab.prod.int.phx2.redhat.com (Postfix) with ESMTP id 34CCC1280EF; - Thu, 22 Mar 2012 16:37:12 -0400 (EDT) -Received: from mx1.redhat.com (ext-mx14.extmail.prod.ext.phx2.redhat.com [10.5.110.19]) - by int-mx11.intmail.prod.int.phx2.redhat.com (8.14.4/8.14.4) with ESMTP id q2MKbBbO012811; - Thu, 22 Mar 2012 16:37:11 -0400 -Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) - by mx1.redhat.com (8.14.4/8.14.4) with ESMTP id q2MIJPCS018091; - Thu, 22 Mar 2012 16:37:10 -0400 -Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand - id S1759738Ab2CVUhD (ORCPT + 54 others); - Thu, 22 Mar 2012 16:37:03 -0400 -Received: from zeniv.linux.org.uk ([195.92.253.2]:35901 "EHLO - ZenIV.linux.org.uk" rhost-flags-OK-OK-OK-OK) by vger.kernel.org - with ESMTP id S1758619Ab2CVUg7 (ORCPT - ); - Thu, 22 Mar 2012 16:36:59 -0400 -Received: from viro by ZenIV.linux.org.uk with local (Exim 4.76 #1 (Red Hat Linux)) - id 1SAokk-0008Fi-MR; Thu, 22 Mar 2012 20:36:58 +0000 -Date: Thu, 22 Mar 2012 20:36:58 +0000 -From: Al Viro -To: Linus Torvalds -Cc: linux-kernel@vger.kernel.org, xen-devel@lists.xensource.com, - Konrad Rzeszutek Wilk -Subject: Re: Regression introduced by - bfcfaa77bdf0f775263e906015982a608df01c76 (vfs: use 'unsigned long' accesses - for dcache name comparison and hashing) -Message-ID: <20120322203658.GC6589@ZenIV.linux.org.uk> -References: <20120322183845.GA17264@phenom.dumpdata.com> - <20120322200918.GZ6589@ZenIV.linux.org.uk> - <20120322202445.GB6589@ZenIV.linux.org.uk> -MIME-Version: 1.0 -Content-Type: text/plain; charset=us-ascii -Content-Disposition: inline -In-Reply-To: <20120322202445.GB6589@ZenIV.linux.org.uk> -User-Agent: Mutt/1.5.21 (2010-09-15) -Sender: linux-kernel-owner@vger.kernel.org -Precedence: bulk -List-ID: -X-Mailing-List: linux-kernel@vger.kernel.org -X-RedHat-Spam-Score: -5.01 (RCVD_IN_DNSWL_HI,T_RP_MATCHES_RCVD) -X-Scanned-By: MIMEDefang 2.68 on 10.5.11.24 -X-Scanned-By: MIMEDefang 2.68 on 10.5.110.19 -Status: RO -Content-Length: 1440 -Lines: 43 - -On Thu, Mar 22, 2012 at 08:24:45PM +0000, Al Viro wrote: -> -> OK, full_name_hash()/hash_name() definitely have a mismatch and it's on the -> names of length 8*n: trivial experiment shows that we have -> name hash_name full_name_hash -> a 61 61 -> ab 6261 6261 -> abc 636261 636261 -> abcd 64636261 64636261 -> abcdabc 64c6c4c2 64c6c4c2 -> abcdabcd efcead5 c8c6c4c2 -> abcdabcd9 efceb0e efceb0e -> -> Linus, which way do you prefer to shift it? Should hash_name() change to -> match full_name_hash() or should it be the other way round? -> -> What happens is that you get multiplication by 9 and adding 0 in the former, -> after having added the last full word. In the latter we add the last full -> word, see that there's nothing left and bugger off. - -Guys, could you check if this fixes it? - -diff --git a/fs/namei.c b/fs/namei.c -index 13e6a1f..7451d6f8 100644 ---- a/fs/namei.c -+++ b/fs/namei.c -@@ -1439,10 +1439,10 @@ unsigned int full_name_hash(const unsigned char *name, unsigned int len) - - for (;;) { - a = *(unsigned long *)name; -- hash *= 9; - if (len < sizeof(unsigned long)) - break; - hash += a; -+ hash *= 9; - name += sizeof(unsigned long); - len -= sizeof(unsigned long); - if (!len) --- -To unsubscribe from this list: send the line "unsubscribe linux-kernel" in -the body of a message to majordomo@vger.kernel.org -More majordomo info at http://vger.kernel.org/majordomo-info.html -Please read the FAQ at http://www.tux.org/lkml/ - diff --git a/kernel.spec b/kernel.spec index c8b13b4..b4da37f 100644 --- a/kernel.spec +++ b/kernel.spec @@ -62,7 +62,7 @@ Summary: The Linux kernel # For non-released -rc kernels, this will be appended after the rcX and # gitX tags, so a 3 here would become part of release "0.rcX.gitX.3" # -%global baserelease 2 +%global baserelease 1 %global fedora_build %{baserelease} # base_sublevel is the kernel version we're starting with and patching @@ -95,7 +95,7 @@ Summary: The Linux kernel # The rc snapshot level %define rcrev 0 # The git snapshot level -%define gitrev 1 +%define gitrev 2 # Set rpm version accordingly %define rpmversion 3.%{upstream_sublevel}.0 %endif @@ -653,8 +653,6 @@ Patch100: taint-vbox.patch Patch160: linux-2.6-32bit-mmap-exec-randomization.patch Patch161: linux-2.6-i386-nx-emulation.patch -Patch383: linux-2.6-defaults-aspm.patch - Patch390: linux-2.6-defaults-acpi-video.patch Patch391: linux-2.6-acpi-video-dos.patch Patch394: linux-2.6-acpi-debug-infinite-loop.patch @@ -682,7 +680,6 @@ Patch900: modsign-20111207.patch # virt + ksm patches Patch1555: fix_xen_guest_on_old_EC2.patch -Patch1556: linux-3.3-virtio-scsi.patch # DRM #atch1700: drm-edid-try-harder-to-fix-up-broken-headers.patch @@ -708,9 +705,6 @@ Patch2901: linux-2.6-v4l-dvb-experimental.patch Patch4000: ext4-fix-resize-when-resizing-within-single-group.patch # NFSv4 -Patch1102: linux-3.3-newidmapper-01.patch -Patch1103: linux-3.3-newidmapper-02.patch -Patch1104: linux-3.3-newidmapper-03.patch # patches headed upstream Patch12016: disable-i8042-check-on-apple-mac.patch @@ -734,8 +728,6 @@ Patch21010: highbank-export-clock-functions.patch Patch21070: ext4-Support-check-none-nocheck-mount-options.patch -Patch21092: udlfb-remove-sysfs-framebuffer-device-with-USB-disconnect.patch - Patch21094: power-x86-destdir.patch #rhbz 788260 @@ -744,7 +736,6 @@ Patch21233: jbd2-clear-BH_Delay-and-BH_Unwritten-in-journal_unmap_buf.patch #rhbz 754518 Patch21235: scsi-sd_revalidate_disk-prevent-NULL-ptr-deref.patch -Patch21250: mcelog-rcu-splat.patch Patch21260: x86-Avoid-invoking-RCU-when-CPU-is-idle.patch #rhbz 727865 730007 @@ -753,9 +744,6 @@ Patch21300: ACPICA-Fix-regression-in-FADT-revision-checks.patch #rhbz 728478 Patch21302: sony-laptop-Enable-keyboard-backlight-by-default.patch -#rhbz 803809 CVE-2012-1179 -Patch21304: mm-thp-fix-pmd_bad-triggering.patch - #rhbz 804007 Patch21305: mac80211-fix-possible-tid_rx-reorder_timer-use-after-free.patch @@ -766,8 +754,6 @@ Patch21400: unhandled-irqs-switch-to-polling.patch Patch22000: weird-root-dentry-name-debug.patch -Patch23000: fix-dentry-hash.patch - %endif BuildRoot: %{_tmppath}/kernel-%{KVERREL}-root @@ -1344,9 +1330,6 @@ ApplyPatch ext4-fix-resize-when-resizing-within-single-group.patch # eCryptfs # NFSv4 -ApplyPatch linux-3.3-newidmapper-01.patch -ApplyPatch linux-3.3-newidmapper-02.patch -ApplyPatch linux-3.3-newidmapper-03.patch # USB @@ -1362,8 +1345,6 @@ ApplyPatch acpi-sony-nonvs-blacklist.patch # # PCI # -# enable ASPM by default on hardware we expect to work -ApplyPatch linux-2.6-defaults-aspm.patch # # SCSI Bits. @@ -1433,7 +1414,6 @@ ApplyOptionalPatch linux-2.6-v4l-dvb-experimental.patch # Patches headed upstream ApplyPatch disable-i8042-check-on-apple-mac.patch -ApplyPatch linux-3.3-virtio-scsi.patch # rhbz#605888 ApplyPatch dmar-disable-when-ricoh-multifunction.patch @@ -1447,8 +1427,6 @@ ApplyPatch lis3-improve-handling-of-null-rate.patch ApplyPatch ext4-Support-check-none-nocheck-mount-options.patch -ApplyPatch udlfb-remove-sysfs-framebuffer-device-with-USB-disconnect.patch - ApplyPatch power-x86-destdir.patch #rhbz 788269 @@ -1457,8 +1435,6 @@ ApplyPatch jbd2-clear-BH_Delay-and-BH_Unwritten-in-journal_unmap_buf.patch #rhbz 754518 ApplyPatch scsi-sd_revalidate_disk-prevent-NULL-ptr-deref.patch -ApplyPatch mcelog-rcu-splat.patch - #rhbz 727865 730007 ApplyPatch ACPICA-Fix-regression-in-FADT-revision-checks.patch @@ -1475,11 +1451,6 @@ ApplyPatch unhandled-irqs-switch-to-polling.patch ApplyPatch weird-root-dentry-name-debug.patch -ApplyPatch fix-dentry-hash.patch - -#rhbz 803809 CVE-2012-1179 -ApplyPatch mm-thp-fix-pmd_bad-triggering.patch - #Highbank clock functions ApplyPatch highbank-export-clock-functions.patch @@ -2336,6 +2307,9 @@ fi # ||----w | # || || %changelog +* Mon Mar 26 2012 Justin M. Forbes - 3.4.0-0.rc0.git2.1 +- Linux v3.3-6972-ge22057c + * Thu Mar 22 2012 Dave Jones 3.4.0-0.rc0.git1.2 - Fix occasional EBADMSG from signed modules. (rhbz 804345) diff --git a/linux-2.6-defaults-aspm.patch b/linux-2.6-defaults-aspm.patch deleted file mode 100644 index 49b832d..0000000 --- a/linux-2.6-defaults-aspm.patch +++ /dev/null @@ -1,12 +0,0 @@ -diff -up linux-2.6.30.noarch/drivers/pci/pcie/aspm.c.mjg linux-2.6.30.noarch/drivers/pci/pcie/aspm.c ---- linux-2.6.30.noarch/drivers/pci/pcie/aspm.c.mjg 2009-07-16 22:01:11.000000000 +0100 -+++ linux-2.6.30.noarch/drivers/pci/pcie/aspm.c 2009-07-16 22:01:30.000000000 +0100 -@@ -65,7 +65,7 @@ static LIST_HEAD(link_list); - #define POLICY_DEFAULT 0 /* BIOS default setting */ - #define POLICY_PERFORMANCE 1 /* high performance */ - #define POLICY_POWERSAVE 2 /* high power saving */ --static int aspm_policy; -+static int aspm_policy = POLICY_POWERSAVE; - static const char *policy_str[] = { - [POLICY_DEFAULT] = "default", - [POLICY_PERFORMANCE] = "performance", diff --git a/linux-3.3-newidmapper-01.patch b/linux-3.3-newidmapper-01.patch deleted file mode 100644 index 9afbb93..0000000 --- a/linux-3.3-newidmapper-01.patch +++ /dev/null @@ -1,217 +0,0 @@ -commit e6499c6f4b5f56a16f8b8ef60529c1da28b13aea -Author: Bryan Schumaker -Date: Thu Jan 26 16:54:23 2012 -0500 - - NFS: Fall back on old idmapper if request_key() fails - - This patch removes the CONFIG_NFS_USE_NEW_IDMAPPER compile option. - First, the idmapper will attempt to map the id using /sbin/request-key - and nfsidmap. If this fails (if /etc/request-key.conf is not configured - properly) then the idmapper will call the legacy code to perform the - mapping. I left a comment stating where the legacy code begins to make - it easier for somebody to remove in the future. - - Signed-off-by: Bryan Schumaker - Signed-off-by: Trond Myklebust - -diff -up linux-3.2.noarch/fs/nfs/idmap.c.orig linux-3.2.noarch/fs/nfs/idmap.c ---- linux-3.2.noarch/fs/nfs/idmap.c.orig 2012-01-27 10:07:07.209851446 -0500 -+++ linux-3.2.noarch/fs/nfs/idmap.c 2012-01-27 10:15:42.914563082 -0500 -@@ -142,8 +142,6 @@ static int nfs_map_numeric_to_string(__u - return snprintf(buf, buflen, "%u", id); - } - --#ifdef CONFIG_NFS_USE_NEW_IDMAPPER -- - #include - #include - #include -@@ -328,43 +326,7 @@ static int nfs_idmap_lookup_id(const cha - return ret; - } - --int nfs_map_name_to_uid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *uid) --{ -- if (nfs_map_string_to_numeric(name, namelen, uid)) -- return 0; -- return nfs_idmap_lookup_id(name, namelen, "uid", uid); --} -- --int nfs_map_group_to_gid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *gid) --{ -- if (nfs_map_string_to_numeric(name, namelen, gid)) -- return 0; -- return nfs_idmap_lookup_id(name, namelen, "gid", gid); --} -- --int nfs_map_uid_to_name(const struct nfs_server *server, __u32 uid, char *buf, size_t buflen) --{ -- int ret = -EINVAL; -- -- if (!(server->caps & NFS_CAP_UIDGID_NOMAP)) -- ret = nfs_idmap_lookup_name(uid, "user", buf, buflen); -- if (ret < 0) -- ret = nfs_map_numeric_to_string(uid, buf, buflen); -- return ret; --} --int nfs_map_gid_to_group(const struct nfs_server *server, __u32 gid, char *buf, size_t buflen) --{ -- int ret = -EINVAL; -- -- if (!(server->caps & NFS_CAP_UIDGID_NOMAP)) -- ret = nfs_idmap_lookup_name(gid, "group", buf, buflen); -- if (ret < 0) -- ret = nfs_map_numeric_to_string(gid, buf, buflen); -- return ret; --} -- --#else /* CONFIG_NFS_USE_NEW_IDMAPPER not defined */ -- -+/* idmap classic begins here */ - #include - #include - #include -@@ -796,19 +758,27 @@ static unsigned int fnvhash32(const void - int nfs_map_name_to_uid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *uid) - { - struct idmap *idmap = server->nfs_client->cl_idmap; -+ int ret = -EINVAL; - - if (nfs_map_string_to_numeric(name, namelen, uid)) - return 0; -- return nfs_idmap_id(idmap, &idmap->idmap_user_hash, name, namelen, uid); -+ ret = nfs_idmap_lookup_id(name, namelen, "uid", uid); -+ if (ret < 0) -+ ret = nfs_idmap_id(idmap, &idmap->idmap_user_hash, name, namelen, uid); -+ return ret; - } - --int nfs_map_group_to_gid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *uid) -+int nfs_map_group_to_gid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *gid) - { - struct idmap *idmap = server->nfs_client->cl_idmap; -+ int ret = -EINVAL; - -- if (nfs_map_string_to_numeric(name, namelen, uid)) -+ if (nfs_map_string_to_numeric(name, namelen, gid)) - return 0; -- return nfs_idmap_id(idmap, &idmap->idmap_group_hash, name, namelen, uid); -+ ret = nfs_idmap_lookup_id(name, namelen, "gid", gid); -+ if (ret < 0) -+ ret = nfs_idmap_id(idmap, &idmap->idmap_group_hash, name, namelen, gid); -+ return ret; - } - - int nfs_map_uid_to_name(const struct nfs_server *server, __u32 uid, char *buf, size_t buflen) -@@ -816,22 +786,26 @@ int nfs_map_uid_to_name(const struct nfs - struct idmap *idmap = server->nfs_client->cl_idmap; - int ret = -EINVAL; - -- if (!(server->caps & NFS_CAP_UIDGID_NOMAP)) -- ret = nfs_idmap_name(idmap, &idmap->idmap_user_hash, uid, buf); -+ if (!(server->caps & NFS_CAP_UIDGID_NOMAP)) { -+ ret = nfs_idmap_lookup_name(uid, "user", buf, buflen); -+ if (ret < 0) -+ ret = nfs_idmap_name(idmap, &idmap->idmap_user_hash, uid, buf); -+ } - if (ret < 0) - ret = nfs_map_numeric_to_string(uid, buf, buflen); - return ret; - } --int nfs_map_gid_to_group(const struct nfs_server *server, __u32 uid, char *buf, size_t buflen) -+int nfs_map_gid_to_group(const struct nfs_server *server, __u32 gid, char *buf, size_t buflen) - { - struct idmap *idmap = server->nfs_client->cl_idmap; - int ret = -EINVAL; - -- if (!(server->caps & NFS_CAP_UIDGID_NOMAP)) -- ret = nfs_idmap_name(idmap, &idmap->idmap_group_hash, uid, buf); -+ if (!(server->caps & NFS_CAP_UIDGID_NOMAP)) { -+ ret = nfs_idmap_lookup_name(gid, "group", buf, buflen); -+ if (ret < 0) -+ ret = nfs_idmap_name(idmap, &idmap->idmap_group_hash, gid, buf); -+ } - if (ret < 0) -- ret = nfs_map_numeric_to_string(uid, buf, buflen); -+ ret = nfs_map_numeric_to_string(gid, buf, buflen); - return ret; - } -- --#endif /* CONFIG_NFS_USE_NEW_IDMAPPER */ -diff -up linux-3.2.noarch/fs/nfs/Kconfig.orig linux-3.2.noarch/fs/nfs/Kconfig ---- linux-3.2.noarch/fs/nfs/Kconfig.orig 2012-01-04 18:55:44.000000000 -0500 -+++ linux-3.2.noarch/fs/nfs/Kconfig 2012-01-27 10:15:42.913562572 -0500 -@@ -132,14 +132,3 @@ config NFS_USE_KERNEL_DNS - select DNS_RESOLVER - select KEYS - default y -- --config NFS_USE_NEW_IDMAPPER -- bool "Use the new idmapper upcall routine" -- depends on NFS_V4 && KEYS -- help -- Say Y here if you want NFS to use the new idmapper upcall functions. -- You will need /sbin/request-key (usually provided by the keyutils -- package). For details, read -- . -- -- If you are unsure, say N. -diff -up linux-3.2.noarch/fs/nfs/sysctl.c.orig linux-3.2.noarch/fs/nfs/sysctl.c ---- linux-3.2.noarch/fs/nfs/sysctl.c.orig 2012-01-04 18:55:44.000000000 -0500 -+++ linux-3.2.noarch/fs/nfs/sysctl.c 2012-01-27 10:15:42.914563082 -0500 -@@ -32,7 +32,6 @@ static ctl_table nfs_cb_sysctls[] = { - .extra1 = (int *)&nfs_set_port_min, - .extra2 = (int *)&nfs_set_port_max, - }, --#ifndef CONFIG_NFS_USE_NEW_IDMAPPER - { - .procname = "idmap_cache_timeout", - .data = &nfs_idmap_cache_timeout, -@@ -40,7 +39,6 @@ static ctl_table nfs_cb_sysctls[] = { - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, --#endif /* CONFIG_NFS_USE_NEW_IDMAPPER */ - #endif - { - .procname = "nfs_mountpoint_timeout", -diff -up linux-3.2.noarch/include/linux/nfs_idmap.h.orig linux-3.2.noarch/include/linux/nfs_idmap.h ---- linux-3.2.noarch/include/linux/nfs_idmap.h.orig 2012-01-27 10:06:46.783643915 -0500 -+++ linux-3.2.noarch/include/linux/nfs_idmap.h 2012-01-27 10:15:42.915563594 -0500 -@@ -69,36 +69,11 @@ struct nfs_server; - struct nfs_fattr; - struct nfs4_string; - --#ifdef CONFIG_NFS_USE_NEW_IDMAPPER -- - int nfs_idmap_init(void); - void nfs_idmap_quit(void); -- --static inline int nfs_idmap_new(struct nfs_client *clp) --{ -- return 0; --} -- --static inline void nfs_idmap_delete(struct nfs_client *clp) --{ --} -- --#else /* CONFIG_NFS_USE_NEW_IDMAPPER not set */ -- --static inline int nfs_idmap_init(void) --{ -- return 0; --} -- --static inline void nfs_idmap_quit(void) --{ --} -- - int nfs_idmap_new(struct nfs_client *); - void nfs_idmap_delete(struct nfs_client *); - --#endif /* CONFIG_NFS_USE_NEW_IDMAPPER */ -- - void nfs_fattr_init_names(struct nfs_fattr *fattr, - struct nfs4_string *owner_name, - struct nfs4_string *group_name); diff --git a/linux-3.3-newidmapper-02.patch b/linux-3.3-newidmapper-02.patch deleted file mode 100644 index 9307ee0..0000000 --- a/linux-3.3-newidmapper-02.patch +++ /dev/null @@ -1,97 +0,0 @@ -commit 3cd0f37a2cc9e4d6188df10041a2441eaa41d991 -Author: Bryan Schumaker -Date: Thu Jan 26 16:54:24 2012 -0500 - - NFS: Keep idmapper include files in one place - - Signed-off-by: Bryan Schumaker - Signed-off-by: Trond Myklebust - -diff -up linux-3.2.noarch/fs/nfs/idmap.c.orig linux-3.2.noarch/fs/nfs/idmap.c ---- linux-3.2.noarch/fs/nfs/idmap.c.orig 2012-01-27 10:15:42.914563082 -0500 -+++ linux-3.2.noarch/fs/nfs/idmap.c 2012-01-27 10:19:22.711401559 -0500 -@@ -39,6 +39,36 @@ - #include - #include - #include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+/* include files needed by legacy idmapper */ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include "nfs4_fs.h" -+ -+#define NFS_UINT_MAXLEN 11 -+#define IDMAP_HASH_SZ 128 -+ -+/* Default cache timeout is 10 minutes */ -+unsigned int nfs_idmap_cache_timeout = 600 * HZ; -+const struct cred *id_resolver_cache; -+ - - /** - * nfs_fattr_init_names - initialise the nfs_fattr owner_name/group_name fields -@@ -142,21 +172,6 @@ static int nfs_map_numeric_to_string(__u - return snprintf(buf, buflen, "%u", id); - } - --#include --#include --#include --#include --#include --#include --#include --#include -- --#include -- --#define NFS_UINT_MAXLEN 11 -- --const struct cred *id_resolver_cache; -- - struct key_type key_type_id_resolver = { - .name = "id_resolver", - .instantiate = user_instantiate, -@@ -327,25 +342,6 @@ static int nfs_idmap_lookup_id(const cha - } - - /* idmap classic begins here */ --#include --#include --#include --#include --#include --#include --#include --#include --#include -- --#include -- --#include "nfs4_fs.h" -- --#define IDMAP_HASH_SZ 128 -- --/* Default cache timeout is 10 minutes */ --unsigned int nfs_idmap_cache_timeout = 600 * HZ; -- - static int param_set_idmap_timeout(const char *val, struct kernel_param *kp) - { - char *endp; diff --git a/linux-3.3-newidmapper-03.patch b/linux-3.3-newidmapper-03.patch deleted file mode 100644 index 7018e35..0000000 --- a/linux-3.3-newidmapper-03.patch +++ /dev/null @@ -1,40 +0,0 @@ -commit a602bea3e7ccc5ce3da61d2c18245c4058983926 -Author: Bryan Schumaker -Date: Thu Jan 26 16:54:25 2012 -0500 - - NFS: Update idmapper documentation - - Signed-off-by: Bryan Schumaker - Signed-off-by: Trond Myklebust - -diff -up linux-3.2.noarch/Documentation/filesystems/nfs/idmapper.txt.orig linux-3.2.noarch/Documentation/filesystems/nfs/idmapper.txt ---- linux-3.2.noarch/Documentation/filesystems/nfs/idmapper.txt.orig 2012-01-04 18:55:44.000000000 -0500 -+++ linux-3.2.noarch/Documentation/filesystems/nfs/idmapper.txt 2012-01-27 10:19:55.406740364 -0500 -@@ -4,13 +4,21 @@ ID Mapper - ========= - Id mapper is used by NFS to translate user and group ids into names, and to - translate user and group names into ids. Part of this translation involves --performing an upcall to userspace to request the information. Id mapper will --user request-key to perform this upcall and cache the result. The program --/usr/sbin/nfs.idmap should be called by request-key, and will perform the --translation and initialize a key with the resulting information. -+performing an upcall to userspace to request the information. There are two -+ways NFS could obtain this information: placing a call to /sbin/request-key -+or by placing a call to the rpc.idmap daemon. -+ -+NFS will attempt to call /sbin/request-key first. If this succeeds, the -+result will be cached using the generic request-key cache. This call should -+only fail if /etc/request-key.conf is not configured for the id_resolver key -+type, see the "Configuring" section below if you wish to use the request-key -+method. -+ -+If the call to /sbin/request-key fails (if /etc/request-key.conf is not -+configured with the id_resolver key type), then the idmapper will ask the -+legacy rpc.idmap daemon for the id mapping. This result will be stored -+in a custom NFS idmap cache. - -- NFS_USE_NEW_IDMAPPER must be selected when configuring the kernel to use this -- feature. - - =========== - Configuring diff --git a/linux-3.3-virtio-scsi.patch b/linux-3.3-virtio-scsi.patch deleted file mode 100644 index 9d944f5..0000000 --- a/linux-3.3-virtio-scsi.patch +++ /dev/null @@ -1,993 +0,0 @@ -From 43cf1b6a4ee31e69581042a0c85d1398f83dcedc Mon Sep 17 00:00:00 2001 -From: Paolo Bonzini -Date: Fri, 20 Jan 2012 17:27:20 +0100 -Cc: -Cc: Rusty Russell -Cc: kvm@vger.kernel.org -Cc: Pekka Enberg -Cc: Michael S. Tsirkin -Cc: Stefan Hajnoczi , Mike Christie -Subject: [PATCH v5 0/3] virtio-scsi driver - -This is the first implementation of the virtio-scsi driver, a virtual -HBA that will be supported by KVM. It implements a subset of the spec, -in particular it does not implement asynchronous notifications for either -LUN reset/removal/addition or CD-ROM media events, but it is already -functional and usable. - -Other matching bits: - -- spec at http://people.redhat.com/pbonzini/virtio-spec.pdf - -- QEMU implementation at git://github.com/bonzini/qemu.git, - branch virtio-scsi - -Please review. Getting this in 3.3 is starting to look like wishful thinking, -but the possibility of regressions is obviously zero so I'm still dreaming. -Otherwise, that would be 3.4. - -Paolo Bonzini (3): - virtio-scsi: first version - virtio-scsi: add error handling - virtio-scsi: add power management support - -v4->v5: change virtio id from 7 to 8 - -v3->v4: renamed VIRTIO_SCSI_S_UNDERRUN to VIRTIO_SCSI_S_OVERRUN; - fixed 32-bit compilation; added power management support; - adjusted calls to virtqueue_add_buf - - drivers/scsi/Kconfig | 8 + - drivers/scsi/Makefile | 1 + - drivers/scsi/virtio_scsi.c | 594 +++++++++++++++++++++++++++++++++++++++++++ - include/linux/virtio_ids.h | 1 + - include/linux/virtio_scsi.h | 114 +++++++++ - 5 files changed, 718 insertions(+), 0 deletions(-) - create mode 100644 drivers/scsi/virtio_scsi.c - create mode 100644 include/linux/virtio_scsi.h - -From 84ad93b7215e18ab1755a625ede0fb00175e79bb Mon Sep 17 00:00:00 2001 -From: Paolo Bonzini -Date: Tue, 29 Nov 2011 16:31:09 +0100 -Cc: Stefan Hajnoczi , Mike Christie , Pekka Enberg -Subject: [PATCH v5 1/3] virtio-scsi: first version - -The virtio-scsi HBA is the basis of an alternative storage stack -for QEMU-based virtual machines (including KVM). Compared to -virtio-blk it is more scalable, because it supports many LUNs -on a single PCI slot), more powerful (it more easily supports -passthrough of host devices to the guest) and more easily -extensible (new SCSI features implemented by QEMU should not -require updating the driver in the guest). - -Cc: linux-scsi -Cc: Rusty Russell -Cc: Michael S. Tsirkin -Cc: kvm@vger.kernel.org -Acked-by: Pekka Enberg -Signed-off-by: Paolo Bonzini ---- - v4->v5: change virtio id from 7 to 8 - - v3->v4: renamed VIRTIO_SCSI_S_UNDERRUN to VIRTIO_SCSI_S_OVERRUN; - fixed 32-bit compilation; adjust call to virtqueue_add_buf - - v2->v3: added mempool, formatting fixes - - v1->v2: use dbg_dev, sdev_printk, scmd_printk - - renamed lock to vq_lock - - renamed cmd_vq to req_vq (and other similar changes) - - fixed missing break in VIRTIO_SCSI_S_OVERRUN - - added VIRTIO_SCSI_S_BUSY - - removed unused argument from virtscsi_map_cmd - - fixed two tabs that had slipped in - - moved max_sectors and cmd_per_lun from template to config space - - __attribute__((packed)) -> __packed - - drivers/scsi/Kconfig | 8 + - drivers/scsi/Makefile | 1 + - drivers/scsi/virtio_scsi.c | 503 +++++++++++++++++++++++++++++++++++++++++++ - include/linux/virtio_ids.h | 1 + - include/linux/virtio_scsi.h | 114 ++++++++++ - 5 files changed, 627 insertions(+), 0 deletions(-) - create mode 100644 drivers/scsi/virtio_scsi.c - create mode 100644 include/linux/virtio_scsi.h - -diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig -index 16570aa..827ebaf 100644 ---- a/drivers/scsi/Kconfig -+++ b/drivers/scsi/Kconfig -@@ -1897,6 +1897,14 @@ config SCSI_BFA_FC - To compile this driver as a module, choose M here. The module will - be called bfa. - -+config SCSI_VIRTIO -+ tristate "virtio-scsi support (EXPERIMENTAL)" -+ depends on EXPERIMENTAL && VIRTIO -+ help -+ This is the virtual HBA driver for virtio. If the kernel will -+ be used in a virtual machine, say Y or M. -+ -+ - endif # SCSI_LOWLEVEL - - source "drivers/scsi/pcmcia/Kconfig" -diff --git a/drivers/scsi/Makefile b/drivers/scsi/Makefile -index e4c1a69..ad24e06 100644 ---- a/drivers/scsi/Makefile -+++ b/drivers/scsi/Makefile -@@ -141,6 +141,7 @@ obj-$(CONFIG_SCSI_CXGB4_ISCSI) += libiscsi.o libiscsi_tcp.o cxgbi/ - obj-$(CONFIG_SCSI_BNX2_ISCSI) += libiscsi.o bnx2i/ - obj-$(CONFIG_BE2ISCSI) += libiscsi.o be2iscsi/ - obj-$(CONFIG_SCSI_PMCRAID) += pmcraid.o -+obj-$(CONFIG_SCSI_VIRTIO) += virtio_scsi.o - obj-$(CONFIG_VMWARE_PVSCSI) += vmw_pvscsi.o - obj-$(CONFIG_HYPERV_STORAGE) += hv_storvsc.o - -diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c -new file mode 100644 -index 0000000..3f87ae0 ---- /dev/null -+++ b/drivers/scsi/virtio_scsi.c -@@ -0,0 +1,503 @@ -+/* -+ * Virtio SCSI HBA driver -+ * -+ * Copyright IBM Corp. 2010 -+ * Copyright Red Hat, Inc. 2011 -+ * -+ * Authors: -+ * Stefan Hajnoczi -+ * Paolo Bonzini -+ * -+ * This work is licensed under the terms of the GNU GPL, version 2 or later. -+ * See the COPYING file in the top-level directory. -+ * -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#define VIRTIO_SCSI_MEMPOOL_SZ 64 -+ -+/* Command queue element */ -+struct virtio_scsi_cmd { -+ struct scsi_cmnd *sc; -+ union { -+ struct virtio_scsi_cmd_req cmd; -+ struct virtio_scsi_ctrl_tmf_req tmf; -+ struct virtio_scsi_ctrl_an_req an; -+ } req; -+ union { -+ struct virtio_scsi_cmd_resp cmd; -+ struct virtio_scsi_ctrl_tmf_resp tmf; -+ struct virtio_scsi_ctrl_an_resp an; -+ struct virtio_scsi_event evt; -+ } resp; -+} ____cacheline_aligned_in_smp; -+ -+/* Driver instance state */ -+struct virtio_scsi { -+ /* Protects ctrl_vq, req_vq and sg[] */ -+ spinlock_t vq_lock; -+ -+ struct virtio_device *vdev; -+ struct virtqueue *ctrl_vq; -+ struct virtqueue *event_vq; -+ struct virtqueue *req_vq; -+ -+ /* For sglist construction when adding commands to the virtqueue. */ -+ struct scatterlist sg[]; -+}; -+ -+static struct kmem_cache *virtscsi_cmd_cache; -+static mempool_t *virtscsi_cmd_pool; -+ -+static inline struct Scsi_Host *virtio_scsi_host(struct virtio_device *vdev) -+{ -+ return vdev->priv; -+} -+ -+static void virtscsi_compute_resid(struct scsi_cmnd *sc, u32 resid) -+{ -+ if (!resid) -+ return; -+ -+ if (!scsi_bidi_cmnd(sc)) { -+ scsi_set_resid(sc, resid); -+ return; -+ } -+ -+ scsi_in(sc)->resid = min(resid, scsi_in(sc)->length); -+ scsi_out(sc)->resid = resid - scsi_in(sc)->resid; -+} -+ -+/** -+ * virtscsi_complete_cmd - finish a scsi_cmd and invoke scsi_done -+ * -+ * Called with vq_lock held. -+ */ -+static void virtscsi_complete_cmd(void *buf) -+{ -+ struct virtio_scsi_cmd *cmd = buf; -+ struct scsi_cmnd *sc = cmd->sc; -+ struct virtio_scsi_cmd_resp *resp = &cmd->resp.cmd; -+ -+ dev_dbg(&sc->device->sdev_gendev, -+ "cmd %p response %u status %#02x sense_len %u\n", -+ sc, resp->response, resp->status, resp->sense_len); -+ -+ sc->result = resp->status; -+ virtscsi_compute_resid(sc, resp->resid); -+ switch (resp->response) { -+ case VIRTIO_SCSI_S_OK: -+ set_host_byte(sc, DID_OK); -+ break; -+ case VIRTIO_SCSI_S_OVERRUN: -+ set_host_byte(sc, DID_ERROR); -+ break; -+ case VIRTIO_SCSI_S_ABORTED: -+ set_host_byte(sc, DID_ABORT); -+ break; -+ case VIRTIO_SCSI_S_BAD_TARGET: -+ set_host_byte(sc, DID_BAD_TARGET); -+ break; -+ case VIRTIO_SCSI_S_RESET: -+ set_host_byte(sc, DID_RESET); -+ break; -+ case VIRTIO_SCSI_S_BUSY: -+ set_host_byte(sc, DID_BUS_BUSY); -+ break; -+ case VIRTIO_SCSI_S_TRANSPORT_FAILURE: -+ set_host_byte(sc, DID_TRANSPORT_DISRUPTED); -+ break; -+ case VIRTIO_SCSI_S_TARGET_FAILURE: -+ set_host_byte(sc, DID_TARGET_FAILURE); -+ break; -+ case VIRTIO_SCSI_S_NEXUS_FAILURE: -+ set_host_byte(sc, DID_NEXUS_FAILURE); -+ break; -+ default: -+ scmd_printk(KERN_WARNING, sc, "Unknown response %d", -+ resp->response); -+ /* fall through */ -+ case VIRTIO_SCSI_S_FAILURE: -+ set_host_byte(sc, DID_ERROR); -+ break; -+ } -+ -+ WARN_ON(resp->sense_len > VIRTIO_SCSI_SENSE_SIZE); -+ if (sc->sense_buffer) { -+ memcpy(sc->sense_buffer, resp->sense, -+ min_t(u32, resp->sense_len, VIRTIO_SCSI_SENSE_SIZE)); -+ if (resp->sense_len) -+ set_driver_byte(sc, DRIVER_SENSE); -+ } -+ -+ mempool_free(cmd, virtscsi_cmd_pool); -+ sc->scsi_done(sc); -+} -+ -+static void virtscsi_vq_done(struct virtqueue *vq, void (*fn)(void *buf)) -+{ -+ struct Scsi_Host *sh = virtio_scsi_host(vq->vdev); -+ struct virtio_scsi *vscsi = shost_priv(sh); -+ void *buf; -+ unsigned long flags; -+ unsigned int len; -+ -+ spin_lock_irqsave(&vscsi->vq_lock, flags); -+ -+ do { -+ virtqueue_disable_cb(vq); -+ while ((buf = virtqueue_get_buf(vq, &len)) != NULL) -+ fn(buf); -+ } while (!virtqueue_enable_cb(vq)); -+ -+ spin_unlock_irqrestore(&vscsi->vq_lock, flags); -+} -+ -+static void virtscsi_req_done(struct virtqueue *vq) -+{ -+ virtscsi_vq_done(vq, virtscsi_complete_cmd); -+}; -+ -+/* These are still stubs. */ -+static void virtscsi_complete_free(void *buf) -+{ -+ struct virtio_scsi_cmd *cmd = buf; -+ -+ mempool_free(cmd, virtscsi_cmd_pool); -+} -+ -+static void virtscsi_ctrl_done(struct virtqueue *vq) -+{ -+ virtscsi_vq_done(vq, virtscsi_complete_free); -+}; -+ -+static void virtscsi_event_done(struct virtqueue *vq) -+{ -+ virtscsi_vq_done(vq, virtscsi_complete_free); -+}; -+ -+static void virtscsi_map_sgl(struct scatterlist *sg, unsigned int *p_idx, -+ struct scsi_data_buffer *sdb) -+{ -+ struct sg_table *table = &sdb->table; -+ struct scatterlist *sg_elem; -+ unsigned int idx = *p_idx; -+ int i; -+ -+ for_each_sg(table->sgl, sg_elem, table->nents, i) -+ sg_set_buf(&sg[idx++], sg_virt(sg_elem), sg_elem->length); -+ -+ *p_idx = idx; -+} -+ -+/** -+ * virtscsi_map_cmd - map a scsi_cmd to a virtqueue scatterlist -+ * @vscsi : virtio_scsi state -+ * @cmd : command structure -+ * @out_num : number of read-only elements -+ * @in_num : number of write-only elements -+ * @req_size : size of the request buffer -+ * @resp_size : size of the response buffer -+ * -+ * Called with vq_lock held. -+ */ -+static void virtscsi_map_cmd(struct virtio_scsi *vscsi, -+ struct virtio_scsi_cmd *cmd, -+ unsigned *out_num, unsigned *in_num, -+ size_t req_size, size_t resp_size) -+{ -+ struct scsi_cmnd *sc = cmd->sc; -+ struct scatterlist *sg = vscsi->sg; -+ unsigned int idx = 0; -+ -+ if (sc) { -+ struct Scsi_Host *shost = virtio_scsi_host(vscsi->vdev); -+ BUG_ON(scsi_sg_count(sc) > shost->sg_tablesize); -+ -+ /* TODO: check feature bit and fail if unsupported? */ -+ BUG_ON(sc->sc_data_direction == DMA_BIDIRECTIONAL); -+ } -+ -+ /* Request header. */ -+ sg_set_buf(&sg[idx++], &cmd->req, req_size); -+ -+ /* Data-out buffer. */ -+ if (sc && sc->sc_data_direction != DMA_FROM_DEVICE) -+ virtscsi_map_sgl(sg, &idx, scsi_out(sc)); -+ -+ *out_num = idx; -+ -+ /* Response header. */ -+ sg_set_buf(&sg[idx++], &cmd->resp, resp_size); -+ -+ /* Data-in buffer */ -+ if (sc && sc->sc_data_direction != DMA_TO_DEVICE) -+ virtscsi_map_sgl(sg, &idx, scsi_in(sc)); -+ -+ *in_num = idx - *out_num; -+} -+ -+static int virtscsi_kick_cmd(struct virtio_scsi *vscsi, struct virtqueue *vq, -+ struct virtio_scsi_cmd *cmd, -+ size_t req_size, size_t resp_size, gfp_t gfp) -+{ -+ unsigned int out_num, in_num; -+ unsigned long flags; -+ int ret; -+ -+ spin_lock_irqsave(&vscsi->vq_lock, flags); -+ -+ virtscsi_map_cmd(vscsi, cmd, &out_num, &in_num, req_size, resp_size); -+ -+ ret = virtqueue_add_buf(vq, vscsi->sg, out_num, in_num, cmd, gfp); -+ if (ret >= 0) -+ virtqueue_kick(vq); -+ -+ spin_unlock_irqrestore(&vscsi->vq_lock, flags); -+ return ret; -+} -+ -+static int virtscsi_queuecommand(struct Scsi_Host *sh, struct scsi_cmnd *sc) -+{ -+ struct virtio_scsi *vscsi = shost_priv(sh); -+ struct virtio_scsi_cmd *cmd; -+ int ret; -+ -+ dev_dbg(&sc->device->sdev_gendev, -+ "cmd %p CDB: %#02x\n", sc, sc->cmnd[0]); -+ -+ ret = SCSI_MLQUEUE_HOST_BUSY; -+ cmd = mempool_alloc(virtscsi_cmd_pool, GFP_ATOMIC); -+ if (!cmd) -+ goto out; -+ -+ memset(cmd, 0, sizeof(*cmd)); -+ cmd->sc = sc; -+ cmd->req.cmd = (struct virtio_scsi_cmd_req){ -+ .lun[0] = 1, -+ .lun[1] = sc->device->id, -+ .lun[2] = (sc->device->lun >> 8) | 0x40, -+ .lun[3] = sc->device->lun & 0xff, -+ .tag = (unsigned long)sc, -+ .task_attr = VIRTIO_SCSI_S_SIMPLE, -+ .prio = 0, -+ .crn = 0, -+ }; -+ -+ BUG_ON(sc->cmd_len > VIRTIO_SCSI_CDB_SIZE); -+ memcpy(cmd->req.cmd.cdb, sc->cmnd, sc->cmd_len); -+ -+ if (virtscsi_kick_cmd(vscsi, vscsi->req_vq, cmd, -+ sizeof cmd->req.cmd, sizeof cmd->resp.cmd, -+ GFP_ATOMIC) >= 0) -+ ret = 0; -+ -+out: -+ return ret; -+} -+ -+static struct scsi_host_template virtscsi_host_template = { -+ .module = THIS_MODULE, -+ .name = "Virtio SCSI HBA", -+ .proc_name = "virtio_scsi", -+ .queuecommand = virtscsi_queuecommand, -+ .this_id = -1, -+ -+ .can_queue = 1024, -+ .dma_boundary = UINT_MAX, -+ .use_clustering = ENABLE_CLUSTERING, -+}; -+ -+#define virtscsi_config_get(vdev, fld) \ -+ ({ \ -+ typeof(((struct virtio_scsi_config *)0)->fld) __val; \ -+ vdev->config->get(vdev, \ -+ offsetof(struct virtio_scsi_config, fld), \ -+ &__val, sizeof(__val)); \ -+ __val; \ -+ }) -+ -+#define virtscsi_config_set(vdev, fld, val) \ -+ (void)({ \ -+ typeof(((struct virtio_scsi_config *)0)->fld) __val = (val); \ -+ vdev->config->set(vdev, \ -+ offsetof(struct virtio_scsi_config, fld), \ -+ &__val, sizeof(__val)); \ -+ }) -+ -+static int __devinit virtscsi_init(struct virtio_device *vdev, -+ struct virtio_scsi *vscsi) -+{ -+ int err; -+ struct virtqueue *vqs[3]; -+ vq_callback_t *callbacks[] = { -+ virtscsi_ctrl_done, -+ virtscsi_event_done, -+ virtscsi_req_done -+ }; -+ const char *names[] = { -+ "control", -+ "event", -+ "request" -+ }; -+ -+ /* Discover virtqueues and write information to configuration. */ -+ err = vdev->config->find_vqs(vdev, 3, vqs, callbacks, names); -+ if (err) -+ return err; -+ -+ vscsi->ctrl_vq = vqs[0]; -+ vscsi->event_vq = vqs[1]; -+ vscsi->req_vq = vqs[2]; -+ -+ virtscsi_config_set(vdev, cdb_size, VIRTIO_SCSI_CDB_SIZE); -+ virtscsi_config_set(vdev, sense_size, VIRTIO_SCSI_SENSE_SIZE); -+ return 0; -+} -+ -+static int __devinit virtscsi_probe(struct virtio_device *vdev) -+{ -+ struct Scsi_Host *shost; -+ struct virtio_scsi *vscsi; -+ int err; -+ u32 sg_elems; -+ u32 cmd_per_lun; -+ -+ /* We need to know how many segments before we allocate. -+ * We need an extra sg elements at head and tail. -+ */ -+ sg_elems = virtscsi_config_get(vdev, seg_max) ?: 1; -+ -+ /* Allocate memory and link the structs together. */ -+ shost = scsi_host_alloc(&virtscsi_host_template, -+ sizeof(*vscsi) + sizeof(vscsi->sg[0]) * (sg_elems + 2)); -+ -+ if (!shost) -+ return -ENOMEM; -+ -+ shost->sg_tablesize = sg_elems; -+ vscsi = shost_priv(shost); -+ vscsi->vdev = vdev; -+ vdev->priv = shost; -+ -+ /* Random initializations. */ -+ spin_lock_init(&vscsi->vq_lock); -+ sg_init_table(vscsi->sg, sg_elems + 2); -+ -+ err = virtscsi_init(vdev, vscsi); -+ if (err) -+ goto virtscsi_init_failed; -+ -+ cmd_per_lun = virtscsi_config_get(vdev, cmd_per_lun) ?: 1; -+ shost->cmd_per_lun = min_t(u32, cmd_per_lun, shost->can_queue); -+ shost->max_sectors = virtscsi_config_get(vdev, max_sectors) ?: 0xFFFF; -+ shost->max_lun = virtscsi_config_get(vdev, max_lun) + 1; -+ shost->max_id = virtscsi_config_get(vdev, max_target) + 1; -+ shost->max_channel = 0; -+ shost->max_cmd_len = VIRTIO_SCSI_CDB_SIZE; -+ err = scsi_add_host(shost, &vdev->dev); -+ if (err) -+ goto scsi_add_host_failed; -+ -+ scsi_scan_host(shost); -+ -+ return 0; -+ -+scsi_add_host_failed: -+ vdev->config->del_vqs(vdev); -+virtscsi_init_failed: -+ scsi_host_put(shost); -+ return err; -+} -+ -+static void __devexit virtscsi_remove_vqs(struct virtio_device *vdev) -+{ -+ /* Stop all the virtqueues. */ -+ vdev->config->reset(vdev); -+ -+ vdev->config->del_vqs(vdev); -+} -+ -+static void __devexit virtscsi_remove(struct virtio_device *vdev) -+{ -+ struct Scsi_Host *shost = virtio_scsi_host(vdev); -+ -+ scsi_remove_host(shost); -+ -+ virtscsi_remove_vqs(vdev); -+ scsi_host_put(shost); -+} -+ -+static struct virtio_device_id id_table[] = { -+ { VIRTIO_ID_SCSI, VIRTIO_DEV_ANY_ID }, -+ { 0 }, -+}; -+ -+static struct virtio_driver virtio_scsi_driver = { -+ .driver.name = KBUILD_MODNAME, -+ .driver.owner = THIS_MODULE, -+ .id_table = id_table, -+ .probe = virtscsi_probe, -+ .remove = __devexit_p(virtscsi_remove), -+}; -+ -+static int __init init(void) -+{ -+ int ret = -ENOMEM; -+ -+ virtscsi_cmd_cache = KMEM_CACHE(virtio_scsi_cmd, 0); -+ if (!virtscsi_cmd_cache) { -+ printk(KERN_ERR "kmem_cache_create() for " -+ "virtscsi_cmd_cache failed\n"); -+ goto error; -+ } -+ -+ -+ virtscsi_cmd_pool = -+ mempool_create_slab_pool(VIRTIO_SCSI_MEMPOOL_SZ, -+ virtscsi_cmd_cache); -+ if (!virtscsi_cmd_pool) { -+ printk(KERN_ERR "mempool_create() for" -+ "virtscsi_cmd_pool failed\n"); -+ goto error; -+ } -+ ret = register_virtio_driver(&virtio_scsi_driver); -+ if (ret < 0) -+ goto error; -+ -+ return 0; -+ -+error: -+ if (virtscsi_cmd_pool) { -+ mempool_destroy(virtscsi_cmd_pool); -+ virtscsi_cmd_pool = NULL; -+ } -+ if (virtscsi_cmd_cache) { -+ kmem_cache_destroy(virtscsi_cmd_cache); -+ virtscsi_cmd_cache = NULL; -+ } -+ return ret; -+} -+ -+static void __exit fini(void) -+{ -+ unregister_virtio_driver(&virtio_scsi_driver); -+ mempool_destroy(virtscsi_cmd_pool); -+ kmem_cache_destroy(virtscsi_cmd_cache); -+} -+module_init(init); -+module_exit(fini); -+ -+MODULE_DEVICE_TABLE(virtio, id_table); -+MODULE_DESCRIPTION("Virtio SCSI HBA driver"); -+MODULE_LICENSE("GPL"); -diff --git a/include/linux/virtio_ids.h b/include/linux/virtio_ids.h -index 85bb0bb..d83ae52 100644 ---- a/include/linux/virtio_ids.h -+++ b/include/linux/virtio_ids.h -@@ -34,6 +34,7 @@ - #define VIRTIO_ID_CONSOLE 3 /* virtio console */ - #define VIRTIO_ID_RNG 4 /* virtio ring */ - #define VIRTIO_ID_BALLOON 5 /* virtio balloon */ -+#define VIRTIO_ID_SCSI 8 /* virtio scsi */ - #define VIRTIO_ID_9P 9 /* 9p virtio console */ - - #endif /* _LINUX_VIRTIO_IDS_H */ -diff --git a/include/linux/virtio_scsi.h b/include/linux/virtio_scsi.h -new file mode 100644 -index 0000000..8ddeafd ---- /dev/null -+++ b/include/linux/virtio_scsi.h -@@ -0,0 +1,114 @@ -+#ifndef _LINUX_VIRTIO_SCSI_H -+#define _LINUX_VIRTIO_SCSI_H -+/* This header is BSD licensed so anyone can use the definitions to implement -+ * compatible drivers/servers. */ -+ -+#define VIRTIO_SCSI_CDB_SIZE 32 -+#define VIRTIO_SCSI_SENSE_SIZE 96 -+ -+/* SCSI command request, followed by data-out */ -+struct virtio_scsi_cmd_req { -+ u8 lun[8]; /* Logical Unit Number */ -+ u64 tag; /* Command identifier */ -+ u8 task_attr; /* Task attribute */ -+ u8 prio; -+ u8 crn; -+ u8 cdb[VIRTIO_SCSI_CDB_SIZE]; -+} __packed; -+ -+/* Response, followed by sense data and data-in */ -+struct virtio_scsi_cmd_resp { -+ u32 sense_len; /* Sense data length */ -+ u32 resid; /* Residual bytes in data buffer */ -+ u16 status_qualifier; /* Status qualifier */ -+ u8 status; /* Command completion status */ -+ u8 response; /* Response values */ -+ u8 sense[VIRTIO_SCSI_SENSE_SIZE]; -+} __packed; -+ -+/* Task Management Request */ -+struct virtio_scsi_ctrl_tmf_req { -+ u32 type; -+ u32 subtype; -+ u8 lun[8]; -+ u64 tag; -+} __packed; -+ -+struct virtio_scsi_ctrl_tmf_resp { -+ u8 response; -+} __packed; -+ -+/* Asynchronous notification query/subscription */ -+struct virtio_scsi_ctrl_an_req { -+ u32 type; -+ u8 lun[8]; -+ u32 event_requested; -+} __packed; -+ -+struct virtio_scsi_ctrl_an_resp { -+ u32 event_actual; -+ u8 response; -+} __packed; -+ -+struct virtio_scsi_event { -+ u32 event; -+ u8 lun[8]; -+ u32 reason; -+} __packed; -+ -+struct virtio_scsi_config { -+ u32 num_queues; -+ u32 seg_max; -+ u32 max_sectors; -+ u32 cmd_per_lun; -+ u32 event_info_size; -+ u32 sense_size; -+ u32 cdb_size; -+ u16 max_channel; -+ u16 max_target; -+ u32 max_lun; -+} __packed; -+ -+/* Response codes */ -+#define VIRTIO_SCSI_S_OK 0 -+#define VIRTIO_SCSI_S_OVERRUN 1 -+#define VIRTIO_SCSI_S_ABORTED 2 -+#define VIRTIO_SCSI_S_BAD_TARGET 3 -+#define VIRTIO_SCSI_S_RESET 4 -+#define VIRTIO_SCSI_S_BUSY 5 -+#define VIRTIO_SCSI_S_TRANSPORT_FAILURE 6 -+#define VIRTIO_SCSI_S_TARGET_FAILURE 7 -+#define VIRTIO_SCSI_S_NEXUS_FAILURE 8 -+#define VIRTIO_SCSI_S_FAILURE 9 -+#define VIRTIO_SCSI_S_FUNCTION_SUCCEEDED 10 -+#define VIRTIO_SCSI_S_FUNCTION_REJECTED 11 -+#define VIRTIO_SCSI_S_INCORRECT_LUN 12 -+ -+/* Controlq type codes. */ -+#define VIRTIO_SCSI_T_TMF 0 -+#define VIRTIO_SCSI_T_AN_QUERY 1 -+#define VIRTIO_SCSI_T_AN_SUBSCRIBE 2 -+ -+/* Valid TMF subtypes. */ -+#define VIRTIO_SCSI_T_TMF_ABORT_TASK 0 -+#define VIRTIO_SCSI_T_TMF_ABORT_TASK_SET 1 -+#define VIRTIO_SCSI_T_TMF_CLEAR_ACA 2 -+#define VIRTIO_SCSI_T_TMF_CLEAR_TASK_SET 3 -+#define VIRTIO_SCSI_T_TMF_I_T_NEXUS_RESET 4 -+#define VIRTIO_SCSI_T_TMF_LOGICAL_UNIT_RESET 5 -+#define VIRTIO_SCSI_T_TMF_QUERY_TASK 6 -+#define VIRTIO_SCSI_T_TMF_QUERY_TASK_SET 7 -+ -+/* Events. */ -+#define VIRTIO_SCSI_T_EVENTS_MISSED 0x80000000 -+#define VIRTIO_SCSI_T_NO_EVENT 0 -+#define VIRTIO_SCSI_T_TRANSPORT_RESET 1 -+#define VIRTIO_SCSI_T_ASYNC_NOTIFY 2 -+ -+#define VIRTIO_SCSI_S_SIMPLE 0 -+#define VIRTIO_SCSI_S_ORDERED 1 -+#define VIRTIO_SCSI_S_HEAD 2 -+#define VIRTIO_SCSI_S_ACA 3 -+ -+ -+#endif /* _LINUX_VIRTIO_SCSI_H */ --- -1.7.1 - - -From 3c0e8846ac0fc2175dd0e06f495b16a30b549762 Mon Sep 17 00:00:00 2001 -From: Paolo Bonzini -Date: Tue, 29 Nov 2011 16:33:28 +0100 -Cc: Stefan Hajnoczi , Mike Christie , Pekka Enberg -Subject: [PATCH v5 2/3] virtio-scsi: add error handling - -This commit adds basic error handling to the virtio-scsi -HBA device. Task management functions are sent synchronously -via the control virtqueue. - -Cc: linux-scsi -Cc: Rusty Russell -Cc: Michael S. Tsirkin -Cc: kvm@vger.kernel.org -Acked-by: Pekka Enberg -Signed-off-by: Paolo Bonzini ---- - v3->v4: fixed 32-bit compilation; adjusted call to virtscsi_kick_cmd - - v2->v3: added mempool, used GFP_NOIO instead of GFP_ATOMIC, - formatting fixes - - v1->v2: use scmd_printk - - drivers/scsi/virtio_scsi.c | 73 +++++++++++++++++++++++++++++++++++++++++++- - 1 files changed, 72 insertions(+), 1 deletions(-) - -diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c -index 3f87ae0..68104cd 100644 ---- a/drivers/scsi/virtio_scsi.c -+++ b/drivers/scsi/virtio_scsi.c -@@ -29,6 +29,7 @@ - /* Command queue element */ - struct virtio_scsi_cmd { - struct scsi_cmnd *sc; -+ struct completion *comp; - union { - struct virtio_scsi_cmd_req cmd; - struct virtio_scsi_ctrl_tmf_req tmf; -@@ -168,11 +169,12 @@ static void virtscsi_req_done(struct virtqueue *vq) - virtscsi_vq_done(vq, virtscsi_complete_cmd); - }; - --/* These are still stubs. */ - static void virtscsi_complete_free(void *buf) - { - struct virtio_scsi_cmd *cmd = buf; - -+ if (cmd->comp) -+ complete_all(cmd->comp); - mempool_free(cmd, virtscsi_cmd_pool); - } - -@@ -306,12 +308,81 @@ out: - return ret; - } - -+static int virtscsi_tmf(struct virtio_scsi *vscsi, struct virtio_scsi_cmd *cmd) -+{ -+ DECLARE_COMPLETION_ONSTACK(comp); -+ int ret; -+ -+ cmd->comp = ∁ -+ ret = virtscsi_kick_cmd(vscsi, vscsi->ctrl_vq, cmd, -+ sizeof cmd->req.tmf, sizeof cmd->resp.tmf, -+ GFP_NOIO); -+ if (ret < 0) -+ return FAILED; -+ -+ wait_for_completion(&comp); -+ if (cmd->resp.tmf.response != VIRTIO_SCSI_S_OK && -+ cmd->resp.tmf.response != VIRTIO_SCSI_S_FUNCTION_SUCCEEDED) -+ return FAILED; -+ -+ return SUCCESS; -+} -+ -+static int virtscsi_device_reset(struct scsi_cmnd *sc) -+{ -+ struct virtio_scsi *vscsi = shost_priv(sc->device->host); -+ struct virtio_scsi_cmd *cmd; -+ -+ sdev_printk(KERN_INFO, sc->device, "device reset\n"); -+ cmd = mempool_alloc(virtscsi_cmd_pool, GFP_NOIO); -+ if (!cmd) -+ return FAILED; -+ -+ memset(cmd, 0, sizeof(*cmd)); -+ cmd->sc = sc; -+ cmd->req.tmf = (struct virtio_scsi_ctrl_tmf_req){ -+ .type = VIRTIO_SCSI_T_TMF, -+ .subtype = VIRTIO_SCSI_T_TMF_LOGICAL_UNIT_RESET, -+ .lun[0] = 1, -+ .lun[1] = sc->device->id, -+ .lun[2] = (sc->device->lun >> 8) | 0x40, -+ .lun[3] = sc->device->lun & 0xff, -+ }; -+ return virtscsi_tmf(vscsi, cmd); -+} -+ -+static int virtscsi_abort(struct scsi_cmnd *sc) -+{ -+ struct virtio_scsi *vscsi = shost_priv(sc->device->host); -+ struct virtio_scsi_cmd *cmd; -+ -+ scmd_printk(KERN_INFO, sc, "abort\n"); -+ cmd = mempool_alloc(virtscsi_cmd_pool, GFP_NOIO); -+ if (!cmd) -+ return FAILED; -+ -+ memset(cmd, 0, sizeof(*cmd)); -+ cmd->sc = sc; -+ cmd->req.tmf = (struct virtio_scsi_ctrl_tmf_req){ -+ .type = VIRTIO_SCSI_T_TMF, -+ .subtype = VIRTIO_SCSI_T_TMF_ABORT_TASK, -+ .lun[0] = 1, -+ .lun[1] = sc->device->id, -+ .lun[2] = (sc->device->lun >> 8) | 0x40, -+ .lun[3] = sc->device->lun & 0xff, -+ .tag = (unsigned long)sc, -+ }; -+ return virtscsi_tmf(vscsi, cmd); -+} -+ - static struct scsi_host_template virtscsi_host_template = { - .module = THIS_MODULE, - .name = "Virtio SCSI HBA", - .proc_name = "virtio_scsi", - .queuecommand = virtscsi_queuecommand, - .this_id = -1, -+ .eh_abort_handler = virtscsi_abort, -+ .eh_device_reset_handler = virtscsi_device_reset, - - .can_queue = 1024, - .dma_boundary = UINT_MAX, --- -1.7.1 - - -From 43cf1b6a4ee31e69581042a0c85d1398f83dcedc Mon Sep 17 00:00:00 2001 -From: Paolo Bonzini -Date: Fri, 13 Jan 2012 15:30:08 +0100 -Cc: Stefan Hajnoczi , Mike Christie , Pekka Enberg -Subject: [PATCH v5 3/3] virtio-scsi: add power management support - -This patch adds freeze/restore handlers for the HBA. Block queues -are managed independently by the disk devices. - -Cc: linux-scsi -Cc: Rusty Russell -Cc: Michael S. Tsirkin -Cc: kvm@vger.kernel.org -Acked-by: Pekka Enberg -Signed-off-by: Paolo Bonzini ---- - The feature has been merged in the virtio core for 3.3, so the patch - is new in v4. - - drivers/scsi/virtio_scsi.c | 26 +++++++++++++++++++++++--- - 1 files changed, 23 insertions(+), 3 deletions(-) - -diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c -index 68104cd..efccd72 100644 ---- a/drivers/scsi/virtio_scsi.c -+++ b/drivers/scsi/virtio_scsi.c -@@ -406,8 +406,8 @@ static struct scsi_host_template virtscsi_host_template = { - &__val, sizeof(__val)); \ - }) - --static int __devinit virtscsi_init(struct virtio_device *vdev, -- struct virtio_scsi *vscsi) -+static int virtscsi_init(struct virtio_device *vdev, -+ struct virtio_scsi *vscsi) - { - int err; - struct virtqueue *vqs[3]; -@@ -491,7 +491,7 @@ virtscsi_init_failed: - return err; - } - --static void __devexit virtscsi_remove_vqs(struct virtio_device *vdev) -+static void virtscsi_remove_vqs(struct virtio_device *vdev) - { - /* Stop all the virtqueues. */ - vdev->config->reset(vdev); -@@ -509,6 +509,22 @@ static void __devexit virtscsi_remove(struct virtio_device *vdev) - scsi_host_put(shost); - } - -+#ifdef CONFIG_PM -+static int virtscsi_freeze(struct virtio_device *vdev) -+{ -+ virtscsi_remove_vqs(vdev); -+ return 0; -+} -+ -+static int virtscsi_restore(struct virtio_device *vdev) -+{ -+ struct Scsi_Host *sh = virtio_scsi_host(vdev); -+ struct virtio_scsi *vscsi = shost_priv(sh); -+ -+ return virtscsi_init(vdev, vscsi); -+} -+#endif -+ - static struct virtio_device_id id_table[] = { - { VIRTIO_ID_SCSI, VIRTIO_DEV_ANY_ID }, - { 0 }, -@@ -519,6 +535,10 @@ static struct virtio_driver virtio_scsi_driver = { - .driver.owner = THIS_MODULE, - .id_table = id_table, - .probe = virtscsi_probe, -+#ifdef CONFIG_PM -+ .freeze = virtscsi_freeze, -+ .restore = virtscsi_restore, -+#endif - .remove = __devexit_p(virtscsi_remove), - }; - --- -1.7.1 - diff --git a/mcelog-rcu-splat.patch b/mcelog-rcu-splat.patch deleted file mode 100644 index 12c1fe3..0000000 --- a/mcelog-rcu-splat.patch +++ /dev/null @@ -1,15 +0,0 @@ -diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c -index f22a9f7..f525f99 100644 ---- a/arch/x86/kernel/cpu/mcheck/mce.c -+++ b/arch/x86/kernel/cpu/mcheck/mce.c -@@ -191,7 +191,7 @@ static void drain_mcelog_buffer(void) - { - unsigned int next, i, prev = 0; - -- next = rcu_dereference_check_mce(mcelog.next); -+ next = ACCESS_ONCE(mcelog.next); - - do { - struct mce *m; - - \ No newline at end of file diff --git a/mm-thp-fix-pmd_bad-triggering.patch b/mm-thp-fix-pmd_bad-triggering.patch deleted file mode 100644 index 8e1a77c..0000000 --- a/mm-thp-fix-pmd_bad-triggering.patch +++ /dev/null @@ -1,447 +0,0 @@ -In some cases it may happen that pmd_none_or_clear_bad() is called -with the mmap_sem hold in read mode. In those cases the huge page -faults can allocate hugepmds under pmd_none_or_clear_bad() and that -can trigger a false positive from pmd_bad() that will not like to see -a pmd materializing as trans huge. - -It's not khugepaged the problem, khugepaged holds the mmap_sem in -write mode (and all those sites must hold the mmap_sem in read mode to -prevent pagetables to go away from under them, during code review it -seems vm86 mode on 32bit kernels requires that too unless it's -restricted to 1 thread per process or UP builds). The race is only -with the huge pagefaults that can convert a pmd_none() into a -pmd_trans_huge(). - -Effectively all these pmd_none_or_clear_bad() sites running with -mmap_sem in read mode are somewhat speculative with the page faults, -and the result is always undefined when they run simultaneously. This -is probably why it wasn't common to run into this. For example if the -madvise(MADV_DONTNEED) runs zap_page_range() shortly before the page -fault, the hugepage will not be zapped, if the page fault runs first -it will be zapped. - -Altering pmd_bad() not to error out if it finds hugepmds won't be -enough to fix this, because zap_pmd_range would then proceed to call -zap_pte_range (which would be incorrect if the pmd become a -pmd_trans_huge()). - -The simplest way to fix this is to read the pmd in the local stack -(regardless of what we read, no need of actual CPU barriers, only -compiler barrier needed), and be sure it is not changing under the -code that computes its value. Even if the real pmd is changing under -the value we hold on the stack, we don't care. If we actually end up -in zap_pte_range it means the pmd was not none already and it was not -huge, and it can't become huge from under us (khugepaged locking -explained above). - -All we need is to enforce that there is no way anymore that in a code -path like below, pmd_trans_huge can be false, but -pmd_none_or_clear_bad can run into a hugepmd. The overhead of a -barrier() is just a compiler tweak and should not be measurable (I -only added it for THP builds). I don't exclude different compiler -versions may have prevented the race too by caching the value of *pmd -on the stack (that hasn't been verified, but it wouldn't be impossible -considering pmd_none_or_clear_bad, pmd_bad, pmd_trans_huge, pmd_none -are all inlines and there's no external function called in between -pmd_trans_huge and pmd_none_or_clear_bad). - - if (pmd_trans_huge(*pmd)) { - if (next-addr != HPAGE_PMD_SIZE) { - VM_BUG_ON(!rwsem_is_locked(&tlb->mm->mmap_sem)); - split_huge_page_pmd(vma->vm_mm, pmd); - } else if (zap_huge_pmd(tlb, vma, pmd, addr)) - continue; - /* fall through */ - } - if (pmd_none_or_clear_bad(pmd)) - -Because this race condition could be exercised without special -privileges this was reported in CVE-2012-1179. - -The race was identified and fully explained by Ulrich who debugged it. -I'm quoting his accurate explanation below, for reference. - -====== start quote ======= - mapcount 0 page_mapcount 1 - kernel BUG at mm/huge_memory.c:1384! - -At some point prior to the panic, a "bad pmd ..." message similar to the -following is logged on the console: - - mm/memory.c:145: bad pmd ffff8800376e1f98(80000000314000e7). - -The "bad pmd ..." message is logged by pmd_clear_bad() before it clears -the page's PMD table entry. - - 143 void pmd_clear_bad(pmd_t *pmd) - 144 { --> 145 pmd_ERROR(*pmd); - 146 pmd_clear(pmd); - 147 } - -After the PMD table entry has been cleared, there is an inconsistency -between the actual number of PMD table entries that are mapping the page -and the page's map count (_mapcount field in struct page). When the page -is subsequently reclaimed, __split_huge_page() detects this inconsistency. - - 1381 if (mapcount != page_mapcount(page)) - 1382 printk(KERN_ERR "mapcount %d page_mapcount %d\n", - 1383 mapcount, page_mapcount(page)); --> 1384 BUG_ON(mapcount != page_mapcount(page)); - -The root cause of the problem is a race of two threads in a multithreaded -process. Thread B incurs a page fault on a virtual address that has never -been accessed (PMD entry is zero) while Thread A is executing an madvise() -system call on a virtual address within the same 2 MB (huge page) range. - - virtual address space - .---------------------. - | | - | | - .-|---------------------| - | | | - | | |<-- B(fault) - | | | - 2 MB | |/////////////////////|-. - huge < |/////////////////////| > A(range) - page | |/////////////////////|-' - | | | - | | | - '-|---------------------| - | | - | | - '---------------------' - -- Thread A is executing an madvise(..., MADV_DONTNEED) system call - on the virtual address range "A(range)" shown in the picture. - -sys_madvise - // Acquire the semaphore in shared mode. - down_read(¤t->mm->mmap_sem) - ... - madvise_vma - switch (behavior) - case MADV_DONTNEED: - madvise_dontneed - zap_page_range - unmap_vmas - unmap_page_range - zap_pud_range - zap_pmd_range - // - // Assume that this huge page has never been accessed. - // I.e. content of the PMD entry is zero (not mapped). - // - if (pmd_trans_huge(*pmd)) { - // We don't get here due to the above assumption. - } - // - // Assume that Thread B incurred a page fault and - .---------> // sneaks in here as shown below. - | // - | if (pmd_none_or_clear_bad(pmd)) - | { - | if (unlikely(pmd_bad(*pmd))) - | pmd_clear_bad - | { - | pmd_ERROR - | // Log "bad pmd ..." message here. - | pmd_clear - | // Clear the page's PMD entry. - | // Thread B incremented the map count - | // in page_add_new_anon_rmap(), but - | // now the page is no longer mapped - | // by a PMD entry (-> inconsistency). - | } - | } - | - v -- Thread B is handling a page fault on virtual address "B(fault)" shown - in the picture. - -... -do_page_fault - __do_page_fault - // Acquire the semaphore in shared mode. - down_read_trylock(&mm->mmap_sem) - ... - handle_mm_fault - if (pmd_none(*pmd) && transparent_hugepage_enabled(vma)) - // We get here due to the above assumption (PMD entry is zero). - do_huge_pmd_anonymous_page - alloc_hugepage_vma - // Allocate a new transparent huge page here. - ... - __do_huge_pmd_anonymous_page - ... - spin_lock(&mm->page_table_lock) - ... - page_add_new_anon_rmap - // Here we increment the page's map count (starts at -1). - atomic_set(&page->_mapcount, 0) - set_pmd_at - // Here we set the page's PMD entry which will be cleared - // when Thread A calls pmd_clear_bad(). - ... - spin_unlock(&mm->page_table_lock) - -The mmap_sem does not prevent the race because both threads are acquiring -it in shared mode (down_read). Thread B holds the page_table_lock while -the page's map count and PMD table entry are updated. However, Thread A -does not synchronize on that lock. -====== end quote ======= - -Reported-by: Ulrich Obergfell -Signed-off-by: Andrea Arcangeli ---- - arch/x86/kernel/vm86_32.c | 2 + - fs/proc/task_mmu.c | 9 ++++++ - include/asm-generic/pgtable.h | 57 +++++++++++++++++++++++++++++++++++++++++ - mm/memcontrol.c | 4 +++ - mm/memory.c | 14 ++++++++-- - mm/mempolicy.c | 2 +- - mm/mincore.c | 2 +- - mm/pagewalk.c | 2 +- - mm/swapfile.c | 4 +-- - 9 files changed, 87 insertions(+), 9 deletions(-) - -diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c -index b466cab..328cb37 100644 ---- a/arch/x86/kernel/vm86_32.c -+++ b/arch/x86/kernel/vm86_32.c -@@ -172,6 +172,7 @@ static void mark_screen_rdonly(struct mm_struct *mm) - spinlock_t *ptl; - int i; - -+ down_write(&mm->mmap_sem); - pgd = pgd_offset(mm, 0xA0000); - if (pgd_none_or_clear_bad(pgd)) - goto out; -@@ -190,6 +191,7 @@ static void mark_screen_rdonly(struct mm_struct *mm) - } - pte_unmap_unlock(pte, ptl); - out: -+ up_write(&mm->mmap_sem); - flush_tlb(); - } - -diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c -index 7dcd2a2..3efa725 100644 ---- a/fs/proc/task_mmu.c -+++ b/fs/proc/task_mmu.c -@@ -409,6 +409,9 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, - } else { - spin_unlock(&walk->mm->page_table_lock); - } -+ -+ if (pmd_trans_unstable(pmd)) -+ return 0; - /* - * The mmap_sem held all the way back in m_start() is what - * keeps khugepaged out of here and from collapsing things -@@ -507,6 +510,8 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr, - struct page *page; - - split_huge_page_pmd(walk->mm, pmd); -+ if (pmd_trans_unstable(pmd)) -+ return 0; - - pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); - for (; addr != end; pte++, addr += PAGE_SIZE) { -@@ -670,6 +675,8 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, - int err = 0; - - split_huge_page_pmd(walk->mm, pmd); -+ if (pmd_trans_unstable(pmd)) -+ return 0; - - /* find the first VMA at or above 'addr' */ - vma = find_vma(walk->mm, addr); -@@ -961,6 +968,8 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr, - spin_unlock(&walk->mm->page_table_lock); - } - -+ if (pmd_trans_unstable(pmd)) -+ return 0; - orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); - do { - struct page *page = can_gather_numa_stats(*pte, md->vma, addr); -diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h -index 76bff2b..10f8291 100644 ---- a/include/asm-generic/pgtable.h -+++ b/include/asm-generic/pgtable.h -@@ -443,6 +443,63 @@ static inline int pmd_write(pmd_t pmd) - #endif /* __HAVE_ARCH_PMD_WRITE */ - #endif - -+/* -+ * This function is meant to be used by sites walking pagetables with -+ * the mmap_sem hold in read mode to protect against MADV_DONTNEED and -+ * transhuge page faults. MADV_DONTNEED can convert a transhuge pmd -+ * into a null pmd and the transhuge page fault can convert a null pmd -+ * into an hugepmd or into a regular pmd (if the hugepage allocation -+ * fails). While holding the mmap_sem in read mode the pmd becomes -+ * stable and stops changing under us only if it's not null and not a -+ * transhuge pmd. When those races occurs and this function makes a -+ * difference vs the standard pmd_none_or_clear_bad, the result is -+ * undefined so behaving like if the pmd was none is safe (because it -+ * can return none anyway). The compiler level barrier() is critically -+ * important to compute the two checks atomically on the same pmdval. -+ */ -+static inline int pmd_none_or_trans_huge_or_clear_bad(pmd_t *pmd) -+{ -+ /* depend on compiler for an atomic pmd read */ -+ pmd_t pmdval = *pmd; -+ /* -+ * The barrier will stabilize the pmdval in a register or on -+ * the stack so that it will stop changing under the code. -+ */ -+#ifdef CONFIG_TRANSPARENT_HUGEPAGE -+ barrier(); -+#endif -+ if (pmd_none(pmdval)) -+ return 1; -+ if (unlikely(pmd_bad(pmdval))) { -+ if (!pmd_trans_huge(pmdval)) -+ pmd_clear_bad(pmd); -+ return 1; -+ } -+ return 0; -+} -+ -+/* -+ * This is a noop if Transparent Hugepage Support is not built into -+ * the kernel. Otherwise it is equivalent to -+ * pmd_none_or_trans_huge_or_clear_bad(), and shall only be called in -+ * places that already verified the pmd is not none and they want to -+ * walk ptes while holding the mmap sem in read mode (write mode don't -+ * need this). If THP is not enabled, the pmd can't go away under the -+ * code even if MADV_DONTNEED runs, but if THP is enabled we need to -+ * run a pmd_trans_unstable before walking the ptes after -+ * split_huge_page_pmd returns (because it may have run when the pmd -+ * become null, but then a page fault can map in a THP and not a -+ * regular page). -+ */ -+static inline int pmd_trans_unstable(pmd_t *pmd) -+{ -+#ifdef CONFIG_TRANSPARENT_HUGEPAGE -+ return pmd_none_or_trans_huge_or_clear_bad(pmd); -+#else -+ return 0; -+#endif -+} -+ - #endif /* !__ASSEMBLY__ */ - - #endif /* _ASM_GENERIC_PGTABLE_H */ -diff --git a/mm/memcontrol.c b/mm/memcontrol.c -index d0e57a3..67b0578 100644 ---- a/mm/memcontrol.c -+++ b/mm/memcontrol.c -@@ -5193,6 +5193,8 @@ static int mem_cgroup_count_precharge_pte_range(pmd_t *pmd, - spinlock_t *ptl; - - split_huge_page_pmd(walk->mm, pmd); -+ if (pmd_trans_unstable(pmd)) -+ return 0; - - pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); - for (; addr != end; pte++, addr += PAGE_SIZE) -@@ -5355,6 +5357,8 @@ static int mem_cgroup_move_charge_pte_range(pmd_t *pmd, - spinlock_t *ptl; - - split_huge_page_pmd(walk->mm, pmd); -+ if (pmd_trans_unstable(pmd)) -+ return 0; - retry: - pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); - for (; addr != end; addr += PAGE_SIZE) { -diff --git a/mm/memory.c b/mm/memory.c -index fa2f04e..e3090fc 100644 ---- a/mm/memory.c -+++ b/mm/memory.c -@@ -1251,12 +1251,20 @@ static inline unsigned long zap_pmd_range(struct mmu_gather *tlb, - VM_BUG_ON(!rwsem_is_locked(&tlb->mm->mmap_sem)); - split_huge_page_pmd(vma->vm_mm, pmd); - } else if (zap_huge_pmd(tlb, vma, pmd, addr)) -- continue; -+ goto next; - /* fall through */ - } -- if (pmd_none_or_clear_bad(pmd)) -- continue; -+ /* -+ * Here there can be other concurrent MADV_DONTNEED or -+ * trans huge page faults running, and if the pmd is -+ * none or trans huge it can change under us. This is -+ * because MADV_DONTNEED holds the mmap_sem in read -+ * mode. -+ */ -+ if (pmd_none_or_trans_huge_or_clear_bad(pmd)) -+ goto next; - next = zap_pte_range(tlb, vma, pmd, addr, next, details); -+ next: - cond_resched(); - } while (pmd++, addr = next, addr != end); - -diff --git a/mm/mempolicy.c b/mm/mempolicy.c -index 47296fe..0a37570 100644 ---- a/mm/mempolicy.c -+++ b/mm/mempolicy.c -@@ -512,7 +512,7 @@ static inline int check_pmd_range(struct vm_area_struct *vma, pud_t *pud, - do { - next = pmd_addr_end(addr, end); - split_huge_page_pmd(vma->vm_mm, pmd); -- if (pmd_none_or_clear_bad(pmd)) -+ if (pmd_none_or_trans_huge_or_clear_bad(pmd)) - continue; - if (check_pte_range(vma, pmd, addr, next, nodes, - flags, private)) -diff --git a/mm/mincore.c b/mm/mincore.c -index 636a868..936b4ce 100644 ---- a/mm/mincore.c -+++ b/mm/mincore.c -@@ -164,7 +164,7 @@ static void mincore_pmd_range(struct vm_area_struct *vma, pud_t *pud, - } - /* fall through */ - } -- if (pmd_none_or_clear_bad(pmd)) -+ if (pmd_none_or_trans_huge_or_clear_bad(pmd)) - mincore_unmapped_range(vma, addr, next, vec); - else - mincore_pte_range(vma, pmd, addr, next, vec); -diff --git a/mm/pagewalk.c b/mm/pagewalk.c -index 2f5cf10..aa9701e 100644 ---- a/mm/pagewalk.c -+++ b/mm/pagewalk.c -@@ -59,7 +59,7 @@ again: - continue; - - split_huge_page_pmd(walk->mm, pmd); -- if (pmd_none_or_clear_bad(pmd)) -+ if (pmd_none_or_trans_huge_or_clear_bad(pmd)) - goto again; - err = walk_pte_range(pmd, addr, next, walk); - if (err) -diff --git a/mm/swapfile.c b/mm/swapfile.c -index d999f09..f31b29d 100644 ---- a/mm/swapfile.c -+++ b/mm/swapfile.c -@@ -932,9 +932,7 @@ static inline int unuse_pmd_range(struct vm_area_struct *vma, pud_t *pud, - pmd = pmd_offset(pud, addr); - do { - next = pmd_addr_end(addr, end); -- if (unlikely(pmd_trans_huge(*pmd))) -- continue; -- if (pmd_none_or_clear_bad(pmd)) -+ if (pmd_none_or_trans_huge_or_clear_bad(pmd)) - continue; - ret = unuse_pte_range(vma, pmd, addr, next, entry, page); - if (ret) - --- -To unsubscribe, send a message with 'unsubscribe linux-mm' in -the body to majordomo@kvack.org. For more info on Linux MM, -see: http://www.linux-mm.org/ . -Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/ -Don't email: email@kvack.org \ No newline at end of file diff --git a/sources b/sources index f2a9472..653e9ff 100644 --- a/sources +++ b/sources @@ -1,2 +1,2 @@ 7133f5a2086a7d7ef97abac610c094f5 linux-3.3.tar.xz -fe8e2b8e93695cb876cc8394b3db83c4 patch-3.3-git1.xz +72643cb2a29683201f2049d151564c56 patch-3.3-git2.xz diff --git a/udlfb-remove-sysfs-framebuffer-device-with-USB-disconnect.patch b/udlfb-remove-sysfs-framebuffer-device-with-USB-disconnect.patch deleted file mode 100644 index c304713..0000000 --- a/udlfb-remove-sysfs-framebuffer-device-with-USB-disconnect.patch +++ /dev/null @@ -1,118 +0,0 @@ -From 92a9c19a89af2ca219fbb040a0059f414a4b7223 Mon Sep 17 00:00:00 2001 -From: Kay Sievers -Date: Sat, 28 Jan 2012 19:57:46 +0000 -Subject: [PATCH] udlfb: remove sysfs framebuffer device with USB - .disconnect() - -The USB graphics card driver delays the unregistering of the framebuffer -device to a workqueue, which breaks the userspace visible remove uevent -sequence. Recent userspace tools started to support USB graphics card -hotplug out-of-the-box and rely on proper events sent by the kernel. - -The framebuffer device is a direct child of the USB interface which is -removed immediately after the USB .disconnect() callback. But the fb device -in /sys stays around until its final cleanup, at a time where all the parent -devices have been removed already. - -To work around that, we remove the sysfs fb device directly in the USB -.disconnect() callback and leave only the cleanup of the internal fb -data to the delayed work. - -Before: - add /devices/pci0000:00/0000:00:1d.0/usb2/2-1/2-1.2 (usb) - add /devices/pci0000:00/0000:00:1d.0/usb2/2-1/2-1.2/2-1.2:1.0 (usb) - add /devices/pci0000:00/0000:00:1d.0/usb2/2-1/2-1.2/2-1.2:1.0/graphics/fb0 (graphics) - remove /devices/pci0000:00/0000:00:1d.0/usb2/2-1/2-1.2/2-1.2:1.0 (usb) - remove /devices/pci0000:00/0000:00:1d.0/usb2/2-1/2-1.2 (usb) - remove /2-1.2:1.0/graphics/fb0 (graphics) - -After: - add /devices/pci0000:00/0000:00:1d.0/usb2/2-1/2-1.2 (usb) - add /devices/pci0000:00/0000:00:1d.0/usb2/2-1/2-1.2/2-1.2:1.0 (usb) - add /devices/pci0000:00/0000:00:1d.0/usb2/2-1/2-1.2/2-1.2:1.0/graphics/fb1 (graphics) - remove /devices/pci0000:00/0000:00:1d.0/usb2/2-1/2-1.2/2-1.2:1.0/graphics/fb1 (graphics) - remove /devices/pci0000:00/0000:00:1d.0/usb2/2-1/2-1.2/2-1.2:1.0 (usb) - remove /devices/pci0000:00/0000:00:1d.0/usb2/2-1/2-1.2 (usb) - -Cc: stable@vger.kernel.org -Tested-by: Bernie Thompson -Acked-by: Bernie Thompson -Signed-off-by: Kay Sievers -Signed-off-by: Florian Tobias Schandinat ---- - drivers/video/fbmem.c | 18 +++++++++++++++++- - drivers/video/udlfb.c | 2 +- - include/linux/fb.h | 1 + - 3 files changed, 19 insertions(+), 2 deletions(-) - -diff --git a/drivers/video/fbmem.c b/drivers/video/fbmem.c -index ac9141b..c6ce416 100644 ---- a/drivers/video/fbmem.c -+++ b/drivers/video/fbmem.c -@@ -1665,6 +1665,7 @@ static int do_unregister_framebuffer(struct fb_info *fb_info) - if (ret) - return -EINVAL; - -+ unlink_framebuffer(fb_info); - if (fb_info->pixmap.addr && - (fb_info->pixmap.flags & FB_PIXMAP_DEFAULT)) - kfree(fb_info->pixmap.addr); -@@ -1672,7 +1673,6 @@ static int do_unregister_framebuffer(struct fb_info *fb_info) - registered_fb[i] = NULL; - num_registered_fb--; - fb_cleanup_device(fb_info); -- device_destroy(fb_class, MKDEV(FB_MAJOR, i)); - event.info = fb_info; - fb_notifier_call_chain(FB_EVENT_FB_UNREGISTERED, &event); - -@@ -1681,6 +1681,22 @@ static int do_unregister_framebuffer(struct fb_info *fb_info) - return 0; - } - -+int unlink_framebuffer(struct fb_info *fb_info) -+{ -+ int i; -+ -+ i = fb_info->node; -+ if (i < 0 || i >= FB_MAX || registered_fb[i] != fb_info) -+ return -EINVAL; -+ -+ if (fb_info->dev) { -+ device_destroy(fb_class, MKDEV(FB_MAJOR, i)); -+ fb_info->dev = NULL; -+ } -+ return 0; -+} -+EXPORT_SYMBOL(unlink_framebuffer); -+ - void remove_conflicting_framebuffers(struct apertures_struct *a, - const char *name, bool primary) - { -diff --git a/drivers/video/udlfb.c b/drivers/video/udlfb.c -index a197731..a40c05e 100644 ---- a/drivers/video/udlfb.c -+++ b/drivers/video/udlfb.c -@@ -1739,7 +1739,7 @@ static void dlfb_usb_disconnect(struct usb_interface *interface) - for (i = 0; i < ARRAY_SIZE(fb_device_attrs); i++) - device_remove_file(info->dev, &fb_device_attrs[i]); - device_remove_bin_file(info->dev, &edid_attr); -- -+ unlink_framebuffer(info); - usb_set_intfdata(interface, NULL); - - /* if clients still have us open, will be freed on last close */ -diff --git a/include/linux/fb.h b/include/linux/fb.h -index c18122f..a395b8c 100644 ---- a/include/linux/fb.h -+++ b/include/linux/fb.h -@@ -1003,6 +1003,7 @@ extern ssize_t fb_sys_write(struct fb_info *info, const char __user *buf, - /* drivers/video/fbmem.c */ - extern int register_framebuffer(struct fb_info *fb_info); - extern int unregister_framebuffer(struct fb_info *fb_info); -+extern int unlink_framebuffer(struct fb_info *fb_info); - extern void remove_conflicting_framebuffers(struct apertures_struct *a, - const char *name, bool primary); - extern int fb_prepare_logo(struct fb_info *fb_info, int rotate); --- -1.7.6.5 -