diff --git a/dm-cache-dirty-flag-was-mistakenly-being-cleared-whe.patch b/dm-cache-dirty-flag-was-mistakenly-being-cleared-whe.patch deleted file mode 100644 index e64136a..0000000 --- a/dm-cache-dirty-flag-was-mistakenly-being-cleared-whe.patch +++ /dev/null @@ -1,42 +0,0 @@ -From: Joe Thornber -Date: Thu, 27 Nov 2014 12:26:46 +0000 -Subject: [PATCH] dm cache: dirty flag was mistakenly being cleared when - promoting via overwrite - -If the incoming bio is a WRITE and completely covers a block then we -don't bother to do any copying for a promotion operation. Once this is -done the cache block and origin block will be different, so we need to -set it to 'dirty'. - -Signed-off-by: Joe Thornber -Signed-off-by: Mike Snitzer -Cc: stable@vger.kernel.org ---- - drivers/md/dm-cache-target.c | 10 +++++++--- - 1 file changed, 7 insertions(+), 3 deletions(-) - -diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c -index 6f7086355691..387b93d81138 100644 ---- a/drivers/md/dm-cache-target.c -+++ b/drivers/md/dm-cache-target.c -@@ -951,10 +951,14 @@ static void migration_success_post_commit(struct dm_cache_migration *mg) - } - - } else { -- clear_dirty(cache, mg->new_oblock, mg->cblock); -- if (mg->requeue_holder) -+ if (mg->requeue_holder) { -+ clear_dirty(cache, mg->new_oblock, mg->cblock); - cell_defer(cache, mg->new_ocell, true); -- else { -+ } else { -+ /* -+ * The block was promoted via an overwrite, so it's dirty. 
-+ */ -+ set_dirty(cache, mg->new_oblock, mg->cblock); - bio_endio(mg->new_ocell->holder, 0); - cell_defer(cache, mg->new_ocell, false); - } --- -2.1.0 - diff --git a/dm-cache-fix-spurious-cell_defer-when-dealing-with-p.patch b/dm-cache-fix-spurious-cell_defer-when-dealing-with-p.patch deleted file mode 100644 index 05a6ebd..0000000 --- a/dm-cache-fix-spurious-cell_defer-when-dealing-with-p.patch +++ /dev/null @@ -1,40 +0,0 @@ -From: Joe Thornber -Date: Fri, 28 Nov 2014 09:48:25 +0000 -Subject: [PATCH] dm cache: fix spurious cell_defer when dealing with partial - block at end of device - -We never bother caching a partial block that is at the back end of the -origin device. No cell ever gets locked, but the calling code was -assuming it was and trying to release it. - -Now the code only releases if the cell has been set to a non NULL -value. - -Signed-off-by: Joe Thornber -Signed-off-by: Mike Snitzer -Cc: stable@vger.kernel.org ---- - drivers/md/dm-cache-target.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c -index 387b93d81138..da496cfb458d 100644 ---- a/drivers/md/dm-cache-target.c -+++ b/drivers/md/dm-cache-target.c -@@ -2554,11 +2554,11 @@ static int __cache_map(struct cache *cache, struct bio *bio, struct dm_bio_priso - static int cache_map(struct dm_target *ti, struct bio *bio) - { - int r; -- struct dm_bio_prison_cell *cell; -+ struct dm_bio_prison_cell *cell = NULL; - struct cache *cache = ti->private; - - r = __cache_map(cache, bio, &cell); -- if (r == DM_MAPIO_REMAPPED) { -+ if (r == DM_MAPIO_REMAPPED && cell) { - inc_ds(cache, bio, cell); - cell_defer(cache, cell, false); - } --- -2.1.0 - diff --git a/dm-cache-only-use-overwrite-optimisation-for-promoti.patch b/dm-cache-only-use-overwrite-optimisation-for-promoti.patch deleted file mode 100644 index 12a7911..0000000 --- a/dm-cache-only-use-overwrite-optimisation-for-promoti.patch +++ /dev/null @@ -1,32 +0,0 @@ -From: 
Joe Thornber -Date: Thu, 27 Nov 2014 12:21:08 +0000 -Subject: [PATCH] dm cache: only use overwrite optimisation for promotion when - in writeback mode - -Overwrite causes the cache block and origin blocks to diverge, which -is only allowed in writeback mode. - -Signed-off-by: Joe Thornber -Signed-off-by: Mike Snitzer -Cc: stable@vger.kernel.org ---- - drivers/md/dm-cache-target.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c -index 7130505c2425..6f7086355691 100644 ---- a/drivers/md/dm-cache-target.c -+++ b/drivers/md/dm-cache-target.c -@@ -1070,7 +1070,8 @@ static void issue_copy(struct dm_cache_migration *mg) - - avoid = is_discarded_oblock(cache, mg->new_oblock); - -- if (!avoid && bio_writes_complete_block(cache, bio)) { -+ if (writeback_mode(&cache->features) && -+ !avoid && bio_writes_complete_block(cache, bio)) { - issue_overwrite(mg, bio); - return; - } --- -2.1.0 - diff --git a/groups-Consolidate-the-setgroups-permission-checks.patch b/groups-Consolidate-the-setgroups-permission-checks.patch deleted file mode 100644 index e65ea26..0000000 --- a/groups-Consolidate-the-setgroups-permission-checks.patch +++ /dev/null @@ -1,90 +0,0 @@ -From: "Eric W. Biederman" -Date: Fri, 5 Dec 2014 17:19:27 -0600 -Subject: [PATCH] groups: Consolidate the setgroups permission checks - -Today there are 3 instances of setgroups and due to an oversight their -permission checking has diverged. Add a common function so that -they may all share the same permission checking code. - -This corrects the current oversight in the current permission checks -and adds a helper to avoid this in the future. - -A user namespace security fix will update this new helper, shortly. - -Cc: stable@vger.kernel.org -Signed-off-by: "Eric W. 
Biederman" ---- - arch/s390/kernel/compat_linux.c | 2 +- - include/linux/cred.h | 1 + - kernel/groups.c | 9 ++++++++- - kernel/uid16.c | 2 +- - 4 files changed, 11 insertions(+), 3 deletions(-) - -diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c -index ca38139423ae..437e61159279 100644 ---- a/arch/s390/kernel/compat_linux.c -+++ b/arch/s390/kernel/compat_linux.c -@@ -249,7 +249,7 @@ COMPAT_SYSCALL_DEFINE2(s390_setgroups16, int, gidsetsize, u16 __user *, grouplis - struct group_info *group_info; - int retval; - -- if (!capable(CAP_SETGID)) -+ if (!may_setgroups()) - return -EPERM; - if ((unsigned)gidsetsize > NGROUPS_MAX) - return -EINVAL; -diff --git a/include/linux/cred.h b/include/linux/cred.h -index b2d0820837c4..2fb2ca2127ed 100644 ---- a/include/linux/cred.h -+++ b/include/linux/cred.h -@@ -68,6 +68,7 @@ extern void groups_free(struct group_info *); - extern int set_current_groups(struct group_info *); - extern void set_groups(struct cred *, struct group_info *); - extern int groups_search(const struct group_info *, kgid_t); -+extern bool may_setgroups(void); - - /* access the groups "array" with this macro */ - #define GROUP_AT(gi, i) \ -diff --git a/kernel/groups.c b/kernel/groups.c -index 451698f86cfa..02d8a251c476 100644 ---- a/kernel/groups.c -+++ b/kernel/groups.c -@@ -213,6 +213,13 @@ out: - return i; - } - -+bool may_setgroups(void) -+{ -+ struct user_namespace *user_ns = current_user_ns(); -+ -+ return ns_capable(user_ns, CAP_SETGID); -+} -+ - /* - * SMP: Our groups are copy-on-write. We can set them safely - * without another task interfering. 
-@@ -223,7 +230,7 @@ SYSCALL_DEFINE2(setgroups, int, gidsetsize, gid_t __user *, grouplist) - struct group_info *group_info; - int retval; - -- if (!ns_capable(current_user_ns(), CAP_SETGID)) -+ if (!may_setgroups()) - return -EPERM; - if ((unsigned)gidsetsize > NGROUPS_MAX) - return -EINVAL; -diff --git a/kernel/uid16.c b/kernel/uid16.c -index 602e5bbbceff..d58cc4d8f0d1 100644 ---- a/kernel/uid16.c -+++ b/kernel/uid16.c -@@ -176,7 +176,7 @@ SYSCALL_DEFINE2(setgroups16, int, gidsetsize, old_gid_t __user *, grouplist) - struct group_info *group_info; - int retval; - -- if (!ns_capable(current_user_ns(), CAP_SETGID)) -+ if (!may_setgroups()) - return -EPERM; - if ((unsigned)gidsetsize > NGROUPS_MAX) - return -EINVAL; --- -2.1.0 - diff --git a/isofs-Fix-infinite-looping-over-CE-entries.patch b/isofs-Fix-infinite-looping-over-CE-entries.patch deleted file mode 100644 index bff25ac..0000000 --- a/isofs-Fix-infinite-looping-over-CE-entries.patch +++ /dev/null @@ -1,54 +0,0 @@ -From: Jan Kara -Date: Mon, 15 Dec 2014 14:22:46 +0100 -Subject: [PATCH] isofs: Fix infinite looping over CE entries - -Rock Ridge extensions define so called Continuation Entries (CE) which -define where is further space with Rock Ridge data. Corrupted isofs -image can contain arbitrarily long chain of these, including a one -containing loop and thus causing kernel to end in an infinite loop when -traversing these entries. - -Limit the traversal to 32 entries which should be more than enough space -to store all the Rock Ridge data. 
- -Reported-by: P J P -CC: stable@vger.kernel.org -Signed-off-by: Jan Kara ---- - fs/isofs/rock.c | 6 ++++++ - 1 file changed, 6 insertions(+) - -diff --git a/fs/isofs/rock.c b/fs/isofs/rock.c -index f488bbae541a..bb63254ed848 100644 ---- a/fs/isofs/rock.c -+++ b/fs/isofs/rock.c -@@ -30,6 +30,7 @@ struct rock_state { - int cont_size; - int cont_extent; - int cont_offset; -+ int cont_loops; - struct inode *inode; - }; - -@@ -73,6 +74,9 @@ static void init_rock_state(struct rock_state *rs, struct inode *inode) - rs->inode = inode; - } - -+/* Maximum number of Rock Ridge continuation entries */ -+#define RR_MAX_CE_ENTRIES 32 -+ - /* - * Returns 0 if the caller should continue scanning, 1 if the scan must end - * and -ve on error. -@@ -105,6 +109,8 @@ static int rock_continue(struct rock_state *rs) - goto out; - } - ret = -EIO; -+ if (++rs->cont_loops >= RR_MAX_CE_ENTRIES) -+ goto out; - bh = sb_bread(rs->inode->i_sb, rs->cont_extent); - if (bh) { - memcpy(rs->buffer, bh->b_data + rs->cont_offset, --- -2.1.0 - diff --git a/kernel.spec b/kernel.spec index a980623..ca8a724 100644 --- a/kernel.spec +++ b/kernel.spec @@ -62,7 +62,7 @@ Summary: The Linux kernel # For non-released -rc kernels, this will be appended after the rcX and # gitX tags, so a 3 here would become part of release "0.rcX.gitX.3" # -%global baserelease 201 +%global baserelease 200 %global fedora_build %{baserelease} # base_sublevel is the kernel version we're starting with and patching @@ -74,7 +74,7 @@ Summary: The Linux kernel %if 0%{?released_kernel} # Do we have a -stable update to apply? -%define stable_update 7 +%define stable_update 8 # Is it a -stable RC? 
%define stable_rc 0 # Set rpm version accordingly @@ -738,9 +738,6 @@ Patch26058: asus-nb-wmi-Add-wapf4-quirk-for-the-X550VB.patch #rhbz 1135338 Patch26090: HID-add-support-for-MS-Surface-Pro-3-Type-Cover.patch -#CVE-2014-8134 rhbz 1172765 1172769 -Patch26091: x86-kvm-Clear-paravirt_enabled-on-KVM-guests-for-esp.patch - #rhbz 1164945 Patch26092: xhci-Add-broken-streams-quirk-for-Fresco-Logic-FL100.patch Patch26093: uas-Add-US_FL_NO_ATA_1X-for-Seagate-devices-with-usb.patch @@ -749,9 +746,6 @@ Patch26094: uas-Add-US_FL_NO_REPORT_OPCODES-for-JMicron-JMS566-w.patch #rhbz 1172543 Patch26096: cfg80211-don-t-WARN-about-two-consecutive-Country-IE.patch -#CVE-2014-8133 rhbz 1172797 1174374 -Patch26100: x86-tls-Validate-TLS-entries-to-protect-espfix.patch - #rhbz 1173806 Patch26101: powerpc-powernv-force-all-CPUs-to-be-bootable.patch @@ -759,43 +753,15 @@ Patch26101: powerpc-powernv-force-all-CPUs-to-be-bootable.patch Patch26098: move-d_rcu-from-overlapping-d_child-to-overlapping-d.patch Patch26099: deal-with-deadlock-in-d_walk.patch -#CVE-2014-XXXX rhbz 1175235 1175250 -Patch26102: isofs-Fix-infinite-looping-over-CE-entries.patch - #rhbz 1175261 Patch26103: blk-mq-Fix-uninitialized-kobject-at-CPU-hotplugging.patch -#rhbz 1168434 -Patch26104: dm-cache-only-use-overwrite-optimisation-for-promoti.patch -Patch26105: dm-cache-dirty-flag-was-mistakenly-being-cleared-whe.patch -Patch26106: dm-cache-fix-spurious-cell_defer-when-dealing-with-p.patch - -#mount fixes for stable -Patch26108: mnt-Implicitly-add-MNT_NODEV-on-remount-when-it-was-.patch -Patch26109: mnt-Update-unprivileged-remount-test.patch -Patch26110: umount-Disallow-unprivileged-mount-force.patch - -#CVE-2014-8989 rhbz 1170684 1170688 -Patch26111: groups-Consolidate-the-setgroups-permission-checks.patch -Patch26112: userns-Document-what-the-invariant-required-for-safe.patch -Patch26113: userns-Don-t-allow-setgroups-until-a-gid-mapping-has.patch -Patch26114: userns-Don-t-allow-unprivileged-creation-of-gid-mapp.patch 
-Patch26115: userns-Check-euid-no-fsuid-when-establishing-an-unpr.patch -Patch26116: userns-Only-allow-the-creator-of-the-userns-unprivil.patch -Patch26117: userns-Rename-id_map_mutex-to-userns_state_mutex.patch -Patch26118: userns-Add-a-knob-to-disable-setgroups-on-a-per-user.patch -Patch26119: userns-Allow-setting-gid_maps-without-privilege-when.patch -Patch26120: userns-Unbreak-the-unprivileged-remount-tests.patch - #rhbz 1163927 Patch26121: Set-UID-in-sess_auth_rawntlmssp_authenticate-too.patch #CVE-2014-9428 rhbz 1178826,1178833 Patch26122: batman-adv-Calculate-extra-tail-size-based-on-queued.patch -#CVE-2014-9419 rhbz 1177260,1177263 -Patch26123: x86_64-switch_to-Load-TLS-descriptors-before-switchi.patch - #CVE-2014-9529 rhbz 1179813 1179853 Patch26124: KEYS-close-race-between-key-lookup-and-freeing.patch @@ -1507,9 +1473,6 @@ ApplyPatch asus-nb-wmi-Add-wapf4-quirk-for-the-X550VB.patch #rhbz 1135338 ApplyPatch HID-add-support-for-MS-Surface-Pro-3-Type-Cover.patch -#CVE-2014-8134 rhbz 1172765 1172769 -ApplyPatch x86-kvm-Clear-paravirt_enabled-on-KVM-guests-for-esp.patch - #rhbz 1164945 ApplyPatch xhci-Add-broken-streams-quirk-for-Fresco-Logic-FL100.patch ApplyPatch uas-Add-US_FL_NO_ATA_1X-for-Seagate-devices-with-usb.patch @@ -1518,9 +1481,6 @@ ApplyPatch uas-Add-US_FL_NO_REPORT_OPCODES-for-JMicron-JMS566-w.patch #rhbz 1172543 ApplyPatch cfg80211-don-t-WARN-about-two-consecutive-Country-IE.patch -#CVE-2014-8133 rhbz 1172797 1174374 -ApplyPatch x86-tls-Validate-TLS-entries-to-protect-espfix.patch - #rhbz 1173806 ApplyPatch powerpc-powernv-force-all-CPUs-to-be-bootable.patch @@ -1528,43 +1488,15 @@ ApplyPatch powerpc-powernv-force-all-CPUs-to-be-bootable.patch ApplyPatch move-d_rcu-from-overlapping-d_child-to-overlapping-d.patch ApplyPatch deal-with-deadlock-in-d_walk.patch -#CVE-2014-XXXX rhbz 1175235 1175250 -ApplyPatch isofs-Fix-infinite-looping-over-CE-entries.patch - #rhbz 1175261 ApplyPatch blk-mq-Fix-uninitialized-kobject-at-CPU-hotplugging.patch -#rhbz 
1168434 -ApplyPatch dm-cache-only-use-overwrite-optimisation-for-promoti.patch -ApplyPatch dm-cache-dirty-flag-was-mistakenly-being-cleared-whe.patch -ApplyPatch dm-cache-fix-spurious-cell_defer-when-dealing-with-p.patch - -#mount fixes for stable -ApplyPatch mnt-Implicitly-add-MNT_NODEV-on-remount-when-it-was-.patch -ApplyPatch mnt-Update-unprivileged-remount-test.patch -ApplyPatch umount-Disallow-unprivileged-mount-force.patch - -#CVE-2014-8989 rhbz 1170684 1170688 -ApplyPatch groups-Consolidate-the-setgroups-permission-checks.patch -ApplyPatch userns-Document-what-the-invariant-required-for-safe.patch -ApplyPatch userns-Don-t-allow-setgroups-until-a-gid-mapping-has.patch -ApplyPatch userns-Don-t-allow-unprivileged-creation-of-gid-mapp.patch -ApplyPatch userns-Check-euid-no-fsuid-when-establishing-an-unpr.patch -ApplyPatch userns-Only-allow-the-creator-of-the-userns-unprivil.patch -ApplyPatch userns-Rename-id_map_mutex-to-userns_state_mutex.patch -ApplyPatch userns-Add-a-knob-to-disable-setgroups-on-a-per-user.patch -ApplyPatch userns-Allow-setting-gid_maps-without-privilege-when.patch -ApplyPatch userns-Unbreak-the-unprivileged-remount-tests.patch - #rhbz 1163927 ApplyPatch Set-UID-in-sess_auth_rawntlmssp_authenticate-too.patch #CVE-2014-9428 rhbz 1178826,1178833 ApplyPatch batman-adv-Calculate-extra-tail-size-based-on-queued.patch -#CVE-2014-9419 rhbz 1177260,1177263 -ApplyPatch x86_64-switch_to-Load-TLS-descriptors-before-switchi.patch - #CVE-2014-9529 rhbz 1179813 1179853 ApplyPatch KEYS-close-race-between-key-lookup-and-freeing.patch @@ -2386,6 +2318,9 @@ fi # ||----w | # || || %changelog +* Thu Jan 08 2015 Justin M. 
Forbes - 3.17.8-300 +- Linux v3.17.8 + * Wed Jan 07 2015 Josh Boyer - CVE-2014-9529 memory corruption or panic during key gc (rhbz 1179813 1179853) - Enable POWERCAP and INTEL_RAPL diff --git a/mnt-Implicitly-add-MNT_NODEV-on-remount-when-it-was-.patch b/mnt-Implicitly-add-MNT_NODEV-on-remount-when-it-was-.patch deleted file mode 100644 index 6059902..0000000 --- a/mnt-Implicitly-add-MNT_NODEV-on-remount-when-it-was-.patch +++ /dev/null @@ -1,41 +0,0 @@ -From: "Eric W. Biederman" -Date: Wed, 13 Aug 2014 01:33:38 -0700 -Subject: [PATCH] mnt: Implicitly add MNT_NODEV on remount when it was - implicitly added by mount - -Now that remount is properly enforcing the rule that you can't remove -nodev at least sandstorm.io is breaking when performing a remount. - -It turns out that there is an easy intuitive solution implicitly -add nodev on remount when nodev was implicitly added on mount. - -Tested-by: Cedric Bosdonnat -Tested-by: Richard Weinberger -Cc: stable@vger.kernel.org -Signed-off-by: "Eric W. Biederman" ---- - fs/namespace.c | 8 +++++++- - 1 file changed, 7 insertions(+), 1 deletion(-) - -diff --git a/fs/namespace.c b/fs/namespace.c -index 550dbff08677..72b41e49772d 100644 ---- a/fs/namespace.c -+++ b/fs/namespace.c -@@ -1955,7 +1955,13 @@ static int do_remount(struct path *path, int flags, int mnt_flags, - } - if ((mnt->mnt.mnt_flags & MNT_LOCK_NODEV) && - !(mnt_flags & MNT_NODEV)) { -- return -EPERM; -+ /* Was the nodev implicitly added in mount? */ -+ if ((mnt->mnt_ns->user_ns != &init_user_ns) && -+ !(sb->s_type->fs_flags & FS_USERNS_DEV_MOUNT)) { -+ mnt_flags |= MNT_NODEV; -+ } else { -+ return -EPERM; -+ } - } - if ((mnt->mnt.mnt_flags & MNT_LOCK_NOSUID) && - !(mnt_flags & MNT_NOSUID)) { --- -2.1.0 - diff --git a/mnt-Update-unprivileged-remount-test.patch b/mnt-Update-unprivileged-remount-test.patch deleted file mode 100644 index 5913d82..0000000 --- a/mnt-Update-unprivileged-remount-test.patch +++ /dev/null @@ -1,280 +0,0 @@ -From: "Eric W. 
Biederman" -Date: Fri, 22 Aug 2014 16:39:03 -0500 -Subject: [PATCH] mnt: Update unprivileged remount test - -- MNT_NODEV should be irrelevant except when reading back mount flags, - no longer specify MNT_NODEV on remount. - -- Test MNT_NODEV on devpts where it is meaningful even for unprivileged mounts. - -- Add a test to verify that remount of a prexisting mount with the same flags - is allowed and does not change those flags. - -- Cleanup up the definitions of MS_REC, MS_RELATIME, MS_STRICTATIME that are used - when the code is built in an environment without them. - -- Correct the test error messages when tests fail. There were not 5 tests - that tested MS_RELATIME. - -Cc: stable@vger.kernel.org -Signed-off-by: Eric W. Biederman ---- - .../selftests/mount/unprivileged-remount-test.c | 172 +++++++++++++++++---- - 1 file changed, 142 insertions(+), 30 deletions(-) - -diff --git a/tools/testing/selftests/mount/unprivileged-remount-test.c b/tools/testing/selftests/mount/unprivileged-remount-test.c -index 1b3ff2fda4d0..9669d375625a 100644 ---- a/tools/testing/selftests/mount/unprivileged-remount-test.c -+++ b/tools/testing/selftests/mount/unprivileged-remount-test.c -@@ -6,6 +6,8 @@ - #include - #include - #include -+#include -+#include - #include - #include - #include -@@ -32,11 +34,14 @@ - # define CLONE_NEWPID 0x20000000 - #endif - -+#ifndef MS_REC -+# define MS_REC 16384 -+#endif - #ifndef MS_RELATIME --#define MS_RELATIME (1 << 21) -+# define MS_RELATIME (1 << 21) - #endif - #ifndef MS_STRICTATIME --#define MS_STRICTATIME (1 << 24) -+# define MS_STRICTATIME (1 << 24) - #endif - - static void die(char *fmt, ...) -@@ -87,6 +92,45 @@ static void write_file(char *filename, char *fmt, ...) 
- } - } - -+static int read_mnt_flags(const char *path) -+{ -+ int ret; -+ struct statvfs stat; -+ int mnt_flags; -+ -+ ret = statvfs(path, &stat); -+ if (ret != 0) { -+ die("statvfs of %s failed: %s\n", -+ path, strerror(errno)); -+ } -+ if (stat.f_flag & ~(ST_RDONLY | ST_NOSUID | ST_NODEV | \ -+ ST_NOEXEC | ST_NOATIME | ST_NODIRATIME | ST_RELATIME | \ -+ ST_SYNCHRONOUS | ST_MANDLOCK)) { -+ die("Unrecognized mount flags\n"); -+ } -+ mnt_flags = 0; -+ if (stat.f_flag & ST_RDONLY) -+ mnt_flags |= MS_RDONLY; -+ if (stat.f_flag & ST_NOSUID) -+ mnt_flags |= MS_NOSUID; -+ if (stat.f_flag & ST_NODEV) -+ mnt_flags |= MS_NODEV; -+ if (stat.f_flag & ST_NOEXEC) -+ mnt_flags |= MS_NOEXEC; -+ if (stat.f_flag & ST_NOATIME) -+ mnt_flags |= MS_NOATIME; -+ if (stat.f_flag & ST_NODIRATIME) -+ mnt_flags |= MS_NODIRATIME; -+ if (stat.f_flag & ST_RELATIME) -+ mnt_flags |= MS_RELATIME; -+ if (stat.f_flag & ST_SYNCHRONOUS) -+ mnt_flags |= MS_SYNCHRONOUS; -+ if (stat.f_flag & ST_MANDLOCK) -+ mnt_flags |= ST_MANDLOCK; -+ -+ return mnt_flags; -+} -+ - static void create_and_enter_userns(void) - { - uid_t uid; -@@ -118,7 +162,8 @@ static void create_and_enter_userns(void) - } - - static --bool test_unpriv_remount(int mount_flags, int remount_flags, int invalid_flags) -+bool test_unpriv_remount(const char *fstype, const char *mount_options, -+ int mount_flags, int remount_flags, int invalid_flags) - { - pid_t child; - -@@ -151,9 +196,11 @@ bool test_unpriv_remount(int mount_flags, int remount_flags, int invalid_flags) - strerror(errno)); - } - -- if (mount("testing", "/tmp", "ramfs", mount_flags, NULL) != 0) { -- die("mount of /tmp failed: %s\n", -- strerror(errno)); -+ if (mount("testing", "/tmp", fstype, mount_flags, mount_options) != 0) { -+ die("mount of %s with options '%s' on /tmp failed: %s\n", -+ fstype, -+ mount_options? 
mount_options : "", -+ strerror(errno)); - } - - create_and_enter_userns(); -@@ -181,62 +228,127 @@ bool test_unpriv_remount(int mount_flags, int remount_flags, int invalid_flags) - - static bool test_unpriv_remount_simple(int mount_flags) - { -- return test_unpriv_remount(mount_flags, mount_flags, 0); -+ return test_unpriv_remount("ramfs", NULL, mount_flags, mount_flags, 0); - } - - static bool test_unpriv_remount_atime(int mount_flags, int invalid_flags) - { -- return test_unpriv_remount(mount_flags, mount_flags, invalid_flags); -+ return test_unpriv_remount("ramfs", NULL, mount_flags, mount_flags, -+ invalid_flags); -+} -+ -+static bool test_priv_mount_unpriv_remount(void) -+{ -+ pid_t child; -+ int ret; -+ const char *orig_path = "/dev"; -+ const char *dest_path = "/tmp"; -+ int orig_mnt_flags, remount_mnt_flags; -+ -+ child = fork(); -+ if (child == -1) { -+ die("fork failed: %s\n", -+ strerror(errno)); -+ } -+ if (child != 0) { /* parent */ -+ pid_t pid; -+ int status; -+ pid = waitpid(child, &status, 0); -+ if (pid == -1) { -+ die("waitpid failed: %s\n", -+ strerror(errno)); -+ } -+ if (pid != child) { -+ die("waited for %d got %d\n", -+ child, pid); -+ } -+ if (!WIFEXITED(status)) { -+ die("child did not terminate cleanly\n"); -+ } -+ return WEXITSTATUS(status) == EXIT_SUCCESS ? 
true : false; -+ } -+ -+ orig_mnt_flags = read_mnt_flags(orig_path); -+ -+ create_and_enter_userns(); -+ ret = unshare(CLONE_NEWNS); -+ if (ret != 0) { -+ die("unshare(CLONE_NEWNS) failed: %s\n", -+ strerror(errno)); -+ } -+ -+ ret = mount(orig_path, dest_path, "bind", MS_BIND | MS_REC, NULL); -+ if (ret != 0) { -+ die("recursive bind mount of %s onto %s failed: %s\n", -+ orig_path, dest_path, strerror(errno)); -+ } -+ -+ ret = mount(dest_path, dest_path, "none", -+ MS_REMOUNT | MS_BIND | orig_mnt_flags , NULL); -+ if (ret != 0) { -+ /* system("cat /proc/self/mounts"); */ -+ die("remount of /tmp failed: %s\n", -+ strerror(errno)); -+ } -+ -+ remount_mnt_flags = read_mnt_flags(dest_path); -+ if (orig_mnt_flags != remount_mnt_flags) { -+ die("Mount flags unexpectedly changed during remount of %s originally mounted on %s\n", -+ dest_path, orig_path); -+ } -+ exit(EXIT_SUCCESS); - } - - int main(int argc, char **argv) - { -- if (!test_unpriv_remount_simple(MS_RDONLY|MS_NODEV)) { -+ if (!test_unpriv_remount_simple(MS_RDONLY)) { - die("MS_RDONLY malfunctions\n"); - } -- if (!test_unpriv_remount_simple(MS_NODEV)) { -+ if (!test_unpriv_remount("devpts", "newinstance", MS_NODEV, MS_NODEV, 0)) { - die("MS_NODEV malfunctions\n"); - } -- if (!test_unpriv_remount_simple(MS_NOSUID|MS_NODEV)) { -+ if (!test_unpriv_remount_simple(MS_NOSUID)) { - die("MS_NOSUID malfunctions\n"); - } -- if (!test_unpriv_remount_simple(MS_NOEXEC|MS_NODEV)) { -+ if (!test_unpriv_remount_simple(MS_NOEXEC)) { - die("MS_NOEXEC malfunctions\n"); - } -- if (!test_unpriv_remount_atime(MS_RELATIME|MS_NODEV, -- MS_NOATIME|MS_NODEV)) -+ if (!test_unpriv_remount_atime(MS_RELATIME, -+ MS_NOATIME)) - { - die("MS_RELATIME malfunctions\n"); - } -- if (!test_unpriv_remount_atime(MS_STRICTATIME|MS_NODEV, -- MS_NOATIME|MS_NODEV)) -+ if (!test_unpriv_remount_atime(MS_STRICTATIME, -+ MS_NOATIME)) - { - die("MS_STRICTATIME malfunctions\n"); - } -- if (!test_unpriv_remount_atime(MS_NOATIME|MS_NODEV, -- 
MS_STRICTATIME|MS_NODEV)) -+ if (!test_unpriv_remount_atime(MS_NOATIME, -+ MS_STRICTATIME)) - { -- die("MS_RELATIME malfunctions\n"); -+ die("MS_NOATIME malfunctions\n"); - } -- if (!test_unpriv_remount_atime(MS_RELATIME|MS_NODIRATIME|MS_NODEV, -- MS_NOATIME|MS_NODEV)) -+ if (!test_unpriv_remount_atime(MS_RELATIME|MS_NODIRATIME, -+ MS_NOATIME)) - { -- die("MS_RELATIME malfunctions\n"); -+ die("MS_RELATIME|MS_NODIRATIME malfunctions\n"); - } -- if (!test_unpriv_remount_atime(MS_STRICTATIME|MS_NODIRATIME|MS_NODEV, -- MS_NOATIME|MS_NODEV)) -+ if (!test_unpriv_remount_atime(MS_STRICTATIME|MS_NODIRATIME, -+ MS_NOATIME)) - { -- die("MS_RELATIME malfunctions\n"); -+ die("MS_STRICTATIME|MS_NODIRATIME malfunctions\n"); - } -- if (!test_unpriv_remount_atime(MS_NOATIME|MS_NODIRATIME|MS_NODEV, -- MS_STRICTATIME|MS_NODEV)) -+ if (!test_unpriv_remount_atime(MS_NOATIME|MS_NODIRATIME, -+ MS_STRICTATIME)) - { -- die("MS_RELATIME malfunctions\n"); -+ die("MS_NOATIME|MS_DIRATIME malfunctions\n"); - } -- if (!test_unpriv_remount(MS_STRICTATIME|MS_NODEV, MS_NODEV, -- MS_NOATIME|MS_NODEV)) -+ if (!test_unpriv_remount("ramfs", NULL, MS_STRICTATIME, 0, MS_NOATIME)) - { - die("Default atime malfunctions\n"); - } -+ if (!test_priv_mount_unpriv_remount()) { -+ die("Mount flags unexpectedly changed after remount\n"); -+ } - return EXIT_SUCCESS; - } --- -2.1.0 - diff --git a/sources b/sources index 396a8ab..35129af 100644 --- a/sources +++ b/sources @@ -1,3 +1,3 @@ fb30d0f29214d75cddd2faa94f73d5cf linux-3.17.tar.xz 159e969cbc27201d8e2fa0f609dc722f perf-man-3.17.tar.gz -96d5959bdc223fa6aa0ed132c93cf814 patch-3.17.7.xz +4ea1c0e18b18406bcd248bf06b95aec3 patch-3.17.8.xz diff --git a/umount-Disallow-unprivileged-mount-force.patch b/umount-Disallow-unprivileged-mount-force.patch deleted file mode 100644 index 3acbe35..0000000 --- a/umount-Disallow-unprivileged-mount-force.patch +++ /dev/null @@ -1,33 +0,0 @@ -From: "Eric W. 
Biederman" -Date: Sat, 4 Oct 2014 14:44:03 -0700 -Subject: [PATCH] umount: Disallow unprivileged mount force - -Forced unmount affects not just the mount namespace but the underlying -superblock as well. Restrict forced unmount to the global root user -for now. Otherwise it becomes possible a user in a less privileged -mount namespace to force the shutdown of a superblock of a filesystem -in a more privileged mount namespace, allowing a DOS attack on root. - -Cc: stable@vger.kernel.org -Signed-off-by: "Eric W. Biederman" ---- - fs/namespace.c | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/fs/namespace.c b/fs/namespace.c -index 72b41e49772d..90707be662f2 100644 ---- a/fs/namespace.c -+++ b/fs/namespace.c -@@ -1430,6 +1430,9 @@ SYSCALL_DEFINE2(umount, char __user *, name, int, flags) - goto dput_and_out; - if (mnt->mnt.mnt_flags & MNT_LOCKED) - goto dput_and_out; -+ retval = -EPERM; -+ if (flags & MNT_FORCE && !capable(CAP_SYS_ADMIN)) -+ goto dput_and_out; - - retval = do_umount(mnt, flags); - dput_and_out: --- -2.1.0 - diff --git a/userns-Add-a-knob-to-disable-setgroups-on-a-per-user.patch b/userns-Add-a-knob-to-disable-setgroups-on-a-per-user.patch deleted file mode 100644 index e87c897..0000000 --- a/userns-Add-a-knob-to-disable-setgroups-on-a-per-user.patch +++ /dev/null @@ -1,280 +0,0 @@ -From: "Eric W. Biederman" -Date: Tue, 2 Dec 2014 12:27:26 -0600 -Subject: [PATCH] userns: Add a knob to disable setgroups on a per user - namespace basis - -- Expose the knob to user space through a proc file /proc//setgroups - - A value of "deny" means the setgroups system call is disabled in the - current processes user namespace and can not be enabled in the - future in this user namespace. - - A value of "allow" means the segtoups system call is enabled. - -- Descendant user namespaces inherit the value of setgroups from - their parents. - -- A proc file is used (instead of a sysctl) as sysctls currently do - not allow checking the permissions at open time. 
- -- Writing to the proc file is restricted to before the gid_map - for the user namespace is set. - - This ensures that disabling setgroups at a user namespace - level will never remove the ability to call setgroups - from a process that already has that ability. - - A process may opt in to the setgroups disable for itself by - creating, entering and configuring a user namespace or by calling - setns on an existing user namespace with setgroups disabled. - Processes without privileges already can not call setgroups so this - is a noop. Prodcess with privilege become processes without - privilege when entering a user namespace and as with any other path - to dropping privilege they would not have the ability to call - setgroups. So this remains within the bounds of what is possible - without a knob to disable setgroups permanently in a user namespace. - -Cc: stable@vger.kernel.org -Signed-off-by: "Eric W. Biederman" ---- - fs/proc/base.c | 53 ++++++++++++++++++++++++++ - include/linux/user_namespace.h | 7 ++++ - kernel/user.c | 1 + - kernel/user_namespace.c | 85 ++++++++++++++++++++++++++++++++++++++++++ - 4 files changed, 146 insertions(+) - -diff --git a/fs/proc/base.c b/fs/proc/base.c -index baf852b648ad..3ec60dee75da 100644 ---- a/fs/proc/base.c -+++ b/fs/proc/base.c -@@ -2493,6 +2493,57 @@ static const struct file_operations proc_projid_map_operations = { - .llseek = seq_lseek, - .release = proc_id_map_release, - }; -+ -+static int proc_setgroups_open(struct inode *inode, struct file *file) -+{ -+ struct user_namespace *ns = NULL; -+ struct task_struct *task; -+ int ret; -+ -+ ret = -ESRCH; -+ task = get_proc_task(inode); -+ if (task) { -+ rcu_read_lock(); -+ ns = get_user_ns(task_cred_xxx(task, user_ns)); -+ rcu_read_unlock(); -+ put_task_struct(task); -+ } -+ if (!ns) -+ goto err; -+ -+ if (file->f_mode & FMODE_WRITE) { -+ ret = -EACCES; -+ if (!ns_capable(ns, CAP_SYS_ADMIN)) -+ goto err_put_ns; -+ } -+ -+ ret = single_open(file, &proc_setgroups_show, ns); 
-+ if (ret) -+ goto err_put_ns; -+ -+ return 0; -+err_put_ns: -+ put_user_ns(ns); -+err: -+ return ret; -+} -+ -+static int proc_setgroups_release(struct inode *inode, struct file *file) -+{ -+ struct seq_file *seq = file->private_data; -+ struct user_namespace *ns = seq->private; -+ int ret = single_release(inode, file); -+ put_user_ns(ns); -+ return ret; -+} -+ -+static const struct file_operations proc_setgroups_operations = { -+ .open = proc_setgroups_open, -+ .write = proc_setgroups_write, -+ .read = seq_read, -+ .llseek = seq_lseek, -+ .release = proc_setgroups_release, -+}; - #endif /* CONFIG_USER_NS */ - - static int proc_pid_personality(struct seq_file *m, struct pid_namespace *ns, -@@ -2601,6 +2652,7 @@ static const struct pid_entry tgid_base_stuff[] = { - REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations), - REG("gid_map", S_IRUGO|S_IWUSR, proc_gid_map_operations), - REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations), -+ REG("setgroups", S_IRUGO|S_IWUSR, proc_setgroups_operations), - #endif - #ifdef CONFIG_CHECKPOINT_RESTORE - REG("timers", S_IRUGO, proc_timers_operations), -@@ -2944,6 +2996,7 @@ static const struct pid_entry tid_base_stuff[] = { - REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations), - REG("gid_map", S_IRUGO|S_IWUSR, proc_gid_map_operations), - REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations), -+ REG("setgroups", S_IRUGO|S_IWUSR, proc_setgroups_operations), - #endif - }; - -diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h -index 8d493083486a..9f3579ff543d 100644 ---- a/include/linux/user_namespace.h -+++ b/include/linux/user_namespace.h -@@ -17,6 +17,10 @@ struct uid_gid_map { /* 64 bytes -- 1 cache line */ - } extent[UID_GID_MAP_MAX_EXTENTS]; - }; - -+#define USERNS_SETGROUPS_ALLOWED 1UL -+ -+#define USERNS_INIT_FLAGS USERNS_SETGROUPS_ALLOWED -+ - struct user_namespace { - struct uid_gid_map uid_map; - struct uid_gid_map gid_map; -@@ -27,6 +31,7 @@ struct user_namespace { 
- kuid_t owner; - kgid_t group; - unsigned int proc_inum; -+ unsigned long flags; - - /* Register of per-UID persistent keyrings for this namespace */ - #ifdef CONFIG_PERSISTENT_KEYRINGS -@@ -63,6 +68,8 @@ extern const struct seq_operations proc_projid_seq_operations; - extern ssize_t proc_uid_map_write(struct file *, const char __user *, size_t, loff_t *); - extern ssize_t proc_gid_map_write(struct file *, const char __user *, size_t, loff_t *); - extern ssize_t proc_projid_map_write(struct file *, const char __user *, size_t, loff_t *); -+extern ssize_t proc_setgroups_write(struct file *, const char __user *, size_t, loff_t *); -+extern int proc_setgroups_show(struct seq_file *m, void *v); - extern bool userns_may_setgroups(const struct user_namespace *ns); - #else - -diff --git a/kernel/user.c b/kernel/user.c -index 4efa39350e44..2d09940c9632 100644 ---- a/kernel/user.c -+++ b/kernel/user.c -@@ -51,6 +51,7 @@ struct user_namespace init_user_ns = { - .owner = GLOBAL_ROOT_UID, - .group = GLOBAL_ROOT_GID, - .proc_inum = PROC_USER_INIT_INO, -+ .flags = USERNS_INIT_FLAGS, - #ifdef CONFIG_PERSISTENT_KEYRINGS - .persistent_keyring_register_sem = - __RWSEM_INITIALIZER(init_user_ns.persistent_keyring_register_sem), -diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c -index 44a555ac6104..6e80f4c1322b 100644 ---- a/kernel/user_namespace.c -+++ b/kernel/user_namespace.c -@@ -100,6 +100,11 @@ int create_user_ns(struct cred *new) - ns->owner = owner; - ns->group = group; - -+ /* Inherit USERNS_SETGROUPS_ALLOWED from our parent */ -+ mutex_lock(&userns_state_mutex); -+ ns->flags = parent_ns->flags; -+ mutex_unlock(&userns_state_mutex); -+ - set_cred_user_ns(new, ns); - - #ifdef CONFIG_PERSISTENT_KEYRINGS -@@ -839,6 +844,84 @@ static bool new_idmap_permitted(const struct file *file, - return false; - } - -+int proc_setgroups_show(struct seq_file *seq, void *v) -+{ -+ struct user_namespace *ns = seq->private; -+ unsigned long userns_flags = ACCESS_ONCE(ns->flags); -+ 
-+ seq_printf(seq, "%s\n", -+ (userns_flags & USERNS_SETGROUPS_ALLOWED) ? -+ "allow" : "deny"); -+ return 0; -+} -+ -+ssize_t proc_setgroups_write(struct file *file, const char __user *buf, -+ size_t count, loff_t *ppos) -+{ -+ struct seq_file *seq = file->private_data; -+ struct user_namespace *ns = seq->private; -+ char kbuf[8], *pos; -+ bool setgroups_allowed; -+ ssize_t ret; -+ -+ /* Only allow a very narrow range of strings to be written */ -+ ret = -EINVAL; -+ if ((*ppos != 0) || (count >= sizeof(kbuf))) -+ goto out; -+ -+ /* What was written? */ -+ ret = -EFAULT; -+ if (copy_from_user(kbuf, buf, count)) -+ goto out; -+ kbuf[count] = '\0'; -+ pos = kbuf; -+ -+ /* What is being requested? */ -+ ret = -EINVAL; -+ if (strncmp(pos, "allow", 5) == 0) { -+ pos += 5; -+ setgroups_allowed = true; -+ } -+ else if (strncmp(pos, "deny", 4) == 0) { -+ pos += 4; -+ setgroups_allowed = false; -+ } -+ else -+ goto out; -+ -+ /* Verify there is not trailing junk on the line */ -+ pos = skip_spaces(pos); -+ if (*pos != '\0') -+ goto out; -+ -+ ret = -EPERM; -+ mutex_lock(&userns_state_mutex); -+ if (setgroups_allowed) { -+ /* Enabling setgroups after setgroups has been disabled -+ * is not allowed. -+ */ -+ if (!(ns->flags & USERNS_SETGROUPS_ALLOWED)) -+ goto out_unlock; -+ } else { -+ /* Permanently disabling setgroups after setgroups has -+ * been enabled by writing the gid_map is not allowed. -+ */ -+ if (ns->gid_map.nr_extents != 0) -+ goto out_unlock; -+ ns->flags &= ~USERNS_SETGROUPS_ALLOWED; -+ } -+ mutex_unlock(&userns_state_mutex); -+ -+ /* Report a successful write */ -+ *ppos = count; -+ ret = count; -+out: -+ return ret; -+out_unlock: -+ mutex_unlock(&userns_state_mutex); -+ goto out; -+} -+ - bool userns_may_setgroups(const struct user_namespace *ns) - { - bool allowed; -@@ -848,6 +931,8 @@ bool userns_may_setgroups(const struct user_namespace *ns) - * the user namespace has been established. 
- */ - allowed = ns->gid_map.nr_extents != 0; -+ /* Is setgroups allowed? */ -+ allowed = allowed && (ns->flags & USERNS_SETGROUPS_ALLOWED); - mutex_unlock(&userns_state_mutex); - - return allowed; --- -2.1.0 - diff --git a/userns-Allow-setting-gid_maps-without-privilege-when.patch b/userns-Allow-setting-gid_maps-without-privilege-when.patch deleted file mode 100644 index 97d3fe6..0000000 --- a/userns-Allow-setting-gid_maps-without-privilege-when.patch +++ /dev/null @@ -1,40 +0,0 @@ -From: "Eric W. Biederman" -Date: Fri, 5 Dec 2014 19:36:04 -0600 -Subject: [PATCH] userns: Allow setting gid_maps without privilege when - setgroups is disabled - -Now that setgroups can be disabled and not reenabled, setting gid_map -without privielge can now be enabled when setgroups is disabled. - -This restores most of the functionality that was lost when unprivileged -setting of gid_map was removed. Applications that use this functionality -will need to check to see if they use setgroups or init_groups, and if they -don't they can be fixed by simply disabling setgroups before writing to -gid_map. - -Cc: stable@vger.kernel.org -Reviewed-by: Andy Lutomirski -Signed-off-by: "Eric W. 
Biederman" ---- - kernel/user_namespace.c | 5 +++++ - 1 file changed, 5 insertions(+) - -diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c -index 6e80f4c1322b..a2e37c5d2f63 100644 ---- a/kernel/user_namespace.c -+++ b/kernel/user_namespace.c -@@ -826,6 +826,11 @@ static bool new_idmap_permitted(const struct file *file, - kuid_t uid = make_kuid(ns->parent, id); - if (uid_eq(uid, cred->euid)) - return true; -+ } else if (cap_setid == CAP_SETGID) { -+ kgid_t gid = make_kgid(ns->parent, id); -+ if (!(ns->flags & USERNS_SETGROUPS_ALLOWED) && -+ gid_eq(gid, cred->egid)) -+ return true; - } - } - --- -2.1.0 - diff --git a/userns-Check-euid-no-fsuid-when-establishing-an-unpr.patch b/userns-Check-euid-no-fsuid-when-establishing-an-unpr.patch deleted file mode 100644 index 50830c3..0000000 --- a/userns-Check-euid-no-fsuid-when-establishing-an-unpr.patch +++ /dev/null @@ -1,39 +0,0 @@ -From: "Eric W. Biederman" -Date: Fri, 5 Dec 2014 18:26:30 -0600 -Subject: [PATCH] userns: Check euid no fsuid when establishing an unprivileged - uid mapping - -setresuid allows the euid to be set to any of uid, euid, suid, and -fsuid. Therefor it is safe to allow an unprivileged user to map -their euid and use CAP_SETUID privileged with exactly that uid, -as no new credentials can be obtained. - -I can not find a combination of existing system calls that allows setting -uid, euid, suid, and fsuid from the fsuid making the previous use -of fsuid for allowing unprivileged mappings a bug. - -This is part of a fix for CVE-2014-8989. - -Cc: stable@vger.kernel.org -Reviewed-by: Andy Lutomirski -Signed-off-by: "Eric W. 
Biederman" ---- - kernel/user_namespace.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c -index 1ce6d67c07b7..9451b12a9b6c 100644 ---- a/kernel/user_namespace.c -+++ b/kernel/user_namespace.c -@@ -819,7 +819,7 @@ static bool new_idmap_permitted(const struct file *file, - u32 id = new_map->extent[0].lower_first; - if (cap_setid == CAP_SETUID) { - kuid_t uid = make_kuid(ns->parent, id); -- if (uid_eq(uid, file->f_cred->fsuid)) -+ if (uid_eq(uid, file->f_cred->euid)) - return true; - } - } --- -2.1.0 - diff --git a/userns-Document-what-the-invariant-required-for-safe.patch b/userns-Document-what-the-invariant-required-for-safe.patch deleted file mode 100644 index c364b2b..0000000 --- a/userns-Document-what-the-invariant-required-for-safe.patch +++ /dev/null @@ -1,48 +0,0 @@ -From: "Eric W. Biederman" -Date: Fri, 5 Dec 2014 17:51:47 -0600 -Subject: [PATCH] userns: Document what the invariant required for safe - unprivileged mappings. - -The rule is simple. Don't allow anything that wouldn't be allowed -without unprivileged mappings. - -It was previously overlooked that establishing gid mappings would -allow dropping groups and potentially gaining permission to files and -directories that had lesser permissions for a specific group than for -all other users. - -This is the rule needed to fix CVE-2014-8989 and prevent any other -security issues with new_idmap_permitted. - -The reason for this rule is that the unix permission model is old and -there are programs out there somewhere that take advantage of every -little corner of it. So allowing a uid or gid mapping to be -established without privielge that would allow anything that would not -be allowed without that mapping will result in expectations from some -code somewhere being violated. Violated expectations about the -behavior of the OS is a long way to say a security issue. - -Cc: stable@vger.kernel.org -Signed-off-by: "Eric W. 
Biederman" ---- - kernel/user_namespace.c | 4 +++- - 1 file changed, 3 insertions(+), 1 deletion(-) - -diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c -index aa312b0dc3ec..b99c862a2e3f 100644 ---- a/kernel/user_namespace.c -+++ b/kernel/user_namespace.c -@@ -812,7 +812,9 @@ static bool new_idmap_permitted(const struct file *file, - struct user_namespace *ns, int cap_setid, - struct uid_gid_map *new_map) - { -- /* Allow mapping to your own filesystem ids */ -+ /* Don't allow mappings that would allow anything that wouldn't -+ * be allowed without the establishment of unprivileged mappings. -+ */ - if ((new_map->nr_extents == 1) && (new_map->extent[0].count == 1)) { - u32 id = new_map->extent[0].lower_first; - if (cap_setid == CAP_SETUID) { --- -2.1.0 - diff --git a/userns-Don-t-allow-setgroups-until-a-gid-mapping-has.patch b/userns-Don-t-allow-setgroups-until-a-gid-mapping-has.patch deleted file mode 100644 index 81217d2..0000000 --- a/userns-Don-t-allow-setgroups-until-a-gid-mapping-has.patch +++ /dev/null @@ -1,98 +0,0 @@ -From: "Eric W. Biederman" -Date: Fri, 5 Dec 2014 18:01:11 -0600 -Subject: [PATCH] userns: Don't allow setgroups until a gid mapping has been - setablished - -setgroups is unique in not needing a valid mapping before it can be called, -in the case of setgroups(0, NULL) which drops all supplemental groups. - -The design of the user namespace assumes that CAP_SETGID can not actually -be used until a gid mapping is established. Therefore add a helper function -to see if the user namespace gid mapping has been established and call -that function in the setgroups permission check. - -This is part of the fix for CVE-2014-8989, being able to drop groups -without privilege using user namespaces. - -Cc: stable@vger.kernel.org -Reviewed-by: Andy Lutomirski -Signed-off-by: "Eric W. 
Biederman" ---- - include/linux/user_namespace.h | 5 +++++ - kernel/groups.c | 4 +++- - kernel/user_namespace.c | 14 ++++++++++++++ - 3 files changed, 22 insertions(+), 1 deletion(-) - -diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h -index e95372654f09..8d493083486a 100644 ---- a/include/linux/user_namespace.h -+++ b/include/linux/user_namespace.h -@@ -63,6 +63,7 @@ extern const struct seq_operations proc_projid_seq_operations; - extern ssize_t proc_uid_map_write(struct file *, const char __user *, size_t, loff_t *); - extern ssize_t proc_gid_map_write(struct file *, const char __user *, size_t, loff_t *); - extern ssize_t proc_projid_map_write(struct file *, const char __user *, size_t, loff_t *); -+extern bool userns_may_setgroups(const struct user_namespace *ns); - #else - - static inline struct user_namespace *get_user_ns(struct user_namespace *ns) -@@ -87,6 +88,10 @@ static inline void put_user_ns(struct user_namespace *ns) - { - } - -+static inline bool userns_may_setgroups(const struct user_namespace *ns) -+{ -+ return true; -+} - #endif - - #endif /* _LINUX_USER_H */ -diff --git a/kernel/groups.c b/kernel/groups.c -index 02d8a251c476..664411f171b5 100644 ---- a/kernel/groups.c -+++ b/kernel/groups.c -@@ -6,6 +6,7 @@ - #include - #include - #include -+#include - #include - - /* init to 2 - one for init_task, one to ensure it is never freed */ -@@ -217,7 +218,8 @@ bool may_setgroups(void) - { - struct user_namespace *user_ns = current_user_ns(); - -- return ns_capable(user_ns, CAP_SETGID); -+ return ns_capable(user_ns, CAP_SETGID) && -+ userns_may_setgroups(user_ns); - } - - /* -diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c -index b99c862a2e3f..27c8dab48c07 100644 ---- a/kernel/user_namespace.c -+++ b/kernel/user_namespace.c -@@ -843,6 +843,20 @@ static bool new_idmap_permitted(const struct file *file, - return false; - } - -+bool userns_may_setgroups(const struct user_namespace *ns) -+{ -+ bool allowed; -+ -+ 
mutex_lock(&id_map_mutex); -+ /* It is not safe to use setgroups until a gid mapping in -+ * the user namespace has been established. -+ */ -+ allowed = ns->gid_map.nr_extents != 0; -+ mutex_unlock(&id_map_mutex); -+ -+ return allowed; -+} -+ - static void *userns_get(struct task_struct *task) - { - struct user_namespace *user_ns; --- -2.1.0 - diff --git a/userns-Don-t-allow-unprivileged-creation-of-gid-mapp.patch b/userns-Don-t-allow-unprivileged-creation-of-gid-mapp.patch deleted file mode 100644 index b1d5382..0000000 --- a/userns-Don-t-allow-unprivileged-creation-of-gid-mapp.patch +++ /dev/null @@ -1,46 +0,0 @@ -From: "Eric W. Biederman" -Date: Fri, 5 Dec 2014 18:14:19 -0600 -Subject: [PATCH] userns: Don't allow unprivileged creation of gid mappings - -As any gid mapping will allow and must allow for backwards -compatibility dropping groups don't allow any gid mappings to be -established without CAP_SETGID in the parent user namespace. - -For a small class of applications this change breaks userspace -and removes useful functionality. This small class of applications -includes tools/testing/selftests/mount/unprivilged-remount-test.c - -Most of the removed functionality will be added back with the addition -of a one way knob to disable setgroups. Once setgroups is disabled -setting the gid_map becomes as safe as setting the uid_map. - -For more common applications that set the uid_map and the gid_map -with privilege this change will have no affect. - -This is part of a fix for CVE-2014-8989. - -Cc: stable@vger.kernel.org -Reviewed-by: Andy Lutomirski -Signed-off-by: "Eric W. 
Biederman" ---- - kernel/user_namespace.c | 4 ---- - 1 file changed, 4 deletions(-) - -diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c -index 27c8dab48c07..1ce6d67c07b7 100644 ---- a/kernel/user_namespace.c -+++ b/kernel/user_namespace.c -@@ -821,10 +821,6 @@ static bool new_idmap_permitted(const struct file *file, - kuid_t uid = make_kuid(ns->parent, id); - if (uid_eq(uid, file->f_cred->fsuid)) - return true; -- } else if (cap_setid == CAP_SETGID) { -- kgid_t gid = make_kgid(ns->parent, id); -- if (gid_eq(gid, file->f_cred->fsgid)) -- return true; - } - } - --- -2.1.0 - diff --git a/userns-Only-allow-the-creator-of-the-userns-unprivil.patch b/userns-Only-allow-the-creator-of-the-userns-unprivil.patch deleted file mode 100644 index 8381b14..0000000 --- a/userns-Only-allow-the-creator-of-the-userns-unprivil.patch +++ /dev/null @@ -1,54 +0,0 @@ -From: "Eric W. Biederman" -Date: Wed, 26 Nov 2014 23:22:14 -0600 -Subject: [PATCH] userns: Only allow the creator of the userns unprivileged - mappings - -If you did not create the user namespace and are allowed -to write to uid_map or gid_map you should already have the necessary -privilege in the parent user namespace to establish any mapping -you want so this will not affect userspace in practice. - -Limiting unprivileged uid mapping establishment to the creator of the -user namespace makes it easier to verify all credentials obtained with -the uid mapping can be obtained without the uid mapping without -privilege. - -Limiting unprivileged gid mapping establishment (which is temporarily -absent) to the creator of the user namespace also ensures that the -combination of uid and gid can already be obtained without privilege. - -This is part of the fix for CVE-2014-8989. - -Cc: stable@vger.kernel.org -Reviewed-by: Andy Lutomirski -Signed-off-by: "Eric W. 
Biederman" ---- - kernel/user_namespace.c | 6 ++++-- - 1 file changed, 4 insertions(+), 2 deletions(-) - -diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c -index 9451b12a9b6c..1e34de2fbd60 100644 ---- a/kernel/user_namespace.c -+++ b/kernel/user_namespace.c -@@ -812,14 +812,16 @@ static bool new_idmap_permitted(const struct file *file, - struct user_namespace *ns, int cap_setid, - struct uid_gid_map *new_map) - { -+ const struct cred *cred = file->f_cred; - /* Don't allow mappings that would allow anything that wouldn't - * be allowed without the establishment of unprivileged mappings. - */ -- if ((new_map->nr_extents == 1) && (new_map->extent[0].count == 1)) { -+ if ((new_map->nr_extents == 1) && (new_map->extent[0].count == 1) && -+ uid_eq(ns->owner, cred->euid)) { - u32 id = new_map->extent[0].lower_first; - if (cap_setid == CAP_SETUID) { - kuid_t uid = make_kuid(ns->parent, id); -- if (uid_eq(uid, file->f_cred->euid)) -+ if (uid_eq(uid, cred->euid)) - return true; - } - } --- -2.1.0 - diff --git a/userns-Rename-id_map_mutex-to-userns_state_mutex.patch b/userns-Rename-id_map_mutex-to-userns_state_mutex.patch deleted file mode 100644 index ce6288a..0000000 --- a/userns-Rename-id_map_mutex-to-userns_state_mutex.patch +++ /dev/null @@ -1,80 +0,0 @@ -From: "Eric W. Biederman" -Date: Tue, 9 Dec 2014 14:03:14 -0600 -Subject: [PATCH] userns: Rename id_map_mutex to userns_state_mutex - -Generalize id_map_mutex so it can be used for more state of a user namespace. - -Cc: stable@vger.kernel.org -Reviewed-by: Andy Lutomirski -Signed-off-by: "Eric W. 
Biederman" ---- - kernel/user_namespace.c | 14 ++++++-------- - 1 file changed, 6 insertions(+), 8 deletions(-) - -diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c -index 1e34de2fbd60..44a555ac6104 100644 ---- a/kernel/user_namespace.c -+++ b/kernel/user_namespace.c -@@ -24,6 +24,7 @@ - #include - - static struct kmem_cache *user_ns_cachep __read_mostly; -+static DEFINE_MUTEX(userns_state_mutex); - - static bool new_idmap_permitted(const struct file *file, - struct user_namespace *ns, int cap_setid, -@@ -583,9 +584,6 @@ static bool mappings_overlap(struct uid_gid_map *new_map, - return false; - } - -- --static DEFINE_MUTEX(id_map_mutex); -- - static ssize_t map_write(struct file *file, const char __user *buf, - size_t count, loff_t *ppos, - int cap_setid, -@@ -602,7 +600,7 @@ static ssize_t map_write(struct file *file, const char __user *buf, - ssize_t ret = -EINVAL; - - /* -- * The id_map_mutex serializes all writes to any given map. -+ * The userns_state_mutex serializes all writes to any given map. - * - * Any map is only ever written once. - * -@@ -620,7 +618,7 @@ static ssize_t map_write(struct file *file, const char __user *buf, - * order and smp_rmb() is guaranteed that we don't have crazy - * architectures returning stale data. - */ -- mutex_lock(&id_map_mutex); -+ mutex_lock(&userns_state_mutex); - - ret = -EPERM; - /* Only allow one successful write to the map */ -@@ -750,7 +748,7 @@ static ssize_t map_write(struct file *file, const char __user *buf, - *ppos = count; - ret = count; - out: -- mutex_unlock(&id_map_mutex); -+ mutex_unlock(&userns_state_mutex); - if (page) - free_page(page); - return ret; -@@ -845,12 +843,12 @@ bool userns_may_setgroups(const struct user_namespace *ns) - { - bool allowed; - -- mutex_lock(&id_map_mutex); -+ mutex_lock(&userns_state_mutex); - /* It is not safe to use setgroups until a gid mapping in - * the user namespace has been established. 
- */ - allowed = ns->gid_map.nr_extents != 0; -- mutex_unlock(&id_map_mutex); -+ mutex_unlock(&userns_state_mutex); - - return allowed; - } --- -2.1.0 - diff --git a/userns-Unbreak-the-unprivileged-remount-tests.patch b/userns-Unbreak-the-unprivileged-remount-tests.patch deleted file mode 100644 index 69edd2e..0000000 --- a/userns-Unbreak-the-unprivileged-remount-tests.patch +++ /dev/null @@ -1,91 +0,0 @@ -From: "Eric W. Biederman" -Date: Tue, 2 Dec 2014 13:56:30 -0600 -Subject: [PATCH] userns: Unbreak the unprivileged remount tests - -A security fix in caused the way the unprivileged remount tests were -using user namespaces to break. Tweak the way user namespaces are -being used so the test works again. - -Cc: stable@vger.kernel.org -Signed-off-by: "Eric W. Biederman" ---- - .../selftests/mount/unprivileged-remount-test.c | 32 ++++++++++++++++------ - 1 file changed, 24 insertions(+), 8 deletions(-) - -diff --git a/tools/testing/selftests/mount/unprivileged-remount-test.c b/tools/testing/selftests/mount/unprivileged-remount-test.c -index 9669d375625a..517785052f1c 100644 ---- a/tools/testing/selftests/mount/unprivileged-remount-test.c -+++ b/tools/testing/selftests/mount/unprivileged-remount-test.c -@@ -53,17 +53,14 @@ static void die(char *fmt, ...) - exit(EXIT_FAILURE); - } - --static void write_file(char *filename, char *fmt, ...) -+static void vmaybe_write_file(bool enoent_ok, char *filename, char *fmt, va_list ap) - { - char buf[4096]; - int fd; - ssize_t written; - int buf_len; -- va_list ap; - -- va_start(ap, fmt); - buf_len = vsnprintf(buf, sizeof(buf), fmt, ap); -- va_end(ap); - if (buf_len < 0) { - die("vsnprintf failed: %s\n", - strerror(errno)); -@@ -74,6 +71,8 @@ static void write_file(char *filename, char *fmt, ...) 
- - fd = open(filename, O_WRONLY); - if (fd < 0) { -+ if ((errno == ENOENT) && enoent_ok) -+ return; - die("open of %s failed: %s\n", - filename, strerror(errno)); - } -@@ -92,6 +91,26 @@ static void write_file(char *filename, char *fmt, ...) - } - } - -+static void maybe_write_file(char *filename, char *fmt, ...) -+{ -+ va_list ap; -+ -+ va_start(ap, fmt); -+ vmaybe_write_file(true, filename, fmt, ap); -+ va_end(ap); -+ -+} -+ -+static void write_file(char *filename, char *fmt, ...) -+{ -+ va_list ap; -+ -+ va_start(ap, fmt); -+ vmaybe_write_file(false, filename, fmt, ap); -+ va_end(ap); -+ -+} -+ - static int read_mnt_flags(const char *path) - { - int ret; -@@ -144,13 +163,10 @@ static void create_and_enter_userns(void) - strerror(errno)); - } - -+ maybe_write_file("/proc/self/setgroups", "deny"); - write_file("/proc/self/uid_map", "0 %d 1", uid); - write_file("/proc/self/gid_map", "0 %d 1", gid); - -- if (setgroups(0, NULL) != 0) { -- die("setgroups failed: %s\n", -- strerror(errno)); -- } - if (setgid(0) != 0) { - die ("setgid(0) failed %s\n", - strerror(errno)); --- -2.1.0 - diff --git a/x86-kvm-Clear-paravirt_enabled-on-KVM-guests-for-esp.patch b/x86-kvm-Clear-paravirt_enabled-on-KVM-guests-for-esp.patch deleted file mode 100644 index ba6928d..0000000 --- a/x86-kvm-Clear-paravirt_enabled-on-KVM-guests-for-esp.patch +++ /dev/null @@ -1,73 +0,0 @@ -From 0fdb006a5af7f391a6de4ce810aba4af46c427e4 Mon Sep 17 00:00:00 2001 -From: Andy Lutomirski -Date: Fri, 5 Dec 2014 19:03:28 -0800 -Subject: [PATCH] x86, kvm: Clear paravirt_enabled on KVM guests for espfix32's - benefit - -paravirt_enabled has the following effects: - - - Disables the F00F bug workaround warning. There is no F00F bug - workaround any more because Linux's standard IDT handling already - works around the F00F bug, but the warning still exists. This - is only cosmetic, and, in any event, there is no such thing as - KVM on a CPU with the F00F bug. - - - Disables 32-bit APM BIOS detection. 
On a KVM paravirt system, - there should be no APM BIOS anyway. - - - Disables tboot. I think that the tboot code should check the - CPUID hypervisor bit directly if it matters. - - - paravirt_enabled disables espfix32. espfix32 should *not* be - disabled under KVM paravirt. - -The last point is the purpose of this patch. It fixes a leak of the -high 16 bits of the kernel stack address on 32-bit KVM paravirt -guests. - -While I'm at it, this removes pv_info setup from kvmclock. That -code seems to serve no purpose. - -Cc: stable@vger.kernel.org -Signed-off-by: Andy Lutomirski ---- - arch/x86/kernel/kvm.c | 9 ++++++++- - arch/x86/kernel/kvmclock.c | 2 -- - 2 files changed, 8 insertions(+), 3 deletions(-) - -diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c -index 3dd8e2c4d74a..07de51f66deb 100644 ---- a/arch/x86/kernel/kvm.c -+++ b/arch/x86/kernel/kvm.c -@@ -282,7 +282,14 @@ NOKPROBE_SYMBOL(do_async_page_fault); - static void __init paravirt_ops_setup(void) - { - pv_info.name = "KVM"; -- pv_info.paravirt_enabled = 1; -+ -+ /* -+ * KVM isn't paravirt in the sense of paravirt_enabled. A KVM -+ * guest kernel works like a bare metal kernel with additional -+ * features, and paravirt_enabled is about features that are -+ * missing. 
-+ */ -+ pv_info.paravirt_enabled = 0; - - if (kvm_para_has_feature(KVM_FEATURE_NOP_IO_DELAY)) - pv_cpu_ops.io_delay = kvm_io_delay; -diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c -index d9156ceecdff..d4d9a8ad7893 100644 ---- a/arch/x86/kernel/kvmclock.c -+++ b/arch/x86/kernel/kvmclock.c -@@ -263,8 +263,6 @@ void __init kvmclock_init(void) - #endif - kvm_get_preset_lpj(); - clocksource_register_hz(&kvm_clock, NSEC_PER_SEC); -- pv_info.paravirt_enabled = 1; -- pv_info.name = "KVM"; - - if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE_STABLE_BIT)) - pvclock_set_flags(PVCLOCK_TSC_STABLE_BIT); --- -2.1.0 - diff --git a/x86-tls-Validate-TLS-entries-to-protect-espfix.patch b/x86-tls-Validate-TLS-entries-to-protect-espfix.patch deleted file mode 100644 index 52c0497..0000000 --- a/x86-tls-Validate-TLS-entries-to-protect-espfix.patch +++ /dev/null @@ -1,77 +0,0 @@ -From: Andy Lutomirski -Date: Thu, 4 Dec 2014 16:48:16 -0800 -Subject: [PATCH] x86/tls: Validate TLS entries to protect espfix - -Installing a 16-bit RW data segment into the GDT defeats espfix. -AFAICT this will not affect glibc, Wine, or dosemu at all. - -Signed-off-by: Andy Lutomirski -Acked-by: H. Peter Anvin -Cc: stable@vger.kernel.org -Cc: Konrad Rzeszutek Wilk -Cc: Linus Torvalds -Cc: security@kernel.org -Cc: Willy Tarreau -Signed-off-by: Ingo Molnar ---- - arch/x86/kernel/tls.c | 23 +++++++++++++++++++++++ - 1 file changed, 23 insertions(+) - -diff --git a/arch/x86/kernel/tls.c b/arch/x86/kernel/tls.c -index f7fec09e3e3a..e7650bd71109 100644 ---- a/arch/x86/kernel/tls.c -+++ b/arch/x86/kernel/tls.c -@@ -27,6 +27,21 @@ static int get_free_idx(void) - return -ESRCH; - } - -+static bool tls_desc_okay(const struct user_desc *info) -+{ -+ if (LDT_empty(info)) -+ return true; -+ -+ /* -+ * espfix is required for 16-bit data segments, but espfix -+ * only works for LDT segments. 
-+ */ -+ if (!info->seg_32bit) -+ return false; -+ -+ return true; -+} -+ - static void set_tls_desc(struct task_struct *p, int idx, - const struct user_desc *info, int n) - { -@@ -66,6 +81,9 @@ int do_set_thread_area(struct task_struct *p, int idx, - if (copy_from_user(&info, u_info, sizeof(info))) - return -EFAULT; - -+ if (!tls_desc_okay(&info)) -+ return -EINVAL; -+ - if (idx == -1) - idx = info.entry_number; - -@@ -192,6 +210,7 @@ int regset_tls_set(struct task_struct *target, const struct user_regset *regset, - { - struct user_desc infobuf[GDT_ENTRY_TLS_ENTRIES]; - const struct user_desc *info; -+ int i; - - if (pos >= GDT_ENTRY_TLS_ENTRIES * sizeof(struct user_desc) || - (pos % sizeof(struct user_desc)) != 0 || -@@ -205,6 +224,10 @@ int regset_tls_set(struct task_struct *target, const struct user_regset *regset, - else - info = infobuf; - -+ for (i = 0; i < count / sizeof(struct user_desc); i++) -+ if (!tls_desc_okay(info + i)) -+ return -EINVAL; -+ - set_tls_desc(target, - GDT_ENTRY_TLS_MIN + (pos / sizeof(struct user_desc)), - info, count / sizeof(struct user_desc)); --- -2.1.0 - diff --git a/x86_64-switch_to-Load-TLS-descriptors-before-switchi.patch b/x86_64-switch_to-Load-TLS-descriptors-before-switchi.patch deleted file mode 100644 index 29e57b7..0000000 --- a/x86_64-switch_to-Load-TLS-descriptors-before-switchi.patch +++ /dev/null @@ -1,309 +0,0 @@ -From: Andy Lutomirski -Date: Mon, 8 Dec 2014 13:55:20 -0800 -Subject: [PATCH] x86_64, switch_to(): Load TLS descriptors before switching DS - and ES - -Otherwise, if buggy user code points DS or ES into the TLS -array, they would be corrupted after a context switch. - -This also significantly improves the comments and documents some -gotchas in the code. - -Before this patch, the both tests below failed. With this -patch, the es test passes, although the gsbase test still fails. 
- - ----- begin es test ----- - -/* - * Copyright (c) 2014 Andy Lutomirski - * GPL v2 - */ - -static unsigned short GDT3(int idx) -{ - return (idx << 3) | 3; -} - -static int create_tls(int idx, unsigned int base) -{ - struct user_desc desc = { - .entry_number = idx, - .base_addr = base, - .limit = 0xfffff, - .seg_32bit = 1, - .contents = 0, /* Data, grow-up */ - .read_exec_only = 0, - .limit_in_pages = 1, - .seg_not_present = 0, - .useable = 0, - }; - - if (syscall(SYS_set_thread_area, &desc) != 0) - err(1, "set_thread_area"); - - return desc.entry_number; -} - -int main() -{ - int idx = create_tls(-1, 0); - printf("Allocated GDT index %d\n", idx); - - unsigned short orig_es; - asm volatile ("mov %%es,%0" : "=rm" (orig_es)); - - int errors = 0; - int total = 1000; - for (int i = 0; i < total; i++) { - asm volatile ("mov %0,%%es" : : "rm" (GDT3(idx))); - usleep(100); - - unsigned short es; - asm volatile ("mov %%es,%0" : "=rm" (es)); - asm volatile ("mov %0,%%es" : : "rm" (orig_es)); - if (es != GDT3(idx)) { - if (errors == 0) - printf("[FAIL]\tES changed from 0x%hx to 0x%hx\n", - GDT3(idx), es); - errors++; - } - } - - if (errors) { - printf("[FAIL]\tES was corrupted %d/%d times\n", errors, total); - return 1; - } else { - printf("[OK]\tES was preserved\n"); - return 0; - } -} - - ----- end es test ----- - - ----- begin gsbase test ----- - -/* - * gsbase.c, a gsbase test - * Copyright (c) 2014 Andy Lutomirski - * GPL v2 - */ - -static unsigned char *testptr, *testptr2; - -static unsigned char read_gs_testvals(void) -{ - unsigned char ret; - asm volatile ("movb %%gs:%1, %0" : "=r" (ret) : "m" (*testptr)); - return ret; -} - -int main() -{ - int errors = 0; - - testptr = mmap((void *)0x200000000UL, 1, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_FIXED | MAP_ANONYMOUS, -1, 0); - if (testptr == MAP_FAILED) - err(1, "mmap"); - - testptr2 = mmap((void *)0x300000000UL, 1, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_FIXED | MAP_ANONYMOUS, -1, 0); - if (testptr2 == 
MAP_FAILED) - err(1, "mmap"); - - *testptr = 0; - *testptr2 = 1; - - if (syscall(SYS_arch_prctl, ARCH_SET_GS, - (unsigned long)testptr2 - (unsigned long)testptr) != 0) - err(1, "ARCH_SET_GS"); - - usleep(100); - - if (read_gs_testvals() == 1) { - printf("[OK]\tARCH_SET_GS worked\n"); - } else { - printf("[FAIL]\tARCH_SET_GS failed\n"); - errors++; - } - - asm volatile ("mov %0,%%gs" : : "r" (0)); - - if (read_gs_testvals() == 0) { - printf("[OK]\tWriting 0 to gs worked\n"); - } else { - printf("[FAIL]\tWriting 0 to gs failed\n"); - errors++; - } - - usleep(100); - - if (read_gs_testvals() == 0) { - printf("[OK]\tgsbase is still zero\n"); - } else { - printf("[FAIL]\tgsbase was corrupted\n"); - errors++; - } - - return errors == 0 ? 0 : 1; -} - - ----- end gsbase test ----- - -Signed-off-by: Andy Lutomirski -Cc: -Cc: Andi Kleen -Cc: Linus Torvalds -Link: http://lkml.kernel.org/r/509d27c9fec78217691c3dad91cec87e1006b34a.1418075657.git.luto@amacapital.net -Signed-off-by: Ingo Molnar ---- - arch/x86/kernel/process_64.c | 101 +++++++++++++++++++++++++++++++------------ - 1 file changed, 73 insertions(+), 28 deletions(-) - -diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c -index ca5b02d405c3..166119618afb 100644 ---- a/arch/x86/kernel/process_64.c -+++ b/arch/x86/kernel/process_64.c -@@ -286,24 +286,9 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) - - fpu = switch_fpu_prepare(prev_p, next_p, cpu); - -- /* -- * Reload esp0, LDT and the page table pointer: -- */ -+ /* Reload esp0 and ss1. */ - load_sp0(tss, next); - -- /* -- * Switch DS and ES. -- * This won't pick up thread selector changes, but I guess that is ok. -- */ -- savesegment(es, prev->es); -- if (unlikely(next->es | prev->es)) -- loadsegment(es, next->es); -- -- savesegment(ds, prev->ds); -- if (unlikely(next->ds | prev->ds)) -- loadsegment(ds, next->ds); -- -- - /* We must save %fs and %gs before load_TLS() because - * %fs and %gs may be cleared by load_TLS(). 
- * -@@ -312,41 +297,101 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) - savesegment(fs, fsindex); - savesegment(gs, gsindex); - -+ /* -+ * Load TLS before restoring any segments so that segment loads -+ * reference the correct GDT entries. -+ */ - load_TLS(next, cpu); - - /* -- * Leave lazy mode, flushing any hypercalls made here. -- * This must be done before restoring TLS segments so -- * the GDT and LDT are properly updated, and must be -- * done before math_state_restore, so the TS bit is up -- * to date. -+ * Leave lazy mode, flushing any hypercalls made here. This -+ * must be done after loading TLS entries in the GDT but before -+ * loading segments that might reference them, and it must -+ * be done before math_state_restore, so the TS bit is up to -+ * date. - */ - arch_end_context_switch(next_p); - -+ /* Switch DS and ES. -+ * -+ * Reading them only returns the selectors, but writing them (if -+ * nonzero) loads the full descriptor from the GDT or LDT. The -+ * LDT for next is loaded in switch_mm, and the GDT is loaded -+ * above. -+ * -+ * We therefore need to write new values to the segment -+ * registers on every context switch unless both the new and old -+ * values are zero. -+ * -+ * Note that we don't need to do anything for CS and SS, as -+ * those are saved and restored as part of pt_regs. -+ */ -+ savesegment(es, prev->es); -+ if (unlikely(next->es | prev->es)) -+ loadsegment(es, next->es); -+ -+ savesegment(ds, prev->ds); -+ if (unlikely(next->ds | prev->ds)) -+ loadsegment(ds, next->ds); -+ - /* - * Switch FS and GS. - * -- * Segment register != 0 always requires a reload. Also -- * reload when it has changed. When prev process used 64bit -- * base always reload to avoid an information leak. -+ * These are even more complicated than DS and ES: they have -+ * 64-bit bases that are controlled by arch_prctl. Those bases -+ * only differ from the values in the GDT or LDT if the selector -+ * is 0.
-+ * -+ * Loading the segment register resets the hidden base part of -+ * the register to 0 or the value from the GDT / LDT. If the -+ * next base address is zero, writing 0 to the segment register is -+ * much faster than using wrmsr to explicitly zero the base. -+ * -+ * The thread_struct.fs and thread_struct.gs values are 0 -+ * if the fs and gs bases respectively are not overridden -+ * from the values implied by fsindex and gsindex. They -+ * are nonzero, and store the nonzero base addresses, if -+ * the bases are overridden. -+ * -+ * (fs != 0 && fsindex != 0) || (gs != 0 && gsindex != 0) should -+ * be impossible. -+ * -+ * Therefore we need to reload the segment registers if either -+ * the old or new selector is nonzero, and we need to override -+ * the base address if next thread expects it to be overridden. -+ * -+ * This code is unnecessarily slow in the case where the old and -+ * new indexes are zero and the new base is nonzero -- it will -+ * unnecessarily write 0 to the selector before writing the new -+ * base address. -+ * -+ * Note: This all depends on arch_prctl being the only way that -+ * user code can override the segment base. Once wrfsbase and -+ * wrgsbase are enabled, most of this code will need to change. - */ - if (unlikely(fsindex | next->fsindex | prev->fs)) { - loadsegment(fs, next->fsindex); -+ - /* -- * Check if the user used a selector != 0; if yes -- * clear 64bit base, since overloaded base is always -- * mapped to the Null selector -+ * If user code wrote a nonzero value to FS, then it also -+ * cleared the overridden base address. -+ * -+ * XXX: if user code wrote 0 to FS and cleared the base -+ * address itself, we won't notice and we'll incorrectly -+ * restore the prior base address next time we reschedule -+ * the process.
- */ - if (fsindex) - prev->fs = 0; - } -- /* when next process has a 64bit base use it */ - if (next->fs) - wrmsrl(MSR_FS_BASE, next->fs); - prev->fsindex = fsindex; - - if (unlikely(gsindex | next->gsindex | prev->gs)) { - load_gs_index(next->gsindex); -+ -+ /* This works (and fails) the same way as fsindex above. */ - if (gsindex) - prev->gs = 0; - } --- -2.1.0 -