From e4553886cba535a80263dc114fc3374c3338d07e Mon Sep 17 00:00:00 2001
From: Peter Robinson
Date: May 09 2018 20:04:27 +0000
Subject: ARM and Raspberry Pi fixes

---

diff --git a/arm64-fix-usercopy-whitelist.patch b/arm64-fix-usercopy-whitelist.patch
new file mode 100644
index 0000000..cf66dd1
--- /dev/null
+++ b/arm64-fix-usercopy-whitelist.patch
@@ -0,0 +1,857 @@
+From patchwork Wed Mar 28 09:50:48 2018
+Content-Type: text/plain; charset="utf-8"
+MIME-Version: 1.0
+Content-Transfer-Encoding: 7bit
+Subject: [v2,1/2] arm64: fpsimd: Split cpu field out from struct fpsimd_state
+From: Dave P Martin
+X-Patchwork-Id: 10312693
+Message-Id: <1522230649-22008-2-git-send-email-Dave.Martin@arm.com>
+To: linux-arm-kernel@lists.infradead.org
+Cc: Mark Rutland , Will Deacon ,
+ Kees Cook
+Date: Wed, 28 Mar 2018 10:50:48 +0100
+
+In preparation for using a common representation of the FPSIMD
+state for tasks and KVM vcpus, this patch separates out the "cpu"
+field that is used to track the cpu on which the state was most
+recently loaded.
+
+This will allow common code to operate on task and vcpu contexts
+without requiring the cpu field to be stored at the same offset
+from the FPSIMD register data in both cases. This should avoid the
+need for messing with the definition of those parts of struct
+vcpu_arch that are exposed in the KVM user ABI.
+
+The resulting change is also convenient for grouping and defining
+the set of thread_struct fields that are supposed to be accessible
+to copy_{to,from}_user(), which includes user_fpsimd_state but
+should exclude the cpu field. This patch does not amend the
+usercopy whitelist to match: that will be addressed in a subsequent
+patch.
+
+Signed-off-by: Dave Martin
+---
+ arch/arm64/include/asm/fpsimd.h | 29 ++------------------------
+ arch/arm64/include/asm/processor.h | 4 ++--
+ arch/arm64/kernel/fpsimd.c | 42 +++++++++++++++++++++-----------------
+ arch/arm64/kernel/ptrace.c | 10 ++++-----
+ arch/arm64/kernel/signal.c | 3 +--
+ arch/arm64/kernel/signal32.c | 3 +--
+ 6 files changed, 34 insertions(+), 57 deletions(-)
+
+diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
+index 8857a0f..1bfc920 100644
+--- a/arch/arm64/include/asm/fpsimd.h
++++ b/arch/arm64/include/asm/fpsimd.h
+@@ -24,31 +24,6 @@
+ #include
+ #include
+
+-/*
+- * FP/SIMD storage area has:
+- * - FPSR and FPCR
+- * - 32 128-bit data registers
+- *
+- * Note that user_fpsimd forms a prefix of this structure, which is
+- * relied upon in the ptrace FP/SIMD accessors.
+- */
+-struct fpsimd_state {
+- union {
+- struct user_fpsimd_state user_fpsimd;
+- struct {
+- __uint128_t vregs[32];
+- u32 fpsr;
+- u32 fpcr;
+- /*
+- * For ptrace compatibility, pad to next 128-bit
+- * boundary here if extending this struct.
+- */
+- };
+- };
+- /* the id of the last cpu to have restored this state */
+- unsigned int cpu;
+-};
+-
+ #if defined(__KERNEL__) && defined(CONFIG_COMPAT)
+ /* Masks for extracting the FPSR and FPCR from the FPSCR */
+ #define VFP_FPSCR_STAT_MASK 0xf800009f
+@@ -62,8 +37,8 @@ struct fpsimd_state {
+
+ struct task_struct;
+
+-extern void fpsimd_save_state(struct fpsimd_state *state);
+-extern void fpsimd_load_state(struct fpsimd_state *state);
++extern void fpsimd_save_state(struct user_fpsimd_state *state);
++extern void fpsimd_load_state(struct user_fpsimd_state *state);
+
+ extern void fpsimd_thread_switch(struct task_struct *next);
+ extern void fpsimd_flush_thread(void);
+diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
+index fce604e..4a04535 100644
+--- a/arch/arm64/include/asm/processor.h
++++ b/arch/arm64/include/asm/processor.h
+@@ -37,7 +37,6 @@
+ #include
+
+ #include
+-#include
+ #include
+ #include
+ #include
+@@ -107,7 +106,8 @@ struct thread_struct {
+ #ifdef CONFIG_COMPAT
+ unsigned long tp2_value;
+ #endif
+- struct fpsimd_state fpsimd_state;
++ struct user_fpsimd_state fpsimd_state;
++ unsigned int fpsimd_cpu;
+ void *sve_state; /* SVE registers, if any */
+ unsigned int sve_vl; /* SVE vector length */
+ unsigned int sve_vl_onexec; /* SVE vl after next exec */
+diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
+index e7226c4..c4be311 100644
+--- a/arch/arm64/kernel/fpsimd.c
++++ b/arch/arm64/kernel/fpsimd.c
+@@ -64,7 +64,7 @@
+ * been loaded into its FPSIMD registers most recently, or whether it has
+ * been used to perform kernel mode NEON in the meantime.
+ *
+- * For (a), we add a 'cpu' field to struct fpsimd_state, which gets updated to
++ * For (a), we add a fpsimd_cpu field to thread_struct, which gets updated to
+ * the id of the current CPU every time the state is loaded onto a CPU. For (b),
+ * we add the per-cpu variable 'fpsimd_last_state' (below), which contains the
+ * address of the userland FPSIMD state of the task that was loaded onto the CPU
+@@ -73,7 +73,7 @@
+ * With this in place, we no longer have to restore the next FPSIMD state right
+ * when switching between tasks. Instead, we can defer this check to userland
+ * resume, at which time we verify whether the CPU's fpsimd_last_state and the
+- * task's fpsimd_state.cpu are still mutually in sync. If this is the case, we
++ * task's fpsimd_cpu are still mutually in sync. If this is the case, we
+ * can omit the FPSIMD restore.
+ *
+ * As an optimization, we use the thread_info flag TIF_FOREIGN_FPSTATE to
+@@ -90,14 +90,14 @@
+ * flag with local_bh_disable() unless softirqs are already masked.
+ *
+ * For a certain task, the sequence may look something like this:
+- * - the task gets scheduled in; if both the task's fpsimd_state.cpu field
++ * - the task gets scheduled in; if both the task's fpsimd_cpu field
+ * contains the id of the current CPU, and the CPU's fpsimd_last_state per-cpu
+ * variable points to the task's fpsimd_state, the TIF_FOREIGN_FPSTATE flag is
+ * cleared, otherwise it is set;
+ *
+ * - the task returns to userland; if TIF_FOREIGN_FPSTATE is set, the task's
+ * userland FPSIMD state is copied from memory to the registers, the task's
+- * fpsimd_state.cpu field is set to the id of the current CPU, the current
++ * fpsimd_cpu field is set to the id of the current CPU, the current
+ * CPU's fpsimd_last_state pointer is set to this task's fpsimd_state and the
+ * TIF_FOREIGN_FPSTATE flag is cleared;
+ *
+@@ -115,7 +115,7 @@
+ * whatever is in the FPSIMD registers is not saved to memory, but discarded.
+ */
+ struct fpsimd_last_state_struct {
+- struct fpsimd_state *st;
++ struct user_fpsimd_state *st;
+ bool sve_in_use;
+ };
+
+@@ -417,7 +417,7 @@ static void fpsimd_to_sve(struct task_struct *task)
+ {
+ unsigned int vq;
+ void *sst = task->thread.sve_state;
+- struct fpsimd_state const *fst = &task->thread.fpsimd_state;
++ struct user_fpsimd_state const *fst = &task->thread.fpsimd_state;
+ unsigned int i;
+
+ if (!system_supports_sve())
+@@ -443,7 +443,7 @@ static void sve_to_fpsimd(struct task_struct *task)
+ {
+ unsigned int vq;
+ void const *sst = task->thread.sve_state;
+- struct fpsimd_state *fst = &task->thread.fpsimd_state;
++ struct user_fpsimd_state *fst = &task->thread.fpsimd_state;
+ unsigned int i;
+
+ if (!system_supports_sve())
+@@ -539,7 +539,7 @@ void sve_sync_from_fpsimd_zeropad(struct task_struct *task)
+ {
+ unsigned int vq;
+ void *sst = task->thread.sve_state;
+- struct fpsimd_state const *fst = &task->thread.fpsimd_state;
++ struct user_fpsimd_state const *fst = &task->thread.fpsimd_state;
+ unsigned int i;
+
+ if (!test_tsk_thread_flag(task, TIF_SVE))
+@@ -908,10 +908,9 @@ void fpsimd_thread_switch(struct task_struct *next)
+ * the TIF_FOREIGN_FPSTATE flag so the state will be loaded
+ * upon the next return to userland.
+ */
+- struct fpsimd_state *st = &next->thread.fpsimd_state;
+-
+- if (__this_cpu_read(fpsimd_last_state.st) == st
+- && st->cpu == smp_processor_id())
++ if (__this_cpu_read(fpsimd_last_state.st) ==
++ &next->thread.fpsimd_state
++ && next->thread.fpsimd_cpu == smp_processor_id())
+ clear_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE);
+ else
+ set_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE);
+@@ -927,7 +926,8 @@ void fpsimd_flush_thread(void)
+
+ local_bh_disable();
+
+- memset(&current->thread.fpsimd_state, 0, sizeof(struct fpsimd_state));
++ memset(&current->thread.fpsimd_state, 0,
++ sizeof current->thread.fpsimd_state);
+ fpsimd_flush_task_state(current);
+
+ if (system_supports_sve()) {
+@@ -1004,11 +1004,10 @@ static void fpsimd_bind_to_cpu(void)
+ {
+ struct fpsimd_last_state_struct *last =
+ this_cpu_ptr(&fpsimd_last_state);
+- struct fpsimd_state *st = &current->thread.fpsimd_state;
+
+- last->st = st;
++ last->st = &current->thread.fpsimd_state;
+ last->sve_in_use = test_thread_flag(TIF_SVE);
+- st->cpu = smp_processor_id();
++ current->thread.fpsimd_cpu = smp_processor_id();
+ }
+
+ /*
+@@ -1043,7 +1042,7 @@ void fpsimd_update_current_state(struct user_fpsimd_state const *state)
+
+ local_bh_disable();
+
+- current->thread.fpsimd_state.user_fpsimd = *state;
++ current->thread.fpsimd_state = *state;
+ if (system_supports_sve() && test_thread_flag(TIF_SVE))
+ fpsimd_to_sve(current);
+
+@@ -1055,12 +1054,17 @@ void fpsimd_update_current_state(struct user_fpsimd_state const *state)
+ local_bh_enable();
+ }
+
++void fpsimd_flush_state(unsigned int *cpu)
++{
++ *cpu = NR_CPUS;
++}
++
+ /*
+ * Invalidate live CPU copies of task t's FPSIMD state
+ */
+ void fpsimd_flush_task_state(struct task_struct *t)
+ {
+- t->thread.fpsimd_state.cpu = NR_CPUS;
++ fpsimd_flush_state(&t->thread.fpsimd_cpu);
+ }
+
+ static inline void fpsimd_flush_cpu_state(void)
+@@ -1159,7 +1163,7 @@ EXPORT_SYMBOL(kernel_neon_end);
+
+ #ifdef CONFIG_EFI
+
+-static DEFINE_PER_CPU(struct fpsimd_state, efi_fpsimd_state);
++static DEFINE_PER_CPU(struct user_fpsimd_state, efi_fpsimd_state);
+ static DEFINE_PER_CPU(bool, efi_fpsimd_state_used);
+ static DEFINE_PER_CPU(bool, efi_sve_state_used);
+
+diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
+index 9ae31f7..fdeaba0de 100644
+--- a/arch/arm64/kernel/ptrace.c
++++ b/arch/arm64/kernel/ptrace.c
+@@ -629,7 +629,7 @@ static int __fpr_get(struct task_struct *target,
+
+ sve_sync_to_fpsimd(target);
+
+- uregs = &target->thread.fpsimd_state.user_fpsimd;
++ uregs = &target->thread.fpsimd_state;
+
+ return user_regset_copyout(&pos, &count, &kbuf, &ubuf, uregs,
+ start_pos, start_pos + sizeof(*uregs));
+@@ -660,14 +660,14 @@ static int __fpr_set(struct task_struct *target,
+ */
+ sve_sync_to_fpsimd(target);
+
+- newstate = target->thread.fpsimd_state.user_fpsimd;
++ newstate = target->thread.fpsimd_state;
+
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &newstate,
+ start_pos, start_pos + sizeof(newstate));
+ if (ret)
+ return ret;
+
+- target->thread.fpsimd_state.user_fpsimd = newstate;
++ target->thread.fpsimd_state = newstate;
+
+ return ret;
+ }
+@@ -1169,7 +1169,7 @@ static int compat_vfp_get(struct task_struct *target,
+ compat_ulong_t fpscr;
+ int ret, vregs_end_pos;
+
+- uregs = &target->thread.fpsimd_state.user_fpsimd;
++ uregs = &target->thread.fpsimd_state;
+
+ if (target == current)
+ fpsimd_preserve_current_state();
+@@ -1202,7 +1202,7 @@ static int compat_vfp_set(struct task_struct *target,
+ compat_ulong_t fpscr;
+ int ret, vregs_end_pos;
+
+- uregs = &target->thread.fpsimd_state.user_fpsimd;
++ uregs = &target->thread.fpsimd_state;
+
+ vregs_end_pos = VFP_STATE_SIZE - sizeof(compat_ulong_t);
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, uregs, 0,
+diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
+index f60c052..d026615 100644
+--- a/arch/arm64/kernel/signal.c
++++ b/arch/arm64/kernel/signal.c
+@@ -178,8 +178,7 @@ static void __user *apply_user_offset(
+
+ static int preserve_fpsimd_context(struct fpsimd_context __user *ctx)
+ {
+- struct user_fpsimd_state const *fpsimd =
+- &current->thread.fpsimd_state.user_fpsimd;
++ struct user_fpsimd_state const *fpsimd = &current->thread.fpsimd_state;
+ int err;
+
+ /* copy the FP and status/control registers */
+diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c
+index 79feb86..4ea38d3 100644
+--- a/arch/arm64/kernel/signal32.c
++++ b/arch/arm64/kernel/signal32.c
+@@ -148,8 +148,7 @@ union __fpsimd_vreg {
+
+ static int compat_preserve_vfp_context(struct compat_vfp_sigframe __user *frame)
+ {
+- struct user_fpsimd_state const *fpsimd =
+- &current->thread.fpsimd_state.user_fpsimd;
++ struct user_fpsimd_state const *fpsimd = &current->thread.fpsimd_state;
+ compat_ulong_t magic = VFP_MAGIC;
+ compat_ulong_t size = VFP_STORAGE_SIZE;
+ compat_ulong_t fpscr, fpexc;
+From patchwork Wed Mar 28 09:50:49 2018
+Content-Type: text/plain; charset="utf-8"
+MIME-Version: 1.0
+Content-Transfer-Encoding: 7bit
+Subject: [v2,2/2] arm64: uaccess: Fix omissions from usercopy whitelist
+From: Dave P Martin
+X-Patchwork-Id: 10312691
+Message-Id: <1522230649-22008-3-git-send-email-Dave.Martin@arm.com>
+To: linux-arm-kernel@lists.infradead.org
+Cc: Mark Rutland , Will Deacon ,
+ Kees Cook
+Date: Wed, 28 Mar 2018 10:50:49 +0100
+
+When the hardened usercopy support was added for arm64, it was
+concluded that all cases of usercopy into and out of thread_struct
+were statically sized and so didn't require explicit whitelisting
+of the appropriate fields in thread_struct.
+
+Testing with usercopy hardening enabled has revealed that this is
+not the case for certain ptrace regset manipulation calls on arm64.
+This occurs because the sizes of usercopies associated with the
+regset API are dynamic by construction, and because arm64 does not
+always stage such copies via the stack: indeed the regset API is
+designed to avoid the need for that by adding some bounds checking.
+
+This is currently believed to affect only the fpsimd and TLS
+registers.
+
+Because the whitelisted fields in thread_struct must be contiguous,
+this patch groups them together in a nested struct. It is also
+necessary to be able to determine the location and size of that
+struct, so rather than making the struct anonymous (which would
+save on edits elsewhere) or adding an anonymous union containing
+named and unnamed instances of the same struct (gross), this patch
+gives the struct a name and makes the necessary edits to code that
+references it (noisy but simple).
+
+Care is needed to ensure that the new struct does not contain
+padding (which the usercopy hardening would fail to protect).
+
+For this reason, the presence of tp2_value is made unconditional,
+since a padding field would be needed there in any case. This pads
+up to the 16-byte alignment required by struct user_fpsimd_state.
+
+Reported-by: Mark Rutland
+Fixes: 9e8084d3f761 ("arm64: Implement thread_struct whitelist for hardened usercopy")
+Signed-off-by: Dave Martin
+Acked-by: Kees Cook
+---
+
+Changes since v1:
+
+ * Add a BUILD_BUG_ON() check for padding in the whitelist struct.
+ * Move to using sizeof_field() for assigning *size; get rid of the
+ dummy pointer that was used previously.
+ * Delete bogus comment about why no whitelist is (was) needed.
+---
+ arch/arm64/include/asm/processor.h | 38 +++++++++++++++++++-----------
+ arch/arm64/kernel/fpsimd.c | 47 +++++++++++++++++++-------------------
+ arch/arm64/kernel/process.c | 6 ++---
+ arch/arm64/kernel/ptrace.c | 30 ++++++++++++------------
+ arch/arm64/kernel/signal.c | 3 ++-
+ arch/arm64/kernel/signal32.c | 3 ++-
+ arch/arm64/kernel/sys_compat.c | 2 +-
+ 7 files changed, 72 insertions(+), 57 deletions(-)
+
+diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
+index 4a04535..224af48 100644
+--- a/arch/arm64/include/asm/processor.h
++++ b/arch/arm64/include/asm/processor.h
+@@ -34,6 +34,8 @@
+
+ #ifdef __KERNEL__
+
++#include
++#include
+ #include
+
+ #include
+@@ -102,11 +104,18 @@ struct cpu_context {
+
+ struct thread_struct {
+ struct cpu_context cpu_context; /* cpu context */
+- unsigned long tp_value; /* TLS register */
+-#ifdef CONFIG_COMPAT
+- unsigned long tp2_value;
+-#endif
+- struct user_fpsimd_state fpsimd_state;
++
++ /*
++ * Whitelisted fields for hardened usercopy:
++ * Maintainers must ensure manually that this contains no
++ * implicit padding.
++ */
++ struct {
++ unsigned long tp_value; /* TLS register */
++ unsigned long tp2_value;
++ struct user_fpsimd_state fpsimd_state;
++ } uw;
++
+ unsigned int fpsimd_cpu;
+ void *sve_state; /* SVE registers, if any */
+ unsigned int sve_vl; /* SVE vector length */
+@@ -116,14 +125,17 @@ struct thread_struct {
+ struct debug_info debug; /* debugging */
+ };
+
+-/*
+- * Everything usercopied to/from thread_struct is statically-sized, so
+- * no hardened usercopy whitelist is needed.
+- */
+ static inline void arch_thread_struct_whitelist(unsigned long *offset,
+ unsigned long *size)
+ {
+- *offset = *size = 0;
++ /* Verify that there is no padding among the whitelisted fields: */
++ BUILD_BUG_ON(sizeof_field(struct thread_struct, uw) !=
++ sizeof_field(struct thread_struct, uw.tp_value) +
++ sizeof_field(struct thread_struct, uw.tp2_value) +
++ sizeof_field(struct thread_struct, uw.fpsimd_state));
++
++ *offset = offsetof(struct thread_struct, uw);
++ *size = sizeof_field(struct thread_struct, uw);
+ }
+
+ #ifdef CONFIG_COMPAT
+@@ -131,13 +143,13 @@ static inline void arch_thread_struct_whitelist(unsigned long *offset,
+ ({ \
+ unsigned long *__tls; \
+ if (is_compat_thread(task_thread_info(t))) \
+- __tls = &(t)->thread.tp2_value; \
++ __tls = &(t)->thread.uw.tp2_value; \
+ else \
+- __tls = &(t)->thread.tp_value; \
++ __tls = &(t)->thread.uw.tp_value; \
+ __tls; \
+ })
+ #else
+-#define task_user_tls(t) (&(t)->thread.tp_value)
++#define task_user_tls(t) (&(t)->thread.uw.tp_value)
+ #endif
+
+ /* Sync TPIDR_EL0 back to thread_struct for current */
+diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
+index c4be311..7a8ac960b6 100644
+--- a/arch/arm64/kernel/fpsimd.c
++++ b/arch/arm64/kernel/fpsimd.c
+@@ -222,7 +222,7 @@ static void sve_user_enable(void)
+ * sets TIF_SVE.
+ *
+ * When stored, FPSIMD registers V0-V31 are encoded in
+- * task->fpsimd_state; bits [max : 128] for each of Z0-Z31 are
++ * task->thread.uw.fpsimd_state; bits [max : 128] for each of Z0-Z31 are
+ * logically zero but not stored anywhere; P0-P15 and FFR are not
+ * stored and have unspecified values from userspace's point of
+ * view. For hygiene purposes, the kernel zeroes them on next use,
+@@ -231,9 +231,9 @@ static void sve_user_enable(void)
+ * task->thread.sve_state does not need to be non-NULL, valid or any
+ * particular size: it must not be dereferenced.
+ *
+- * * FPSR and FPCR are always stored in task->fpsimd_state irrespctive of
+- * whether TIF_SVE is clear or set, since these are not vector length
+- * dependent.
++ * * FPSR and FPCR are always stored in task->thread.uw.fpsimd_state
++ * irrespective of whether TIF_SVE is clear or set, since these are
++ * not vector length dependent.
+ */
+
+ /*
+@@ -251,10 +251,10 @@ static void task_fpsimd_load(void)
+
+ if (system_supports_sve() && test_thread_flag(TIF_SVE))
+ sve_load_state(sve_pffr(current),
+- &current->thread.fpsimd_state.fpsr,
++ &current->thread.uw.fpsimd_state.fpsr,
+ sve_vq_from_vl(current->thread.sve_vl) - 1);
+ else
+- fpsimd_load_state(&current->thread.fpsimd_state);
++ fpsimd_load_state(&current->thread.uw.fpsimd_state);
+
+ if (system_supports_sve()) {
+ /* Toggle SVE trapping for userspace if needed */
+@@ -291,9 +291,9 @@ static void task_fpsimd_save(void)
+ }
+
+ sve_save_state(sve_pffr(current),
+- &current->thread.fpsimd_state.fpsr);
++ &current->thread.uw.fpsimd_state.fpsr);
+ } else
+- fpsimd_save_state(&current->thread.fpsimd_state);
++ fpsimd_save_state(&current->thread.uw.fpsimd_state);
+ }
+ }
+
+@@ -404,20 +404,21 @@ static int __init sve_sysctl_init(void) { return 0; }
+ (SVE_SIG_ZREG_OFFSET(vq, n) - SVE_SIG_REGS_OFFSET))
+
+ /*
+- * Transfer the FPSIMD state in task->thread.fpsimd_state to
++ * Transfer the FPSIMD state in task->thread.uw.fpsimd_state to
+ * task->thread.sve_state.
+ *
+ * Task can be a non-runnable task, or current. In the latter case,
+ * softirqs (and preemption) must be disabled.
+ * task->thread.sve_state must point to at least sve_state_size(task)
+ * bytes of allocated kernel memory.
+- * task->thread.fpsimd_state must be up to date before calling this function.
++ * task->thread.uw.fpsimd_state must be up to date before calling this
++ * function.
+ */
+ static void fpsimd_to_sve(struct task_struct *task)
+ {
+ unsigned int vq;
+ void *sst = task->thread.sve_state;
+- struct user_fpsimd_state const *fst = &task->thread.fpsimd_state;
++ struct user_fpsimd_state const *fst = &task->thread.uw.fpsimd_state;
+ unsigned int i;
+
+ if (!system_supports_sve())
+@@ -431,7 +432,7 @@ static void fpsimd_to_sve(struct task_struct *task)
+
+ /*
+ * Transfer the SVE state in task->thread.sve_state to
+- * task->thread.fpsimd_state.
++ * task->thread.uw.fpsimd_state.
+ *
+ * Task can be a non-runnable task, or current. In the latter case,
+ * softirqs (and preemption) must be disabled.
+@@ -443,7 +444,7 @@ static void sve_to_fpsimd(struct task_struct *task)
+ {
+ unsigned int vq;
+ void const *sst = task->thread.sve_state;
+- struct user_fpsimd_state *fst = &task->thread.fpsimd_state;
++ struct user_fpsimd_state *fst = &task->thread.uw.fpsimd_state;
+ unsigned int i;
+
+ if (!system_supports_sve())
+@@ -510,7 +511,7 @@ void fpsimd_sync_to_sve(struct task_struct *task)
+ }
+
+ /*
+- * Ensure that task->thread.fpsimd_state is up to date with respect to
++ * Ensure that task->thread.uw.fpsimd_state is up to date with respect to
+ * the user task, irrespective of whether SVE is in use or not.
+ *
+ * This should only be called by ptrace. task must be non-runnable.
+@@ -525,21 +526,21 @@ void sve_sync_to_fpsimd(struct task_struct *task)
+
+ /*
+ * Ensure that task->thread.sve_state is up to date with respect to
+- * the task->thread.fpsimd_state.
++ * the task->thread.uw.fpsimd_state.
+ *
+ * This should only be called by ptrace to merge new FPSIMD register
+ * values into a task for which SVE is currently active.
+ * task must be non-runnable.
+ * task->thread.sve_state must point to at least sve_state_size(task)
+ * bytes of allocated kernel memory.
+- * task->thread.fpsimd_state must already have been initialised with
++ * task->thread.uw.fpsimd_state must already have been initialised with
+ * the new FPSIMD register values to be merged in.
+ */
+ void sve_sync_from_fpsimd_zeropad(struct task_struct *task)
+ {
+ unsigned int vq;
+ void *sst = task->thread.sve_state;
+- struct user_fpsimd_state const *fst = &task->thread.fpsimd_state;
++ struct user_fpsimd_state const *fst = &task->thread.uw.fpsimd_state;
+ unsigned int i;
+
+ if (!test_tsk_thread_flag(task, TIF_SVE))
+@@ -909,7 +910,7 @@ void fpsimd_thread_switch(struct task_struct *next)
+ * upon the next return to userland.
+ */
+ if (__this_cpu_read(fpsimd_last_state.st) ==
+- &next->thread.fpsimd_state
++ &next->thread.uw.fpsimd_state
+ && next->thread.fpsimd_cpu == smp_processor_id())
+ clear_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE);
+ else
+@@ -926,8 +927,8 @@ void fpsimd_flush_thread(void)
+
+ local_bh_disable();
+
+- memset(&current->thread.fpsimd_state, 0,
+- sizeof current->thread.fpsimd_state);
++ memset(&current->thread.uw.fpsimd_state, 0,
++ sizeof current->thread.uw.fpsimd_state);
+ fpsimd_flush_task_state(current);
+
+ if (system_supports_sve()) {
+@@ -986,7 +987,7 @@ void fpsimd_preserve_current_state(void)
+
+ /*
+ * Like fpsimd_preserve_current_state(), but ensure that
+- * current->thread.fpsimd_state is updated so that it can be copied to
++ * current->thread.uw.fpsimd_state is updated so that it can be copied to
+ * the signal frame.
+ */
+ void fpsimd_signal_preserve_current_state(void)
+@@ -1005,7 +1006,7 @@ static void fpsimd_bind_to_cpu(void)
+ struct fpsimd_last_state_struct *last =
+ this_cpu_ptr(&fpsimd_last_state);
+
+- last->st = &current->thread.fpsimd_state;
++ last->st = &current->thread.uw.fpsimd_state;
+ last->sve_in_use = test_thread_flag(TIF_SVE);
+ current->thread.fpsimd_cpu = smp_processor_id();
+ }
+
+@@ -1042,7 +1043,7 @@ void fpsimd_update_current_state(struct user_fpsimd_state const *state)
+
+ local_bh_disable();
+
+- current->thread.fpsimd_state = *state;
++ current->thread.uw.fpsimd_state = *state;
+ if (system_supports_sve() && test_thread_flag(TIF_SVE))
+ fpsimd_to_sve(current);
+
+diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
+index c0da6ef..f08a2ed 100644
+--- a/arch/arm64/kernel/process.c
++++ b/arch/arm64/kernel/process.c
+@@ -257,7 +257,7 @@ static void tls_thread_flush(void)
+ write_sysreg(0, tpidr_el0);
+
+ if (is_compat_task()) {
+- current->thread.tp_value = 0;
++ current->thread.uw.tp_value = 0;
+
+ /*
+ * We need to ensure ordering between the shadow state and the
+@@ -351,7 +351,7 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start,
+ * for the new thread.
+ */
+ if (clone_flags & CLONE_SETTLS)
+- p->thread.tp_value = childregs->regs[3];
++ p->thread.uw.tp_value = childregs->regs[3];
+ } else {
+ memset(childregs, 0, sizeof(struct pt_regs));
+ childregs->pstate = PSR_MODE_EL1h;
+@@ -379,7 +379,7 @@ static void tls_thread_switch(struct task_struct *next)
+ tls_preserve_current_state();
+
+ if (is_compat_thread(task_thread_info(next)))
+- write_sysreg(next->thread.tp_value, tpidrro_el0);
++ write_sysreg(next->thread.uw.tp_value, tpidrro_el0);
+ else if (!arm64_kernel_unmapped_at_el0())
+ write_sysreg(0, tpidrro_el0);
+
+diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
+index fdeaba0de..436a132 100644
+--- a/arch/arm64/kernel/ptrace.c
++++ b/arch/arm64/kernel/ptrace.c
+@@ -629,7 +629,7 @@ static int __fpr_get(struct task_struct *target,
+
+ sve_sync_to_fpsimd(target);
+
+- uregs = &target->thread.fpsimd_state;
++ uregs = &target->thread.uw.fpsimd_state;
+
+ return user_regset_copyout(&pos, &count, &kbuf, &ubuf, uregs,
+ start_pos, start_pos + sizeof(*uregs));
+@@ -655,19 +655,19 @@ static int __fpr_set(struct task_struct *target,
+ struct user_fpsimd_state newstate;
+
+ /*
+- * Ensure target->thread.fpsimd_state is up to date, so that a
++ * Ensure target->thread.uw.fpsimd_state is up to date, so that a
+ * short copyin can't resurrect stale data.
+ */
+ sve_sync_to_fpsimd(target);
+
+- newstate = target->thread.fpsimd_state;
++ newstate = target->thread.uw.fpsimd_state;
+
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &newstate,
+ start_pos, start_pos + sizeof(newstate));
+ if (ret)
+ return ret;
+
+- target->thread.fpsimd_state = newstate;
++ target->thread.uw.fpsimd_state = newstate;
+
+ return ret;
+ }
+@@ -692,7 +692,7 @@ static int tls_get(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ void *kbuf, void __user *ubuf)
+ {
+- unsigned long *tls = &target->thread.tp_value;
++ unsigned long *tls = &target->thread.uw.tp_value;
+
+ if (target == current)
+ tls_preserve_current_state();
+@@ -705,13 +705,13 @@ static int tls_set(struct task_struct *target, const struct user_regset *regset,
+ const void *kbuf, const void __user *ubuf)
+ {
+ int ret;
+- unsigned long tls = target->thread.tp_value;
++ unsigned long tls = target->thread.uw.tp_value;
+
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &tls, 0, -1);
+ if (ret)
+ return ret;
+
+- target->thread.tp_value = tls;
++ target->thread.uw.tp_value = tls;
+ return ret;
+ }
+
+@@ -842,7 +842,7 @@ static int sve_get(struct task_struct *target,
+ start = end;
+ end = SVE_PT_SVE_FPCR_OFFSET(vq) + SVE_PT_SVE_FPCR_SIZE;
+ ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+- &target->thread.fpsimd_state.fpsr,
++ &target->thread.uw.fpsimd_state.fpsr,
+ start, end);
+ if (ret)
+ return ret;
+@@ -941,7 +941,7 @@ static int sve_set(struct task_struct *target,
+ start = end;
+ end = SVE_PT_SVE_FPCR_OFFSET(vq) + SVE_PT_SVE_FPCR_SIZE;
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+- &target->thread.fpsimd_state.fpsr,
++ &target->thread.uw.fpsimd_state.fpsr,
+ start, end);
+
+ out:
+@@ -1169,7 +1169,7 @@ static int compat_vfp_get(struct task_struct *target,
+ compat_ulong_t fpscr;
+ int ret, vregs_end_pos;
+
+- uregs = &target->thread.fpsimd_state;
++ uregs = &target->thread.uw.fpsimd_state;
+
+ if (target == current)
+ fpsimd_preserve_current_state();
+@@ -1202,7 +1202,7 @@ static int compat_vfp_set(struct task_struct *target,
+ compat_ulong_t fpscr;
+ int ret, vregs_end_pos;
+
+- uregs = &target->thread.fpsimd_state;
++ uregs = &target->thread.uw.fpsimd_state;
+
+ vregs_end_pos = VFP_STATE_SIZE - sizeof(compat_ulong_t);
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, uregs, 0,
+@@ -1225,7 +1225,7 @@ static int compat_tls_get(struct task_struct *target,
+ const struct user_regset *regset, unsigned int pos,
+ unsigned int count, void *kbuf, void __user *ubuf)
+ {
+- compat_ulong_t tls = (compat_ulong_t)target->thread.tp_value;
++ compat_ulong_t tls = (compat_ulong_t)target->thread.uw.tp_value;
+ return user_regset_copyout(&pos, &count, &kbuf, &ubuf, &tls, 0, -1);
+ }
+
+@@ -1235,13 +1235,13 @@ static int compat_tls_set(struct task_struct *target,
+ const void __user *ubuf)
+ {
+ int ret;
+- compat_ulong_t tls = target->thread.tp_value;
++ compat_ulong_t tls = target->thread.uw.tp_value;
+
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &tls, 0, -1);
+ if (ret)
+ return ret;
+
+- target->thread.tp_value = tls;
++ target->thread.uw.tp_value = tls;
+ return ret;
+ }
+
+@@ -1538,7 +1538,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
+ break;
+
+ case COMPAT_PTRACE_GET_THREAD_AREA:
+- ret = put_user((compat_ulong_t)child->thread.tp_value,
++ ret = put_user((compat_ulong_t)child->thread.uw.tp_value,
+ (compat_ulong_t __user *)datap);
+ break;
+
+diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
+index d026615..a0c4138 100644
+--- a/arch/arm64/kernel/signal.c
++++ b/arch/arm64/kernel/signal.c
+@@ -178,7 +178,8 @@ static void __user *apply_user_offset(
+
+ static int preserve_fpsimd_context(struct fpsimd_context __user *ctx)
+ {
+- struct user_fpsimd_state const *fpsimd = &current->thread.fpsimd_state;
++ struct user_fpsimd_state const *fpsimd =
++ &current->thread.uw.fpsimd_state;
+ int err;
+
+ /* copy the FP and status/control registers */
+diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c
+index 4ea38d3..884177a 100644
+--- a/arch/arm64/kernel/signal32.c
++++ b/arch/arm64/kernel/signal32.c
+@@ -148,7 +148,8 @@ union __fpsimd_vreg {
+
+ static int compat_preserve_vfp_context(struct compat_vfp_sigframe __user *frame)
+ {
+- struct user_fpsimd_state const *fpsimd = &current->thread.fpsimd_state;
++ struct user_fpsimd_state const *fpsimd =
++ &current->thread.uw.fpsimd_state;
+ compat_ulong_t magic = VFP_MAGIC;
+ compat_ulong_t size = VFP_STORAGE_SIZE;
+ compat_ulong_t fpscr, fpexc;
+diff --git a/arch/arm64/kernel/sys_compat.c b/arch/arm64/kernel/sys_compat.c
+index a382b2a..9155989 100644
+--- a/arch/arm64/kernel/sys_compat.c
++++ b/arch/arm64/kernel/sys_compat.c
+@@ -88,7 +88,7 @@ long compat_arm_syscall(struct pt_regs *regs)
+ return do_compat_cache_op(regs->regs[0], regs->regs[1], regs->regs[2]);
+
+ case __ARM_NR_compat_set_tls:
+- current->thread.tp_value = regs->regs[0];
++ current->thread.uw.tp_value = regs->regs[0];
+
+ /*
+ * Protect against register corruption from context switch.
diff --git a/baseconfig/arm/aarch64/CONFIG_MV_XOR_V2 b/baseconfig/arm/aarch64/CONFIG_MV_XOR_V2
index a6e590e..fdcbbf4 100644
--- a/baseconfig/arm/aarch64/CONFIG_MV_XOR_V2
+++ b/baseconfig/arm/aarch64/CONFIG_MV_XOR_V2
@@ -1 +1 @@
-# CONFIG_MV_XOR_V2 is not set
+CONFIG_MV_XOR_V2=y
diff --git a/kernel-aarch64-debug.config b/kernel-aarch64-debug.config
index dcf76bf..f19d98f 100644
--- a/kernel-aarch64-debug.config
+++ b/kernel-aarch64-debug.config
@@ -3373,7 +3373,7 @@ CONFIG_MVMDIO=m
 CONFIG_MVNETA_BM_ENABLE=m
 CONFIG_MVNETA=m
 CONFIG_MVPP2=m
-# CONFIG_MV_XOR_V2 is not set
+CONFIG_MV_XOR_V2=y
 CONFIG_MV_XOR=y
 CONFIG_MWAVE=m
 CONFIG_MWIFIEX=m
diff --git a/kernel-aarch64.config b/kernel-aarch64.config
index f635205..e4f3618 100644
--- a/kernel-aarch64.config
+++ b/kernel-aarch64.config
@@ -3351,7 +3351,7 @@ CONFIG_MVMDIO=m
 CONFIG_MVNETA_BM_ENABLE=m
 CONFIG_MVNETA=m
 CONFIG_MVPP2=m
-# CONFIG_MV_XOR_V2 is not set
+CONFIG_MV_XOR_V2=y
 CONFIG_MV_XOR=y
 CONFIG_MWAVE=m
 CONFIG_MWIFIEX=m
diff --git a/kernel.spec b/kernel.spec
index de582f1..4a7948d 100644
--- a/kernel.spec
+++ b/kernel.spec
@@ -618,6 +618,9 @@ Patch320: arm-dts-Add-am335x-pocketbeagle.patch
 # https://patchwork.kernel.org/patch/10133165/
 Patch321: mvebu-a37xx-fixes.patch
 
+# https://www.spinics.net/lists/arm-kernel/msg643991.html
+Patch322: arm64-fix-usercopy-whitelist.patch
+
 # Enabling Patches for the RPi3+
 Patch330: bcm2837-gpio-expander.patch
 # http://www.spinics.net/lists/arm-kernel/msg647617.html
@@ -625,6 +628,9 @@ Patch331: bcm2837-rpi-initial-3plus-support.patch
 Patch332: bcm2837-enable-pmu.patch
 Patch333: bcm2837-lan78xx-fixes.patch
 
+# https://patchwork.freedesktop.org/patch/219644/
+Patch334: vc4-Make-sure-vc4_bo_-inc-dec-_usecnt-calls-are-balanced.patch
+
 # 400 - IBM (ppc/s390x) patches
 # 500 - Temp fixes/CVEs etc
@@ -1927,6 +1933,9 @@ fi
 
 #
 #
 %changelog
+* Sat May 5 2018 Peter Robinson
+- ARM and Raspberry Pi fixes
+
 * Fri May 04 2018 Laura Abbott
 - Fix for building out of tree modules on powerpc (rhbz 1574604)
diff --git a/vc4-Make-sure-vc4_bo_-inc-dec-_usecnt-calls-are-balanced.patch b/vc4-Make-sure-vc4_bo_-inc-dec-_usecnt-calls-are-balanced.patch
new file mode 100644
index 0000000..4c10771
--- /dev/null
+++ b/vc4-Make-sure-vc4_bo_-inc-dec-_usecnt-calls-are-balanced.patch
@@ -0,0 +1,125 @@
+From patchwork Mon Apr 30 13:32:32 2018
+Content-Type: text/plain; charset="utf-8"
+MIME-Version: 1.0
+Content-Transfer-Encoding: 7bit
+Subject: drm/vc4: Make sure vc4_bo_{inc, dec}_usecnt() calls are balanced
+From: Boris Brezillon
+X-Patchwork-Id: 219644
+Message-Id: <20180430133232.32457-1-boris.brezillon@bootlin.com>
+To: Eric Anholt
+Cc: David Airlie , stable@vger.kernel.org,
+ Boris Brezillon ,
+ dri-devel@lists.freedesktop.org, Peter Robinson
+Date: Mon, 30 Apr 2018 15:32:32 +0200
+
+Commit b9f19259b84d ("drm/vc4: Add the DRM_IOCTL_VC4_GEM_MADVISE ioctl")
+introduced a mechanism to mark some BOs as purgeable to allow the driver
+to drop them under memory pressure. In order to implement this feature
+we had to add a mechanism to mark BOs as currently used by a piece of
+hardware which materialized through the ->usecnt counter.
+
+Plane code is supposed to increment usecnt when it attaches a BO to a
+plane and decrement it when it's done with this BO, which was done in
+the ->prepare_fb() and ->cleanup_fb() hooks. The problem is, async page
+flip logic does not go through the regular atomic update path, and
+->prepare_fb() and ->cleanup_fb() are not called in this case.
+
+Fix that by manually calling vc4_bo_{inc,dec}_usecnt() in the
+async-page-flip path.
+
+Note that all this should go away as soon as we get generic async page
+flip support in the core, in the meantime, this fix should do the
+trick.
+
+Fixes: b9f19259b84d ("drm/vc4: Add the DRM_IOCTL_VC4_GEM_MADVISE ioctl")
+Reported-by: Peter Robinson
+Cc:
+Signed-off-by: Boris Brezillon
+---
+ drivers/gpu/drm/vc4/vc4_crtc.c | 46 +++++++++++++++++++++++++++++++++++++++++-
+ 1 file changed, 45 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c
+index 83d3b7912fc2..c8650bbcbcb3 100644
+--- a/drivers/gpu/drm/vc4/vc4_crtc.c
++++ b/drivers/gpu/drm/vc4/vc4_crtc.c
+@@ -741,6 +741,7 @@ static irqreturn_t vc4_crtc_irq_handler(int irq, void *data)
+ struct vc4_async_flip_state {
+ struct drm_crtc *crtc;
+ struct drm_framebuffer *fb;
++ struct drm_framebuffer *old_fb;
+ struct drm_pending_vblank_event *event;
+
+ struct vc4_seqno_cb cb;
+@@ -770,6 +771,23 @@ vc4_async_page_flip_complete(struct vc4_seqno_cb *cb)
+
+ drm_crtc_vblank_put(crtc);
+ drm_framebuffer_put(flip_state->fb);
++
++ /* Decrement the BO usecnt in order to keep the inc/dec calls balanced
++ * when the planes are updated through the async update path.
++ * FIXME: we should move to generic async-page-flip when it's
++ * available, so that we can get rid of this hand-made cleanup_fb()
++ * logic.
++ */
++ if (flip_state->old_fb) {
++ struct drm_gem_cma_object *cma_bo;
++ struct vc4_bo *bo;
++
++ cma_bo = drm_fb_cma_get_gem_obj(flip_state->old_fb, 0);
++ bo = to_vc4_bo(&cma_bo->base);
++ vc4_bo_dec_usecnt(bo);
++ drm_framebuffer_put(flip_state->old_fb);
++ }
++
+ kfree(flip_state);
+
+ up(&vc4->async_modeset);
+@@ -794,9 +812,22 @@ static int vc4_async_page_flip(struct drm_crtc *crtc,
+ struct drm_gem_cma_object *cma_bo = drm_fb_cma_get_gem_obj(fb, 0);
+ struct vc4_bo *bo = to_vc4_bo(&cma_bo->base);
+
++ /* Increment the BO usecnt here, so that we never end up with an
++ * unbalanced number of vc4_bo_{dec,inc}_usecnt() calls when the
++ * plane is later updated through the non-async path.
++ * FIXME: we should move to generic async-page-flip when it's
++ * available, so that we can get rid of this hand-made prepare_fb()
++ * logic.
++ */
++ ret = vc4_bo_inc_usecnt(bo);
++ if (ret)
++ return ret;
++
+ flip_state = kzalloc(sizeof(*flip_state), GFP_KERNEL);
+- if (!flip_state)
++ if (!flip_state) {
++ vc4_bo_dec_usecnt(bo);
+ return -ENOMEM;
++ }
+
+ drm_framebuffer_get(fb);
+ flip_state->fb = fb;
+@@ -807,10 +838,23 @@ static int vc4_async_page_flip(struct drm_crtc *crtc,
+ ret = down_interruptible(&vc4->async_modeset);
+ if (ret) {
+ drm_framebuffer_put(fb);
++ vc4_bo_dec_usecnt(bo);
+ kfree(flip_state);
+ return ret;
+ }
+
++ /* Save the current FB before it's replaced by the new one in
++ * drm_atomic_set_fb_for_plane(). We'll need the old FB in
++ * vc4_async_page_flip_complete() to decrement the BO usecnt and keep
++ * it consistent.
++ * FIXME: we should move to generic async-page-flip when it's
++ * available, so that we can get rid of this hand-made cleanup_fb()
++ * logic.
++ */
++ flip_state->old_fb = plane->state->fb;
++ if (flip_state->old_fb)
++ drm_framebuffer_get(flip_state->old_fb);
++
+ WARN_ON(drm_crtc_vblank_get(crtc) != 0);
+
+ /* Immediately update the plane's legacy fb pointer, so that later
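
For background on the mechanism the whitelist patches above rely on:
arch_thread_struct_whitelist() reports one contiguous (offset, size)
window of thread_struct to the slab allocator when the task_struct
cache is created, and hardened usercopy then requires every
copy_{to,from}_user() that targets a task_struct slab object to fall
entirely inside that window. What follows is a minimal sketch of that
bounds check using simplified, hypothetical types and names, not the
kernel's actual implementation (the real wiring lives in kernel/fork.c
and the slab allocator's usercopy checks):

#include <stddef.h>

/* One contiguous whitelisted window inside a slab object. */
struct usercopy_window {
	size_t offset;	/* start of the window within the object */
	size_t size;	/* length of the window; 0 permits nothing */
};

/*
 * Return nonzero when a usercopy of 'len' bytes starting at byte
 * 'off' of the object lies entirely inside the whitelisted window.
 * Written so that 'off + len' is never computed, avoiding overflow.
 */
int usercopy_in_window(const struct usercopy_window *w,
		       size_t off, size_t len)
{
	return len <= w->size &&
	       off >= w->offset &&
	       off - w->offset <= w->size - len;
}

This is why the arm64 fix groups tp_value, tp2_value and fpsimd_state
into the padding-free uw struct: the dynamically sized regset and TLS
usercopies then land in a single whitelisted window, while fpsimd_cpu
and the SVE bookkeeping stay outside it.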