Jesse Keating 2f82dd
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
Jesse Keating 2f82dd
index e8de2f6..538c2b6 100644
Jesse Keating 2f82dd
--- a/arch/x86/include/asm/desc.h
Jesse Keating 2f82dd
+++ b/arch/x86/include/asm/desc.h
Jesse Keating 2f82dd
@@ -5,6 +5,7 @@
Jesse Keating 2f82dd
 #include <asm/ldt.h>
Jesse Keating 2f82dd
 #include <asm/mmu.h>
Jesse Keating 2f82dd
 #include <linux/smp.h>
Jesse Keating 2f82dd
+#include <linux/mm_types.h>
Jesse Keating 2f82dd
 
Jesse Keating 2f82dd
 static inline void fill_ldt(struct desc_struct *desc,
Jesse Keating 2f82dd
 			    const struct user_desc *info)
Jesse Keating 2f82dd
@@ -93,6 +94,9 @@ static inline int desc_empty(const void *ptr)
Jesse Keating 2f82dd
 
Jesse Keating 2f82dd
 #define load_TLS(t, cpu) native_load_tls(t, cpu)
Jesse Keating 2f82dd
 #define set_ldt native_set_ldt
Jesse Keating 2f82dd
+#ifdef CONFIG_X86_32
Jesse Keating 2f82dd
+#define load_user_cs_desc native_load_user_cs_desc
Jesse Keating 2f82dd
+#endif /*CONFIG_X86_32*/
Jesse Keating 2f82dd
 
Jesse Keating 2f82dd
 #define write_ldt_entry(dt, entry, desc)	\
Jesse Keating 2f82dd
 	native_write_ldt_entry(dt, entry, desc)
Jesse Keating 2f82dd
@@ -392,4 +396,25 @@ static inline void set_system_intr_gate_ist(int n, void *addr, unsigned ist)
Jesse Keating 2f82dd
 	_set_gate(n, GATE_INTERRUPT, addr, 0x3, ist, __KERNEL_CS);
Jesse Keating 2f82dd
 }
Jesse Keating 2f82dd
 
Jesse Keating 2f82dd
+#ifdef CONFIG_X86_32
Jesse Keating 2f82dd
+static inline void set_user_cs(struct desc_struct *desc, unsigned long limit)
Jesse Keating 2f82dd
+{
Jesse Keating 2f82dd
+	limit = (limit - 1) / PAGE_SIZE;
Jesse Keating 2f82dd
+	desc->a = limit & 0xffff;
Jesse Keating 2f82dd
+	desc->b = (limit & 0xf0000) | 0x00c0fb00;
Jesse Keating 2f82dd
+}
Jesse Keating 2f82dd
+
Jesse Keating 2f82dd
+static inline void native_load_user_cs_desc(int cpu, struct mm_struct *mm)
Jesse Keating 2f82dd
+{
Jesse Keating 2f82dd
+	get_cpu_gdt_table(cpu)[GDT_ENTRY_DEFAULT_USER_CS] = (mm)->context.user_cs;
Jesse Keating 2f82dd
+}
Jesse Keating 2f82dd
+
Jesse Keating 2f82dd
+#define arch_add_exec_range arch_add_exec_range
Jesse Keating 2f82dd
+#define arch_remove_exec_range arch_remove_exec_range
Jesse Keating 2f82dd
+#define arch_flush_exec_range arch_flush_exec_range
Jesse Keating 2f82dd
+extern void arch_add_exec_range(struct mm_struct *mm, unsigned long limit);
Jesse Keating 2f82dd
+extern void arch_remove_exec_range(struct mm_struct *mm, unsigned long limit);
Jesse Keating 2f82dd
+extern void arch_flush_exec_range(struct mm_struct *mm);
Jesse Keating 2f82dd
+#endif /* CONFIG_X86_32 */
Jesse Keating 2f82dd
+
Jesse Keating 2f82dd
 #endif /* _ASM_X86_DESC_H */
Jesse Keating 2f82dd
diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h
Jesse Keating 2f82dd
index 80a1dee..8314c66 100644
Jesse Keating 2f82dd
--- a/arch/x86/include/asm/mmu.h
Jesse Keating 2f82dd
+++ b/arch/x86/include/asm/mmu.h
Jesse Keating 2f82dd
@@ -7,12 +7,19 @@
Jesse Keating 2f82dd
 /*
Jesse Keating 2f82dd
  * The x86 doesn't have a mmu context, but
Jesse Keating 2f82dd
  * we put the segment information here.
Jesse Keating 2f82dd
+ *
Jesse Keating 2f82dd
+ * exec_limit is used to track the range PROT_EXEC
Jesse Keating 2f82dd
+ * mappings span.
Jesse Keating 2f82dd
  */
Jesse Keating 2f82dd
 typedef struct {
Jesse Keating 2f82dd
 	void *ldt;
Jesse Keating 2f82dd
 	int size;
Jesse Keating 2f82dd
 	struct mutex lock;
Jesse Keating 2f82dd
 	void *vdso;
Jesse Keating 2f82dd
+#ifdef CONFIG_X86_32
Jesse Keating 2f82dd
+	struct desc_struct user_cs;
Jesse Keating 2f82dd
+	unsigned long exec_limit;
Jesse Keating 2f82dd
+#endif
Jesse Keating 2f82dd
 } mm_context_t;
Jesse Keating 2f82dd
 
Jesse Keating 2f82dd
 #ifdef CONFIG_SMP
Jesse Keating 2f82dd
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
Jesse Keating 2f82dd
index 8aebcc4..cbbd2b0 100644
Jesse Keating 2f82dd
--- a/arch/x86/include/asm/paravirt.h
Jesse Keating 2f82dd
+++ b/arch/x86/include/asm/paravirt.h
Jesse Keating 2f82dd
@@ -289,6 +289,12 @@ static inline void set_ldt(const void *addr, unsigned entries)
Jesse Keating 2f82dd
 {
Jesse Keating 2f82dd
 	PVOP_VCALL2(pv_cpu_ops.set_ldt, addr, entries);
Jesse Keating 2f82dd
 }
Jesse Keating 2f82dd
+#ifdef CONFIG_X86_32
Jesse Keating 2f82dd
+static inline void load_user_cs_desc(unsigned int cpu, struct mm_struct *mm)
Jesse Keating 2f82dd
+{
Jesse Keating 2f82dd
+	PVOP_VCALL2(pv_cpu_ops.load_user_cs_desc, cpu, mm);
Jesse Keating 2f82dd
+}
Jesse Keating 2f82dd
+#endif /*CONFIG_X86_32*/
Jesse Keating 2f82dd
 static inline void store_gdt(struct desc_ptr *dtr)
Jesse Keating 2f82dd
 {
Jesse Keating 2f82dd
 	PVOP_VCALL1(pv_cpu_ops.store_gdt, dtr);
Jesse Keating 2f82dd
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
Jesse Keating 2f82dd
index dd0f5b3..c2727ef 100644
Jesse Keating 2f82dd
--- a/arch/x86/include/asm/paravirt_types.h
Jesse Keating 2f82dd
+++ b/arch/x86/include/asm/paravirt_types.h
Jesse Keating 2f82dd
@@ -118,6 +118,9 @@ struct pv_cpu_ops {
Jesse Keating 2f82dd
 	void (*store_gdt)(struct desc_ptr *);
Jesse Keating 2f82dd
 	void (*store_idt)(struct desc_ptr *);
Jesse Keating 2f82dd
 	void (*set_ldt)(const void *desc, unsigned entries);
Jesse Keating 2f82dd
+#ifdef CONFIG_X86_32
Jesse Keating 2f82dd
+	void (*load_user_cs_desc)(int cpu, struct mm_struct *mm);
Jesse Keating 2f82dd
+#endif
Jesse Keating 2f82dd
 	unsigned long (*store_tr)(void);
Jesse Keating 2f82dd
 	void (*load_tls)(struct thread_struct *t, unsigned int cpu);
Jesse Keating 2f82dd
 #ifdef CONFIG_X86_64
Jesse Keating 2f82dd
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
Jesse Keating 2f82dd
index c3429e8..62cc460 100644
Jesse Keating 2f82dd
--- a/arch/x86/include/asm/processor.h
Jesse Keating 2f82dd
+++ b/arch/x86/include/asm/processor.h
Jesse Keating 2f82dd
@@ -161,6 +161,9 @@ static inline int hlt_works(int cpu)
Jesse Keating 2f82dd
 
Jesse Keating 2f82dd
 #define cache_line_size()	(boot_cpu_data.x86_cache_alignment)
Jesse Keating 2f82dd
 
Jesse Keating 2f82dd
+#define __HAVE_ARCH_ALIGN_STACK
Jesse Keating 2f82dd
+extern unsigned long arch_align_stack(unsigned long sp);
Jesse Keating 2f82dd
+
Jesse Keating 2f82dd
 extern void cpu_detect(struct cpuinfo_x86 *c);
Jesse Keating 2f82dd
 
Jesse Keating 2f82dd
 extern struct pt_regs *idle_regs(struct pt_regs *);
Jesse Keating 2f82dd
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
Jesse Keating 2f82dd
index cc25c2b..6ce4863 100644
Jesse Keating 2f82dd
--- a/arch/x86/kernel/cpu/common.c
Jesse Keating 2f82dd
+++ b/arch/x86/kernel/cpu/common.c
Jesse Keating 2f82dd
@@ -798,6 +798,20 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
Jesse Keating 2f82dd
 	/* Filter out anything that depends on CPUID levels we don't have */
Jesse Keating 2f82dd
 	filter_cpuid_features(c, true);
Jesse Keating 2f82dd
 
Jesse Keating 2f82dd
+	/*
Jesse Keating 2f82dd
+	 *  emulation of NX with segment limits unfortunately means
Jesse Keating 2f82dd
+	 *  we have to disable the fast system calls, due to the way that
Jesse Keating 2f82dd
+	 *  sysexit clears the segment limits on return.
Jesse Keating 2f82dd
+	 *  If we have either disabled exec-shield on the boot command line,
Jesse Keating 2f82dd
+	 *  or we have NX, then we don't need to do this.
Jesse Keating 2f82dd
+	 */
Jesse Keating 2f82dd
+	if (exec_shield != 0) {
Jesse Keating 2f82dd
+#ifdef CONFIG_X86_PAE
Jesse Keating 2f82dd
+		if (!test_cpu_cap(c, X86_FEATURE_NX))
Jesse Keating 2f82dd
+#endif
Jesse Keating 2f82dd
+			clear_cpu_cap(c, X86_FEATURE_SEP);
Jesse Keating 2f82dd
+	}
Jesse Keating 2f82dd
+
Jesse Keating 2f82dd
 	/* If the model name is still unset, do table lookup. */
Jesse Keating 2f82dd
 	if (!c->x86_model_id[0]) {
Jesse Keating 2f82dd
 		const char *p;
Jesse Keating 2f82dd
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
Jesse Keating 2f82dd
index 1b1739d..c2dda16 100644
Jesse Keating 2f82dd
--- a/arch/x86/kernel/paravirt.c
Jesse Keating 2f82dd
+++ b/arch/x86/kernel/paravirt.c
Jesse Keating 2f82dd
@@ -345,6 +345,9 @@ struct pv_cpu_ops pv_cpu_ops = {
Jesse Keating 2f82dd
 	.read_tscp = native_read_tscp,
Jesse Keating 2f82dd
 	.load_tr_desc = native_load_tr_desc,
Jesse Keating 2f82dd
 	.set_ldt = native_set_ldt,
Jesse Keating 2f82dd
+#ifdef CONFIG_X86_32
Jesse Keating 2f82dd
+	.load_user_cs_desc = native_load_user_cs_desc,
Jesse Keating 2f82dd
+#endif /*CONFIG_X86_32*/
Jesse Keating 2f82dd
 	.load_gdt = native_load_gdt,
Jesse Keating 2f82dd
 	.load_idt = native_load_idt,
Jesse Keating 2f82dd
 	.store_gdt = native_store_gdt,
Jesse Keating 2f82dd
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
Jesse Keating 2f82dd
index 4cf7956..b2407dc 100644
Jesse Keating 2f82dd
--- a/arch/x86/kernel/process_32.c
Jesse Keating 2f82dd
+++ b/arch/x86/kernel/process_32.c
Jesse Keating 2f82dd
@@ -296,7 +296,10 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
Jesse Keating 2f82dd
 void
Jesse Keating 2f82dd
 start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
Jesse Keating 2f82dd
 {
Jesse Keating 2f82dd
+	int cpu;
Jesse Keating 2f82dd
+
Jesse Keating 2f82dd
 	set_user_gs(regs, 0);
Jesse Keating 2f82dd
+
Jesse Keating 2f82dd
 	regs->fs		= 0;
Jesse Keating 2f82dd
 	set_fs(USER_DS);
Jesse Keating 2f82dd
 	regs->ds		= __USER_DS;
Jesse Keating 2f82dd
@@ -305,6 +308,11 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
Jesse Keating 2f82dd
 	regs->cs		= __USER_CS;
Jesse Keating 2f82dd
 	regs->ip		= new_ip;
Jesse Keating 2f82dd
 	regs->sp		= new_sp;
Jesse Keating 2f82dd
+
Jesse Keating 2f82dd
+	cpu = get_cpu();
Jesse Keating 2f82dd
+	load_user_cs_desc(cpu, current->mm);
Jesse Keating 2f82dd
+	put_cpu();
Jesse Keating 2f82dd
+
Jesse Keating 2f82dd
 	/*
Jesse Keating 2f82dd
 	 * Free the old FP and other extended state
Jesse Keating 2f82dd
 	 */
Jesse Keating 2f82dd
@@ -364,6 +372,9 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
Jesse Keating 2f82dd
 	if (preload_fpu)
Jesse Keating 2f82dd
 		prefetch(next->xstate);
Jesse Keating 2f82dd
 
Jesse Keating 2f82dd
+	if (next_p->mm)
Jesse Keating 2f82dd
+		load_user_cs_desc(cpu, next_p->mm);
Jesse Keating 2f82dd
+
Jesse Keating 2f82dd
 	/*
Jesse Keating 2f82dd
 	 * Reload esp0.
Jesse Keating 2f82dd
 	 */
Jesse Keating 2f82dd
@@ -497,3 +508,40 @@ unsigned long get_wchan(struct task_struct *p)
Jesse Keating 2f82dd
 	return 0;
Jesse Keating 2f82dd
 }
Jesse Keating 2f82dd
 
Jesse Keating 2f82dd
+static void modify_cs(struct mm_struct *mm, unsigned long limit)
Jesse Keating 2f82dd
+{
Jesse Keating 2f82dd
+	mm->context.exec_limit = limit;
Jesse Keating 2f82dd
+	set_user_cs(&mm->context.user_cs, limit);
Jesse Keating 2f82dd
+	if (mm == current->mm) {
Jesse Keating 2f82dd
+		int cpu;
Jesse Keating 2f82dd
+
Jesse Keating 2f82dd
+		cpu = get_cpu();
Jesse Keating 2f82dd
+		load_user_cs_desc(cpu, mm);
Jesse Keating 2f82dd
+		put_cpu();
Jesse Keating 2f82dd
+	}
Jesse Keating 2f82dd
+}
Jesse Keating 2f82dd
+
Jesse Keating 2f82dd
+void arch_add_exec_range(struct mm_struct *mm, unsigned long limit)
Jesse Keating 2f82dd
+{
Jesse Keating 2f82dd
+	if (limit > mm->context.exec_limit)
Jesse Keating 2f82dd
+		modify_cs(mm, limit);
Jesse Keating 2f82dd
+}
Jesse Keating 2f82dd
+
Jesse Keating 2f82dd
+void arch_remove_exec_range(struct mm_struct *mm, unsigned long old_end)
Jesse Keating 2f82dd
+{
Jesse Keating 2f82dd
+	struct vm_area_struct *vma;
Jesse Keating 2f82dd
+	unsigned long limit = PAGE_SIZE;
Jesse Keating 2f82dd
+
Jesse Keating 2f82dd
+	if (old_end == mm->context.exec_limit) {
Jesse Keating 2f82dd
+		for (vma = mm->mmap; vma; vma = vma->vm_next)
Jesse Keating 2f82dd
+			if ((vma->vm_flags & VM_EXEC) && (vma->vm_end > limit))
Jesse Keating 2f82dd
+				limit = vma->vm_end;
Jesse Keating 2f82dd
+		modify_cs(mm, limit);
Jesse Keating 2f82dd
+	}
Jesse Keating 2f82dd
+}
Jesse Keating 2f82dd
+
Jesse Keating 2f82dd
+void arch_flush_exec_range(struct mm_struct *mm)
Jesse Keating 2f82dd
+{
Jesse Keating 2f82dd
+	mm->context.exec_limit = 0;
Jesse Keating 2f82dd
+	set_user_cs(&mm->context.user_cs, 0);
Jesse Keating 2f82dd
+}
Jesse Keating 2f82dd
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
Jesse Keating 2f82dd
index 7e37dce..92ae538 100644
Jesse Keating 2f82dd
--- a/arch/x86/kernel/traps.c
Jesse Keating 2f82dd
+++ b/arch/x86/kernel/traps.c
Jesse Keating 2f82dd
@@ -115,6 +115,76 @@ die_if_kernel(const char *str, struct pt_regs *regs, long err)
Jesse Keating 2f82dd
 	if (!user_mode_vm(regs))
Jesse Keating 2f82dd
 		die(str, regs, err);
Jesse Keating 2f82dd
 }
Jesse Keating 2f82dd
+
Jesse Keating 2f82dd
+static inline int
Jesse Keating 2f82dd
+__compare_user_cs_desc(const struct desc_struct *desc1,
Jesse Keating 2f82dd
+	const struct desc_struct *desc2)
Jesse Keating 2f82dd
+{
Jesse Keating 2f82dd
+	return ((desc1->limit0 != desc2->limit0) ||
Jesse Keating 2f82dd
+		(desc1->limit != desc2->limit) ||
Jesse Keating 2f82dd
+		(desc1->base0 != desc2->base0) ||
Jesse Keating 2f82dd
+		(desc1->base1 != desc2->base1) ||
Jesse Keating 2f82dd
+		(desc1->base2 != desc2->base2));
Jesse Keating 2f82dd
+}
Jesse Keating 2f82dd
+
Jesse Keating 2f82dd
+/*
Jesse Keating 2f82dd
+ * lazy-check for CS validity on exec-shield binaries:
Jesse Keating 2f82dd
+ *
Jesse Keating 2f82dd
+ * the original non-exec stack patch was written by
Jesse Keating 2f82dd
+ * Solar Designer <solar at openwall.com>. Thanks!
Jesse Keating 2f82dd
+ */
Jesse Keating 2f82dd
+static int
Jesse Keating 2f82dd
+check_lazy_exec_limit(int cpu, struct pt_regs *regs, long error_code)
Jesse Keating 2f82dd
+{
Jesse Keating 2f82dd
+	struct desc_struct *desc1, *desc2;
Jesse Keating 2f82dd
+	struct vm_area_struct *vma;
Jesse Keating 2f82dd
+	unsigned long limit;
Jesse Keating 2f82dd
+
Jesse Keating 2f82dd
+	if (current->mm == NULL)
Jesse Keating 2f82dd
+		return 0;
Jesse Keating 2f82dd
+
Jesse Keating 2f82dd
+	limit = -1UL;
Jesse Keating 2f82dd
+	if (current->mm->context.exec_limit != -1UL) {
Jesse Keating 2f82dd
+		limit = PAGE_SIZE;
Jesse Keating 2f82dd
+		spin_lock(&current->mm->page_table_lock);
Jesse Keating 2f82dd
+		for (vma = current->mm->mmap; vma; vma = vma->vm_next)
Jesse Keating 2f82dd
+			if ((vma->vm_flags & VM_EXEC) && (vma->vm_end > limit))
Jesse Keating 2f82dd
+				limit = vma->vm_end;
Jesse Keating 2f82dd
+		vma = get_gate_vma(current);
Jesse Keating 2f82dd
+		if (vma && (vma->vm_flags & VM_EXEC) && (vma->vm_end > limit))
Jesse Keating 2f82dd
+			limit = vma->vm_end;
Jesse Keating 2f82dd
+		spin_unlock(&current->mm->page_table_lock);
Jesse Keating 2f82dd
+		if (limit >= TASK_SIZE)
Jesse Keating 2f82dd
+			limit = -1UL;
Jesse Keating 2f82dd
+		current->mm->context.exec_limit = limit;
Jesse Keating 2f82dd
+	}
Jesse Keating 2f82dd
+	set_user_cs(&current->mm->context.user_cs, limit);
Jesse Keating 2f82dd
+
Jesse Keating 2f82dd
+	desc1 = &current->mm->context.user_cs;
Jesse Keating 2f82dd
+	desc2 = get_cpu_gdt_table(cpu) + GDT_ENTRY_DEFAULT_USER_CS;
Jesse Keating 2f82dd
+
Jesse Keating 2f82dd
+	if (__compare_user_cs_desc(desc1, desc2)) {
Jesse Keating 2f82dd
+		/*
Jesse Keating 2f82dd
+		 * The CS was not in sync - reload it and retry the
Jesse Keating 2f82dd
+		 * instruction. If the instruction still faults then
Jesse Keating 2f82dd
+		 * we won't hit this branch next time around.
Jesse Keating 2f82dd
+		 */
Jesse Keating 2f82dd
+		if (print_fatal_signals >= 2) {
Jesse Keating 2f82dd
+			printk(KERN_ERR "#GPF fixup (%ld[seg:%lx]) at %08lx, CPU#%d.\n",
Jesse Keating 2f82dd
+				error_code, error_code/8, regs->ip,
Jesse Keating 2f82dd
+				smp_processor_id());
Jesse Keating 2f82dd
+			printk(KERN_ERR "exec_limit: %08lx, user_cs: %08x/%08x, CPU_cs: %08x/%08x.\n",
Jesse Keating 2f82dd
+				current->mm->context.exec_limit,
Jesse Keating 2f82dd
+				desc1->a, desc1->b, desc2->a, desc2->b);
Jesse Keating 2f82dd
+		}
Jesse Keating 2f82dd
+
Jesse Keating 2f82dd
+		load_user_cs_desc(cpu, current->mm);
Jesse Keating 2f82dd
+
Jesse Keating 2f82dd
+		return 1;
Jesse Keating 2f82dd
+	}
Jesse Keating 2f82dd
+
Jesse Keating 2f82dd
+	return 0;
Jesse Keating 2f82dd
+}
Jesse Keating 2f82dd
 #endif
Jesse Keating 2f82dd
 
Jesse Keating 2f82dd
 static void __kprobes
Jesse Keating 2f82dd
@@ -273,6 +343,29 @@ do_general_protection(struct pt_regs *regs, long error_code)
Jesse Keating 2f82dd
 	if (!user_mode(regs))
Jesse Keating 2f82dd
 		goto gp_in_kernel;
Jesse Keating 2f82dd
 
Jesse Keating 2f82dd
+#ifdef CONFIG_X86_32
Jesse Keating 2f82dd
+{
Jesse Keating 2f82dd
+	int cpu;
Jesse Keating 2f82dd
+	int ok;
Jesse Keating 2f82dd
+
Jesse Keating 2f82dd
+	cpu = get_cpu();
Jesse Keating 2f82dd
+	ok = check_lazy_exec_limit(cpu, regs, error_code);
Jesse Keating 2f82dd
+	put_cpu();
Jesse Keating 2f82dd
+
Jesse Keating 2f82dd
+	if (ok)
Jesse Keating 2f82dd
+		return;
Jesse Keating 2f82dd
+
Jesse Keating 2f82dd
+	if (print_fatal_signals) {
Jesse Keating 2f82dd
+		printk(KERN_ERR "#GPF(%ld[seg:%lx]) at %08lx, CPU#%d.\n",
Jesse Keating 2f82dd
+			error_code, error_code/8, regs->ip, smp_processor_id());
Jesse Keating 2f82dd
+		printk(KERN_ERR "exec_limit: %08lx, user_cs: %08x/%08x.\n",
Jesse Keating 2f82dd
+			current->mm->context.exec_limit,
Jesse Keating 2f82dd
+			current->mm->context.user_cs.a,
Jesse Keating 2f82dd
+			current->mm->context.user_cs.b);
Jesse Keating 2f82dd
+	}
Jesse Keating 2f82dd
+}
Jesse Keating 2f82dd
+#endif /*CONFIG_X86_32*/
Jesse Keating 2f82dd
+
Jesse Keating 2f82dd
 	tsk->thread.error_code = error_code;
Jesse Keating 2f82dd
 	tsk->thread.trap_no = 13;
Jesse Keating 2f82dd
 
Jesse Keating 2f82dd
@@ -881,19 +974,37 @@ do_device_not_available(struct pt_regs *regs, long error_code)
Jesse Keating 2f82dd
 }
Jesse Keating 2f82dd
 
Jesse Keating 2f82dd
 #ifdef CONFIG_X86_32
Jesse Keating 2f82dd
+/*
Jesse Keating 2f82dd
+ * The fixup code for errors in iret jumps to here (iret_exc). It loses
Jesse Keating 2f82dd
+ * the original trap number and error code. The bogus trap 32 and error
Jesse Keating 2f82dd
+ * code 0 are what the vanilla kernel delivers via:
Jesse Keating 2f82dd
+ * DO_ERROR_INFO(32, SIGSEGV, "iret exception", iret_error, ILL_BADSTK, 0, 1)
Jesse Keating 2f82dd
+ *
Jesse Keating 2f82dd
+ * NOTE: Because of the final "1" in the macro we need to enable interrupts.
Jesse Keating 2f82dd
+ *
Jesse Keating 2f82dd
+ * In case of a general protection fault in the iret instruction, we
Jesse Keating 2f82dd
+ * need to check for a lazy CS update for exec-shield.
Jesse Keating 2f82dd
+ */
Jesse Keating 2f82dd
 dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)
Jesse Keating 2f82dd
 {
Jesse Keating 2f82dd
-	siginfo_t info;
Jesse Keating 2f82dd
+	int ok;
Jesse Keating 2f82dd
+	int cpu;
Jesse Keating 2f82dd
+
Jesse Keating 2f82dd
 	local_irq_enable();
Jesse Keating 2f82dd
 
Jesse Keating 2f82dd
-	info.si_signo = SIGILL;
Jesse Keating 2f82dd
-	info.si_errno = 0;
Jesse Keating 2f82dd
-	info.si_code = ILL_BADSTK;
Jesse Keating 2f82dd
-	info.si_addr = NULL;
Jesse Keating 2f82dd
-	if (notify_die(DIE_TRAP, "iret exception",
Jesse Keating 2f82dd
-			regs, error_code, 32, SIGILL) == NOTIFY_STOP)
Jesse Keating 2f82dd
-		return;
Jesse Keating 2f82dd
-	do_trap(32, SIGILL, "iret exception", regs, error_code, &info);
Jesse Keating 2f82dd
+	cpu = get_cpu();
Jesse Keating 2f82dd
+	ok = check_lazy_exec_limit(cpu, regs, error_code);
Jesse Keating 2f82dd
+	put_cpu();
Jesse Keating 2f82dd
+
Jesse Keating 2f82dd
+	if (!ok && notify_die(DIE_TRAP, "iret exception", regs,
Jesse Keating 2f82dd
+		error_code, 32, SIGSEGV) != NOTIFY_STOP) {
Jesse Keating 2f82dd
+			siginfo_t info;
Jesse Keating 2f82dd
+			info.si_signo = SIGSEGV;
Jesse Keating 2f82dd
+			info.si_errno = 0;
Jesse Keating 2f82dd
+			info.si_code = ILL_BADSTK;
Jesse Keating 2f82dd
+			info.si_addr = 0;
Jesse Keating 2f82dd
+			do_trap(32, SIGSEGV, "iret exception", regs, error_code, &info);
Jesse Keating 2f82dd
+	}
Jesse Keating 2f82dd
 }
Jesse Keating 2f82dd
 #endif
Jesse Keating 2f82dd
 
Jesse Keating 2f82dd
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
Jesse Keating 2f82dd
index 73ffd55..0cf2a7b 100644
Jesse Keating 2f82dd
--- a/arch/x86/mm/init.c
Jesse Keating 2f82dd
+++ b/arch/x86/mm/init.c
Jesse Keating 2f82dd
@@ -149,6 +149,12 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
Jesse Keating 2f82dd
 	set_nx();
Jesse Keating 2f82dd
 	if (nx_enabled)
Jesse Keating 2f82dd
 		printk(KERN_INFO "NX (Execute Disable) protection: active\n");
Jesse Keating 2f82dd
+#ifdef CONFIG_X86_32
Jesse Keating 2f82dd
+	else
Jesse Keating 2f82dd
+	if (exec_shield)
Jesse Keating 2f82dd
+		printk(KERN_INFO "Using x86 segment limits to approximate "
Jesse Keating 2f82dd
+			"NX protection\n");
Jesse Keating 2f82dd
+#endif
Jesse Keating 2f82dd
 
Jesse Keating 2f82dd
 	/* Enable PSE if available */
Jesse Keating 2f82dd
 	if (cpu_has_pse)
Jesse Keating 2f82dd
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
Jesse Keating 2f82dd
index c8191de..7d84d01 100644
Jesse Keating 2f82dd
--- a/arch/x86/mm/mmap.c
Jesse Keating 2f82dd
+++ b/arch/x86/mm/mmap.c
Jesse Keating 2f82dd
@@ -124,13 +124,16 @@ static unsigned long mmap_legacy_base(void)
Jesse Keating 2f82dd
  */
Jesse Keating 2f82dd
 void arch_pick_mmap_layout(struct mm_struct *mm)
Jesse Keating 2f82dd
 {
Jesse Keating 2f82dd
-	if (mmap_is_legacy()) {
Jesse Keating 2f82dd
+	if (!(2 & exec_shield) && mmap_is_legacy()) {
Jesse Keating 2f82dd
 		mm->mmap_base = mmap_legacy_base();
Jesse Keating 2f82dd
 		mm->get_unmapped_area = arch_get_unmapped_area;
Jesse Keating 2f82dd
 		mm->unmap_area = arch_unmap_area;
Jesse Keating 2f82dd
 	} else {
Jesse Keating 2f82dd
 		mm->mmap_base = mmap_base();
Jesse Keating 2f82dd
 		mm->get_unmapped_area = arch_get_unmapped_area_topdown;
Jesse Keating 2f82dd
+		if (!(current->personality & READ_IMPLIES_EXEC)
Jesse Keating 2f82dd
+		    && mmap_is_ia32())
Jesse Keating 2f82dd
+			mm->get_unmapped_exec_area = arch_get_unmapped_exec_area;
Jesse Keating 2f82dd
 		mm->unmap_area = arch_unmap_area_topdown;
Jesse Keating 2f82dd
 	}
Jesse Keating 2f82dd
 }
Jesse Keating 2f82dd
diff --git a/arch/x86/mm/setup_nx.c b/arch/x86/mm/setup_nx.c
Jesse Keating 2f82dd
index 513d8ed..c614a90 100644
Jesse Keating 2f82dd
--- a/arch/x86/mm/setup_nx.c
Jesse Keating 2f82dd
+++ b/arch/x86/mm/setup_nx.c
Jesse Keating 2f82dd
@@ -1,3 +1,4 @@
Jesse Keating 2f82dd
+#include <linux/sched.h>
Jesse Keating 2f82dd
 #include <linux/spinlock.h>
Jesse Keating 2f82dd
 #include <linux/errno.h>
Jesse Keating 2f82dd
 #include <linux/init.h>
Jesse Keating 2f82dd
@@ -27,6 +28,9 @@ static int __init noexec_setup(char *str)
Jesse Keating 2f82dd
 	} else if (!strncmp(str, "off", 3)) {
Jesse Keating 2f82dd
 		disable_nx = 1;
Jesse Keating 2f82dd
 		__supported_pte_mask &= ~_PAGE_NX;
Jesse Keating 2f82dd
+#ifdef CONFIG_X86_32
Jesse Keating 2f82dd
+		exec_shield = 0;
Jesse Keating 2f82dd
+#endif
Jesse Keating 2f82dd
 	}
Jesse Keating 2f82dd
 	return 0;
Jesse Keating 2f82dd
 }
Jesse Keating 2f82dd
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
Jesse Keating 2f82dd
index 36fe08e..3806a45 100644
Jesse Keating 2f82dd
--- a/arch/x86/mm/tlb.c
Jesse Keating 2f82dd
+++ b/arch/x86/mm/tlb.c
Jesse Keating 2f82dd
@@ -6,6 +6,7 @@
Jesse Keating 2f82dd
 #include <linux/interrupt.h>
Jesse Keating 2f82dd
 #include <linux/module.h>
Jesse Keating 2f82dd
 
Jesse Keating 2f82dd
+#include <asm/desc.h>
Jesse Keating 2f82dd
 #include <asm/tlbflush.h>
Jesse Keating 2f82dd
 #include <asm/mmu_context.h>
Jesse Keating 2f82dd
 #include <asm/apic.h>
Jesse Keating 2f82dd
@@ -130,6 +131,12 @@ void smp_invalidate_interrupt(struct pt_regs *regs)
Jesse Keating 2f82dd
 	union smp_flush_state *f;
Jesse Keating 2f82dd
 
Jesse Keating 2f82dd
 	cpu = smp_processor_id();
Jesse Keating 2f82dd
+
Jesse Keating 2f82dd
+#ifdef CONFIG_X86_32
Jesse Keating 2f82dd
+	if (current->active_mm)
Jesse Keating 2f82dd
+		load_user_cs_desc(cpu, current->active_mm);
Jesse Keating 2f82dd
+#endif
Jesse Keating 2f82dd
+
Jesse Keating 2f82dd
 	/*
Jesse Keating 2f82dd
 	 * orig_rax contains the negated interrupt vector.
Jesse Keating 2f82dd
 	 * Use that to determine where the sender put the data.
Jesse Keating 2f82dd
diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c
Jesse Keating 2f82dd
index 58bc00f..1fdafb5 100644
Jesse Keating 2f82dd
--- a/arch/x86/vdso/vdso32-setup.c
Jesse Keating 2f82dd
+++ b/arch/x86/vdso/vdso32-setup.c
Jesse Keating 2f82dd
@@ -331,7 +331,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
Jesse Keating 2f82dd
 	if (compat)
Jesse Keating 2f82dd
 		addr = VDSO_HIGH_BASE;
Jesse Keating 2f82dd
 	else {
Jesse Keating 2f82dd
-		addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0);
Jesse Keating 2f82dd
+		addr = get_unmapped_area_prot(NULL, 0, PAGE_SIZE, 0, 0, 1);
Jesse Keating 2f82dd
 		if (IS_ERR_VALUE(addr)) {
Jesse Keating 2f82dd
 			ret = addr;
Jesse Keating 2f82dd
 			goto up_fail;
Jesse Keating 2f82dd
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
Jesse Keating 2f82dd
index 3439616..31e5c6f 100644
Jesse Keating 2f82dd
--- a/arch/x86/xen/enlighten.c
Jesse Keating 2f82dd
+++ b/arch/x86/xen/enlighten.c
Jesse Keating 2f82dd
@@ -323,6 +323,24 @@ static void xen_set_ldt(const void *addr, unsigned entries)
Jesse Keating 2f82dd
 	xen_mc_issue(PARAVIRT_LAZY_CPU);
Jesse Keating 2f82dd
 }
Jesse Keating 2f82dd
 
Jesse Keating 2f82dd
+#ifdef CONFIG_X86_32
Jesse Keating 2f82dd
+static void xen_load_user_cs_desc(int cpu, struct mm_struct *mm)
Jesse Keating 2f82dd
+{
Jesse Keating 2f82dd
+	void *gdt;
Jesse Keating 2f82dd
+	xmaddr_t mgdt;
Jesse Keating 2f82dd
+	u64 descriptor;
Jesse Keating 2f82dd
+	struct desc_struct user_cs;
Jesse Keating 2f82dd
+
Jesse Keating 2f82dd
+	gdt = &get_cpu_gdt_table(cpu)[GDT_ENTRY_DEFAULT_USER_CS];
Jesse Keating 2f82dd
+	mgdt = virt_to_machine(gdt);
Jesse Keating 2f82dd
+
Jesse Keating 2f82dd
+	user_cs = mm->context.user_cs;
Jesse Keating 2f82dd
+	descriptor = (u64) user_cs.a | ((u64) user_cs.b) << 32;
Jesse Keating 2f82dd
+
Jesse Keating 2f82dd
+	HYPERVISOR_update_descriptor(mgdt.maddr, descriptor);
Jesse Keating 2f82dd
+}
Jesse Keating 2f82dd
+#endif /*CONFIG_X86_32*/
Jesse Keating 2f82dd
+
Jesse Keating 2f82dd
 static void xen_load_gdt(const struct desc_ptr *dtr)
Jesse Keating 2f82dd
 {
Jesse Keating 2f82dd
 	unsigned long va = dtr->address;
Jesse Keating 2f82dd
@@ -949,6 +967,9 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = {
Jesse Keating 2f82dd
 
Jesse Keating 2f82dd
 	.load_tr_desc = paravirt_nop,
Jesse Keating 2f82dd
 	.set_ldt = xen_set_ldt,
Jesse Keating 2f82dd
+#ifdef CONFIG_X86_32
Jesse Keating 2f82dd
+	.load_user_cs_desc = xen_load_user_cs_desc,
Jesse Keating 2f82dd
+#endif /*CONFIG_X86_32*/
Jesse Keating 2f82dd
 	.load_gdt = xen_load_gdt,
Jesse Keating 2f82dd
 	.load_idt = xen_load_idt,
Jesse Keating 2f82dd
 	.load_tls = xen_load_tls,
Jesse Keating 2f82dd
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
Jesse Keating 2f82dd
index b9b3bb5..1e55926 100644
Jesse Keating 2f82dd
--- a/fs/binfmt_elf.c
Jesse Keating 2f82dd
+++ b/fs/binfmt_elf.c
Jesse Keating 2f82dd
@@ -73,7 +73,7 @@ static struct linux_binfmt elf_format = {
Jesse Keating 2f82dd
 		.hasvdso	= 1
Jesse Keating 2f82dd
 };
Jesse Keating 2f82dd
 
Jesse Keating 2f82dd
-#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)
Jesse Keating 2f82dd
+#define BAD_ADDR(x) IS_ERR_VALUE(x)
Jesse Keating 2f82dd
 
Jesse Keating 2f82dd
 static int set_brk(unsigned long start, unsigned long end)
Jesse Keating 2f82dd
 {
Jesse Keating 2f82dd
@@ -721,6 +721,11 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
Jesse Keating 2f82dd
 			break;
Jesse Keating 2f82dd
 		}
Jesse Keating 2f82dd
 
Jesse Keating 2f82dd
+	if (current->personality == PER_LINUX && (exec_shield & 2)) {
Jesse Keating 2f82dd
+		executable_stack = EXSTACK_DISABLE_X;
Jesse Keating 2f82dd
+		current->flags |= PF_RANDOMIZE;
Jesse Keating 2f82dd
+	}
Jesse Keating 2f82dd
+
Jesse Keating 2f82dd
 	/* Some simple consistency checks for the interpreter */
Jesse Keating 2f82dd
 	if (elf_interpreter) {
Jesse Keating 2f82dd
 		retval = -ELIBBAD;
Jesse Keating 2f82dd
@@ -740,6 +745,15 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
Jesse Keating 2f82dd
 	if (retval)
Jesse Keating 2f82dd
 		goto out_free_dentry;
Jesse Keating 2f82dd
 
Jesse Keating 2f82dd
+#ifdef CONFIG_X86_32
Jesse Keating 2f82dd
+	/*
Jesse Keating 2f82dd
+	 * Turn off the CS limit completely if exec-shield disabled or
Jesse Keating 2f82dd
+	 * NX active:
Jesse Keating 2f82dd
+	 */
Jesse Keating 2f82dd
+	if (!exec_shield || executable_stack != EXSTACK_DISABLE_X || nx_enabled)
Jesse Keating 2f82dd
+		arch_add_exec_range(current->mm, -1);
Jesse Keating 2f82dd
+#endif
Jesse Keating 2f82dd
+
Jesse Keating 2f82dd
 	/* OK, This is the point of no return */
Jesse Keating 2f82dd
 	current->flags &= ~PF_FORKNOEXEC;
Jesse Keating 2f82dd
 	current->mm->def_flags = def_flags;
Jesse Keating 2f82dd
@@ -747,7 +761,8 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
Jesse Keating 2f82dd
 	/* Do this immediately, since STACK_TOP as used in setup_arg_pages
Jesse Keating 2f82dd
 	   may depend on the personality.  */
Jesse Keating 2f82dd
 	SET_PERSONALITY(loc->elf_ex);
Jesse Keating 2f82dd
-	if (elf_read_implies_exec(loc->elf_ex, executable_stack))
Jesse Keating 2f82dd
+	if (!(exec_shield & 2) &&
Jesse Keating 2f82dd
+			elf_read_implies_exec(loc->elf_ex, executable_stack))
Jesse Keating 2f82dd
 		current->personality |= READ_IMPLIES_EXEC;
Jesse Keating 2f82dd
 
Jesse Keating 2f82dd
 	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
Jesse Keating 2f82dd
@@ -912,7 +927,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
Jesse Keating 2f82dd
 					    interpreter,
Jesse Keating 2f82dd
 					    &interp_map_addr,
Jesse Keating 2f82dd
 					    load_bias);
Jesse Keating 2f82dd
-		if (!IS_ERR((void *)elf_entry)) {
Jesse Keating 2f82dd
+		if (!BAD_ADDR(elf_entry)) {
Jesse Keating 2f82dd
 			/*
Jesse Keating 2f82dd
 			 * load_elf_interp() returns relocation
Jesse Keating 2f82dd
 			 * adjustment
Jesse Keating 2f82dd
diff --git a/include/linux/mm.h b/include/linux/mm.h
Jesse Keating 2f82dd
index 24c3956..88f944d 100644
Jesse Keating 2f82dd
--- a/include/linux/mm.h
Jesse Keating 2f82dd
+++ b/include/linux/mm.h
Jesse Keating 2f82dd
@@ -1129,7 +1129,13 @@ extern int install_special_mapping(struct mm_struct *mm,
Jesse Keating 2f82dd
 				   unsigned long addr, unsigned long len,
Jesse Keating 2f82dd
 				   unsigned long flags, struct page **pages);
Jesse Keating 2f82dd
 
Jesse Keating 2f82dd
-extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
Jesse Keating 2f82dd
+extern unsigned long get_unmapped_area_prot(struct file *, unsigned long, unsigned long, unsigned long, unsigned long, int);
Jesse Keating 2f82dd
+
Jesse Keating 2f82dd
+static inline unsigned long get_unmapped_area(struct file *file, unsigned long addr,
Jesse Keating 2f82dd
+		unsigned long len, unsigned long pgoff, unsigned long flags)
Jesse Keating 2f82dd
+{
Jesse Keating 2f82dd
+	return get_unmapped_area_prot(file, addr, len, pgoff, flags, 0);
Jesse Keating 2f82dd
+}
Jesse Keating 2f82dd
 
Jesse Keating 2f82dd
 extern unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
Jesse Keating 2f82dd
 	unsigned long len, unsigned long prot,
Jesse Keating 2f82dd
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
Jesse Keating 2f82dd
index 84a524a..a81e0db 100644
Jesse Keating 2f82dd
--- a/include/linux/mm_types.h
Jesse Keating 2f82dd
+++ b/include/linux/mm_types.h
Jesse Keating 2f82dd
@@ -206,6 +206,9 @@ struct mm_struct {
Jesse Keating 2f82dd
 	unsigned long (*get_unmapped_area) (struct file *filp,
Jesse Keating 2f82dd
 				unsigned long addr, unsigned long len,
Jesse Keating 2f82dd
 				unsigned long pgoff, unsigned long flags);
Jesse Keating 2f82dd
+       unsigned long (*get_unmapped_exec_area) (struct file *filp,
Jesse Keating 2f82dd
+				unsigned long addr, unsigned long len,
Jesse Keating 2f82dd
+				unsigned long pgoff, unsigned long flags);
Jesse Keating 2f82dd
 	void (*unmap_area) (struct mm_struct *mm, unsigned long addr);
Jesse Keating 2f82dd
 	unsigned long mmap_base;		/* base of mmap area */
Jesse Keating 2f82dd
 	unsigned long task_size;		/* size of task vm space */
Jesse Keating 2f82dd
diff --git a/include/linux/resource.h b/include/linux/resource.h
Jesse Keating 2f82dd
index 40fc7e6..68c2549 100644
Jesse Keating 2f82dd
--- a/include/linux/resource.h
Jesse Keating 2f82dd
+++ b/include/linux/resource.h
Jesse Keating 2f82dd
@@ -55,8 +55,11 @@ struct rlimit {
Jesse Keating 2f82dd
 /*
Jesse Keating 2f82dd
  * Limit the stack by to some sane default: root can always
Jesse Keating 2f82dd
  * increase this limit if needed..  8MB seems reasonable.
Jesse Keating 2f82dd
+ *
Jesse Keating 2f82dd
+ * (2MB more to cover randomization effects.)
Jesse Keating 2f82dd
  */
Jesse Keating 2f82dd
-#define _STK_LIM	(8*1024*1024)
Jesse Keating 2f82dd
+#define _STK_LIM	(10*1024*1024)
Jesse Keating 2f82dd
+#define EXEC_STACK_BIAS	(2*1024*1024)
Jesse Keating 2f82dd
 
Jesse Keating 2f82dd
 /*
Jesse Keating 2f82dd
  * GPG2 wants 64kB of mlocked memory, to make sure pass phrases
Jesse Keating 2f82dd
diff --git a/include/linux/sched.h b/include/linux/sched.h
Jesse Keating 2f82dd
index 75e6e60..0bce489 100644
Jesse Keating 2f82dd
--- a/include/linux/sched.h
Jesse Keating 2f82dd
+++ b/include/linux/sched.h
Jesse Keating 2f82dd
@@ -102,6 +102,9 @@ struct fs_struct;
Jesse Keating 2f82dd
 struct bts_context;
Jesse Keating 2f82dd
 struct perf_event_context;
Jesse Keating 2f82dd
 
Jesse Keating 2f82dd
+extern int exec_shield;
Jesse Keating 2f82dd
+extern int print_fatal_signals;
Jesse Keating 2f82dd
+
Jesse Keating 2f82dd
 /*
Jesse Keating 2f82dd
  * List of flags we want to share for kernel threads,
Jesse Keating 2f82dd
  * if only because they are not used by them anyway.
Jesse Keating 2f82dd
@@ -378,6 +381,10 @@ extern int sysctl_max_map_count;
Jesse Keating 2f82dd
 extern unsigned long
Jesse Keating 2f82dd
 arch_get_unmapped_area(struct file *, unsigned long, unsigned long,
Jesse Keating 2f82dd
 		       unsigned long, unsigned long);
Jesse Keating 2f82dd
+
Jesse Keating 2f82dd
+extern unsigned long
Jesse Keating 2f82dd
+arch_get_unmapped_exec_area(struct file *, unsigned long, unsigned long,
Jesse Keating 2f82dd
+		       unsigned long, unsigned long);
Jesse Keating 2f82dd
 extern unsigned long
Jesse Keating 2f82dd
 arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr,
Jesse Keating 2f82dd
 			  unsigned long len, unsigned long pgoff,
Jesse Keating 2f82dd
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
Jesse Keating 2f82dd
index 0d949c5..12ca319 100644
Jesse Keating 2f82dd
--- a/kernel/sysctl.c
Jesse Keating 2f82dd
+++ b/kernel/sysctl.c
Jesse Keating 2f82dd
@@ -88,6 +88,26 @@ extern int sysctl_nr_open_min, sysctl_nr_open_max;
Jesse Keating 2f82dd
 #ifndef CONFIG_MMU
Jesse Keating 2f82dd
 extern int sysctl_nr_trim_pages;
Jesse Keating 2f82dd
 #endif
Jesse Keating 2f82dd
+
Jesse Keating 2f82dd
+int exec_shield = (1<<0);
Jesse Keating 2f82dd
+/* exec_shield is a bitmask:
Jesse Keating 2f82dd
+ * 0: off; vdso at STACK_TOP, 1 page below TASK_SIZE
Jesse Keating 2f82dd
+ * (1<<0) 1: on [also on if !=0]
Jesse Keating 2f82dd
+ * (1<<1) 2: force noexecstack regardless of PT_GNU_STACK
Jesse Keating 2f82dd
+ * The old settings
Jesse Keating 2f82dd
+ * (1<<2) 4: vdso just below .text of main (unless too low)
Jesse Keating 2f82dd
+ * (1<<3) 8: vdso just below .text of PT_INTERP (unless too low)
Jesse Keating 2f82dd
+ * are ignored because the vdso is placed completely randomly
Jesse Keating 2f82dd
+ */
Jesse Keating 2f82dd
+
Jesse Keating 2f82dd
+static int __init setup_exec_shield(char *str)
Jesse Keating 2f82dd
+{
Jesse Keating 2f82dd
+	get_option(&str, &exec_shield);
Jesse Keating 2f82dd
+
Jesse Keating 2f82dd
+	return 1;
Jesse Keating 2f82dd
+}
Jesse Keating 2f82dd
+__setup("exec-shield=", setup_exec_shield);
Jesse Keating 2f82dd
+
Jesse Keating 2f82dd
 #ifdef CONFIG_RCU_TORTURE_TEST
Jesse Keating 2f82dd
 extern int rcutorture_runnable;
Jesse Keating 2f82dd
 #endif /* #ifdef CONFIG_RCU_TORTURE_TEST */
Jesse Keating 2f82dd
@@ -408,6 +428,14 @@ static struct ctl_table kern_table[] = {
Jesse Keating 2f82dd
 		.proc_handler	= &proc_dointvec,
Jesse Keating 2f82dd
 	},
Jesse Keating 2f82dd
 	{
Jesse Keating 2f82dd
+		.ctl_name	= CTL_UNNUMBERED,
Jesse Keating 2f82dd
+		.procname	= "exec-shield",
Jesse Keating 2f82dd
+		.data		= &exec_shield,
Jesse Keating 2f82dd
+		.maxlen		= sizeof(int),
Jesse Keating 2f82dd
+		.mode		= 0644,
Jesse Keating 2f82dd
+		.proc_handler	= &proc_dointvec,
Jesse Keating 2f82dd
+	},
Jesse Keating 2f82dd
+	{
Jesse Keating 2f82dd
 		.ctl_name	= KERN_CORE_USES_PID,
Jesse Keating 2f82dd
 		.procname	= "core_uses_pid",
Jesse Keating 2f82dd
 		.data		= &core_uses_pid,
Jesse Keating 2f82dd
diff --git a/mm/mmap.c b/mm/mmap.c
Jesse Keating 2f82dd
index 73f5e4b..814b95f 100644
Jesse Keating 2f82dd
--- a/mm/mmap.c
Jesse Keating 2f82dd
+++ b/mm/mmap.c
Jesse Keating 2f82dd
@@ -29,6 +29,7 @@
Jesse Keating 2f82dd
 #include <linux/rmap.h>
Jesse Keating 2f82dd
 #include <linux/mmu_notifier.h>
Jesse Keating 2f82dd
 #include <linux/perf_event.h>
Jesse Keating 2f82dd
+#include <linux/random.h>
Jesse Keating 2f82dd
 
Jesse Keating 2f82dd
 #include <asm/uaccess.h>
Jesse Keating 2f82dd
 #include <asm/cacheflush.h>
Jesse Keating 2f82dd
@@ -45,6 +46,18 @@
Jesse Keating 2f82dd
 #define arch_rebalance_pgtables(addr, len)		(addr)
Jesse Keating 2f82dd
 #endif
Jesse Keating 2f82dd
 
Jesse Keating 2f82dd
+/* No sane architecture will #define these to anything else */
Jesse Keating 2f82dd
+#ifndef arch_add_exec_range
Jesse Keating 2f82dd
+#define arch_add_exec_range(mm, limit)	do { ; } while (0)
Jesse Keating 2f82dd
+#endif
Jesse Keating 2f82dd
+#ifndef arch_flush_exec_range
Jesse Keating 2f82dd
+#define arch_flush_exec_range(mm)	do { ; } while (0)
Jesse Keating 2f82dd
+#endif
Jesse Keating 2f82dd
+#ifndef arch_remove_exec_range
Jesse Keating 2f82dd
+#define arch_remove_exec_range(mm, limit)	do { ; } while (0)
Jesse Keating 2f82dd
+#endif
Jesse Keating 2f82dd
+
Jesse Keating 2f82dd
+
Jesse Keating 2f82dd
 static void unmap_region(struct mm_struct *mm,
Jesse Keating 2f82dd
 		struct vm_area_struct *vma, struct vm_area_struct *prev,
Jesse Keating 2f82dd
 		unsigned long start, unsigned long end);
Chuck Ebbert 4db245
@@ -389,6 +402,9 @@ static inline void
Jesse Keating 2f82dd
 {
Chuck Ebbert 4db245
 	struct vm_area_struct *next;
Chuck Ebbert 4db245
 
Jesse Keating 2f82dd
+	if (vma->vm_flags & VM_EXEC)
Jesse Keating 2f82dd
+		arch_add_exec_range(mm, vma->vm_end);
Chuck Ebbert 4db245
+
Chuck Ebbert 4db245
 	vma->vm_prev = prev;
Jesse Keating 2f82dd
 	if (prev) {
Chuck Ebbert 4db245
 		next = prev->vm_next;
Jesse Keating 2f82dd
@@ -491,6 +506,8 @@ __vma_unlink(struct mm_struct *mm, struct vm_area_struct *vma,
Jesse Keating 2f82dd
 	rb_erase(&vma->vm_rb, &mm->mm_rb);
Jesse Keating 2f82dd
 	if (mm->mmap_cache == vma)
Jesse Keating 2f82dd
 		mm->mmap_cache = prev;
Jesse Keating 2f82dd
+	if (vma->vm_flags & VM_EXEC)
Jesse Keating 2f82dd
+		arch_remove_exec_range(mm, vma->vm_end);
Jesse Keating 2f82dd
 }
Jesse Keating 2f82dd
 
Jesse Keating 2f82dd
 /*
Jesse Keating 2f82dd
@@ -798,6 +815,8 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
Jesse Keating 2f82dd
 		} else					/* cases 2, 5, 7 */
Jesse Keating 2f82dd
 			vma_adjust(prev, prev->vm_start,
Jesse Keating 2f82dd
 				end, prev->vm_pgoff, NULL);
Jesse Keating 2f82dd
+		if (prev->vm_flags & VM_EXEC)
Jesse Keating 2f82dd
+			arch_add_exec_range(mm, prev->vm_end);
Jesse Keating 2f82dd
 		return prev;
Jesse Keating 2f82dd
 	}
Jesse Keating 2f82dd
 
Jesse Keating 2f82dd
@@ -970,7 +989,8 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
Jesse Keating 2f82dd
 	/* Obtain the address to map to. we verify (or select) it and ensure
Jesse Keating 2f82dd
 	 * that it represents a valid section of the address space.
Jesse Keating 2f82dd
 	 */
Jesse Keating 2f82dd
-	addr = get_unmapped_area(file, addr, len, pgoff, flags);
Jesse Keating 2f82dd
+	addr = get_unmapped_area_prot(file, addr, len, pgoff, flags,
Jesse Keating 2f82dd
+		prot & PROT_EXEC);
Jesse Keating 2f82dd
 	if (addr & ~PAGE_MASK)
Jesse Keating 2f82dd
 		return addr;
Jesse Keating 2f82dd
 
Jesse Keating 2f82dd
@@ -1453,21 +1473,25 @@ void arch_unmap_area_topdown(struct mm_struct *mm, unsigned long addr)
Jesse Keating 2f82dd
 }
Jesse Keating 2f82dd
 
Jesse Keating 2f82dd
 unsigned long
Jesse Keating 2f82dd
-get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
Jesse Keating 2f82dd
-		unsigned long pgoff, unsigned long flags)
Jesse Keating 2f82dd
+get_unmapped_area_prot(struct file *file, unsigned long addr, unsigned long len,
Jesse Keating 2f82dd
+		unsigned long pgoff, unsigned long flags, int exec)
Jesse Keating 2f82dd
 {
Jesse Keating 2f82dd
 	unsigned long (*get_area)(struct file *, unsigned long,
Jesse Keating 2f82dd
 				  unsigned long, unsigned long, unsigned long);
Jesse Keating 2f82dd
 
Jesse Keating 2f82dd
 	unsigned long error = arch_mmap_check(addr, len, flags);
Jesse Keating 2f82dd
 	if (error)
Jesse Keating 2f82dd
 		return error;
Jesse Keating 2f82dd
 
Jesse Keating 2f82dd
 	/* Careful about overflows.. */
Jesse Keating 2f82dd
 	if (len > TASK_SIZE)
Jesse Keating 2f82dd
 		return -ENOMEM;
Jesse Keating 2f82dd
 
Jesse Keating 2f82dd
-	get_area = current->mm->get_unmapped_area;
Jesse Keating 2f82dd
+	if (exec && current->mm->get_unmapped_exec_area)
Jesse Keating 2f82dd
+		get_area = current->mm->get_unmapped_exec_area;
Jesse Keating 2f82dd
+	else
Jesse Keating 2f82dd
+		get_area = current->mm->get_unmapped_area;
Jesse Keating 2f82dd
+
Jesse Keating 2f82dd
 	if (file && file->f_op && file->f_op->get_unmapped_area)
Jesse Keating 2f82dd
 		get_area = file->f_op->get_unmapped_area;
Jesse Keating 2f82dd
 	addr = get_area(file, addr, len, pgoff, flags);
Jesse Keating 2f82dd
@@ -1473,8 +1497,76 @@ get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
Jesse Keating 2f82dd
 
Jesse Keating 2f82dd
 	return arch_rebalance_pgtables(addr, len);
Jesse Keating 2f82dd
 }
Jesse Keating 2f82dd
+EXPORT_SYMBOL(get_unmapped_area_prot);
Jesse Keating 2f82dd
+
Jesse Keating 2f82dd
+#define SHLIB_BASE	0x00110000
Jesse Keating 2f82dd
+
Jesse Keating 2f82dd
+unsigned long
Jesse Keating 2f82dd
+arch_get_unmapped_exec_area(struct file *filp, unsigned long addr0,
Jesse Keating 2f82dd
+		unsigned long len0, unsigned long pgoff, unsigned long flags)
Jesse Keating 2f82dd
+{
Jesse Keating 2f82dd
+	unsigned long addr = addr0, len = len0;
Jesse Keating 2f82dd
+	struct mm_struct *mm = current->mm;
Jesse Keating 2f82dd
+	struct vm_area_struct *vma;
Jesse Keating 2f82dd
+	unsigned long tmp;
Jesse Keating 2f82dd
+
Jesse Keating 2f82dd
+	if (len > TASK_SIZE)
Jesse Keating 2f82dd
+		return -ENOMEM;
Jesse Keating 2f82dd
+
Jesse Keating 2f82dd
+	if (flags & MAP_FIXED)
Jesse Keating 2f82dd
+		return addr;
Jesse Keating 2f82dd
+
Jesse Keating 2f82dd
+	if (!addr)
Jesse Keating 2f82dd
+		addr = randomize_range(SHLIB_BASE, 0x01000000, len);
Jesse Keating 2f82dd
+
Jesse Keating 2f82dd
+	if (addr) {
Jesse Keating 2f82dd
+		addr = PAGE_ALIGN(addr);
Jesse Keating 2f82dd
+		vma = find_vma(mm, addr);
Jesse Keating 2f82dd
+		if (TASK_SIZE - len >= addr &&
Jesse Keating 2f82dd
+		    (!vma || addr + len <= vma->vm_start))
Jesse Keating 2f82dd
+			return addr;
Jesse Keating 2f82dd
+	}
Jesse Keating 2f82dd
+
Jesse Keating 2f82dd
+	addr = SHLIB_BASE;
Jesse Keating 2f82dd
+	for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
Jesse Keating 2f82dd
+		/* At this point:  (!vma || addr < vma->vm_end). */
Jesse Keating 2f82dd
+		if (TASK_SIZE - len < addr)
Jesse Keating 2f82dd
+			return -ENOMEM;
Jesse Keating 2f82dd
+
Jesse Keating 2f82dd
+		if (!vma || addr + len <= vma->vm_start) {
Jesse Keating 2f82dd
+			/*
Jesse Keating 2f82dd
+			 * Must not let a PROT_EXEC mapping get into the
Jesse Keating 2f82dd
+			 * brk area:
Jesse Keating 2f82dd
+			 */
Jesse Keating 2f82dd
+			if (addr + len > mm->brk)
Jesse Keating 2f82dd
+				goto failed;
Jesse Keating 2f82dd
+
Jesse Keating 2f82dd
+			/*
Jesse Keating 2f82dd
+			 * Up until the brk area we randomize addresses
Jesse Keating 2f82dd
+			 * as much as possible:
Jesse Keating 2f82dd
+			 */
Jesse Keating 2f82dd
+			if (addr >= 0x01000000) {
Jesse Keating 2f82dd
+				tmp = randomize_range(0x01000000,
Jesse Keating 2f82dd
+					PAGE_ALIGN(max(mm->start_brk,
Jesse Keating 2f82dd
+					(unsigned long)0x08000000)), len);
Jesse Keating 2f82dd
+				vma = find_vma(mm, tmp);
Jesse Keating 2f82dd
+				if (TASK_SIZE - len >= tmp &&
Jesse Keating 2f82dd
+				    (!vma || tmp + len <= vma->vm_start))
Jesse Keating 2f82dd
+					return tmp;
Jesse Keating 2f82dd
+			}
Jesse Keating 2f82dd
+			/*
Jesse Keating 2f82dd
+			 * Ok, randomization didn't work out - return
Jesse Keating 2f82dd
+			 * the result of the linear search:
Jesse Keating 2f82dd
+			 */
Jesse Keating 2f82dd
+			return addr;
Jesse Keating 2f82dd
+		}
Jesse Keating 2f82dd
+		addr = vma->vm_end;
Jesse Keating 2f82dd
+	}
Jesse Keating 2f82dd
+
Jesse Keating 2f82dd
+failed:
Jesse Keating 2f82dd
+	return current->mm->get_unmapped_area(filp, addr0, len0, pgoff, flags);
Jesse Keating 2f82dd
+}
Jesse Keating 2f82dd
 
Jesse Keating 2f82dd
-EXPORT_SYMBOL(get_unmapped_area);
Jesse Keating 2f82dd
 
Jesse Keating 2f82dd
 /* Look up the first VMA which satisfies  addr < vm_end,  NULL if none. */
Jesse Keating 2f82dd
 struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
Jesse Keating 2f82dd
@@ -1549,6 +1641,14 @@ out:
Jesse Keating 2f82dd
 	return prev ? prev->vm_next : vma;
Jesse Keating 2f82dd
 }
Jesse Keating 2f82dd
 
Jesse Keating 2f82dd
+static int over_stack_limit(unsigned long sz)
Jesse Keating 2f82dd
+{
Jesse Keating 2f82dd
+	if (sz < EXEC_STACK_BIAS)
Jesse Keating 2f82dd
+		return 0;
Jesse Keating 2f82dd
+	return (sz - EXEC_STACK_BIAS) >
Jesse Keating 2f82dd
+			current->signal->rlim[RLIMIT_STACK].rlim_cur;
Jesse Keating 2f82dd
+}
Jesse Keating 2f82dd
+
Jesse Keating 2f82dd
 /*
Jesse Keating 2f82dd
  * Verify that the stack growth is acceptable and
Jesse Keating 2f82dd
  * update accounting. This is shared with both the
Jesse Keating 2f82dd
@@ -1565,7 +1665,7 @@ static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, uns
Jesse Keating 2f82dd
 		return -ENOMEM;
Jesse Keating 2f82dd
 
Jesse Keating 2f82dd
 	/* Stack limit test */
Jesse Keating 2f82dd
-	if (size > rlim[RLIMIT_STACK].rlim_cur)
Jesse Keating 2f82dd
+	if (over_stack_limit(size))
Jesse Keating 2f82dd
 		return -ENOMEM;
Jesse Keating 2f82dd
 
Jesse Keating 2f82dd
 	/* mlock limit tests */
Jesse Keating 2f82dd
@@ -1875,10 +1975,14 @@ int split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
Jesse Keating 2f82dd
 	if (new->vm_ops && new->vm_ops->open)
Jesse Keating 2f82dd
 		new->vm_ops->open(new);
Jesse Keating 2f82dd
 
Jesse Keating 2f82dd
-	if (new_below)
Jesse Keating 2f82dd
+	if (new_below) {
Jesse Keating 2f82dd
+		unsigned long old_end = vma->vm_end;
Jesse Keating 2f82dd
+
Jesse Keating 2f82dd
 		vma_adjust(vma, addr, vma->vm_end, vma->vm_pgoff +
Jesse Keating 2f82dd
 			((addr - new->vm_start) >> PAGE_SHIFT), new);
Jesse Keating 2f82dd
-	else
Jesse Keating 2f82dd
+		if (vma->vm_flags & VM_EXEC)
Jesse Keating 2f82dd
+			arch_remove_exec_range(mm, old_end);
Jesse Keating 2f82dd
+	} else
Jesse Keating 2f82dd
 		vma_adjust(vma, vma->vm_start, addr, vma->vm_pgoff, new);
Jesse Keating 2f82dd
 
Jesse Keating 2f82dd
 	return 0;
Jesse Keating 2f82dd
@@ -2128,6 +2232,7 @@ void exit_mmap(struct mm_struct *mm)
Jesse Keating 2f82dd
 
Jesse Keating 2f82dd
 	free_pgtables(tlb, vma, FIRST_USER_ADDRESS, 0);
Jesse Keating 2f82dd
 	tlb_finish_mmu(tlb, 0, end);
Jesse Keating 2f82dd
+	arch_flush_exec_range(mm);
Jesse Keating 2f82dd
 
Jesse Keating 2f82dd
 	/*
Jesse Keating 2f82dd
 	 * Walk the list again, actually closing and freeing it,
Jesse Keating 2f82dd
diff --git a/mm/mprotect.c b/mm/mprotect.c
Jesse Keating 2f82dd
index 8bc969d..3c9b4fc 100644
Jesse Keating 2f82dd
--- a/mm/mprotect.c
Jesse Keating 2f82dd
+++ b/mm/mprotect.c
Jesse Keating 2f82dd
@@ -26,9 +26,14 @@
Jesse Keating 2f82dd
 #include <linux/perf_event.h>
Jesse Keating 2f82dd
 #include <asm/uaccess.h>
Jesse Keating 2f82dd
 #include <asm/pgtable.h>
Jesse Keating 2f82dd
+#include <asm/pgalloc.h>
Jesse Keating 2f82dd
 #include <asm/cacheflush.h>
Jesse Keating 2f82dd
 #include <asm/tlbflush.h>
Jesse Keating 2f82dd
 
Jesse Keating 2f82dd
+#ifndef arch_remove_exec_range
Jesse Keating 2f82dd
+#define arch_remove_exec_range(mm, limit)      do { ; } while (0)
Jesse Keating 2f82dd
+#endif
Jesse Keating 2f82dd
+
Jesse Keating 2f82dd
 #ifndef pgprot_modify
Jesse Keating 2f82dd
 static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
Jesse Keating 2f82dd
 {
Jesse Keating 2f82dd
@@ -139,7 +144,7 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
Jesse Keating 2f82dd
 	struct mm_struct *mm = vma->vm_mm;
Jesse Keating 2f82dd
 	unsigned long oldflags = vma->vm_flags;
Jesse Keating 2f82dd
 	long nrpages = (end - start) >> PAGE_SHIFT;
Jesse Keating 2f82dd
-	unsigned long charged = 0;
Jesse Keating 2f82dd
+	unsigned long charged = 0, old_end = vma->vm_end;
Jesse Keating 2f82dd
 	pgoff_t pgoff;
Jesse Keating 2f82dd
 	int error;
Jesse Keating 2f82dd
 	int dirty_accountable = 0;
Jesse Keating 2f82dd
@@ -204,6 +209,9 @@ success:
Jesse Keating 2f82dd
 		dirty_accountable = 1;
Jesse Keating 2f82dd
 	}
Jesse Keating 2f82dd
 
Jesse Keating 2f82dd
+	if (oldflags & VM_EXEC)
Jesse Keating 2f82dd
+		arch_remove_exec_range(current->mm, old_end);
Jesse Keating 2f82dd
+
Jesse Keating 2f82dd
 	mmu_notifier_invalidate_range_start(mm, start, end);
Jesse Keating 2f82dd
 	if (is_vm_hugetlb_page(vma))
Jesse Keating 2f82dd
 		hugetlb_change_protection(vma, start, end, vma->vm_page_prot);
Jesse Keating 2f82dd
diff --git a/mm/mremap.c b/mm/mremap.c
Jesse Keating 2f82dd
index 97bff25..17a9fd7 100644
Jesse Keating 2f82dd
--- a/mm/mremap.c
Jesse Keating 2f82dd
+++ b/mm/mremap.c
Jesse Keating 2f82dd
@@ -414,10 +414,10 @@ unsigned long do_mremap(unsigned long addr,
Jesse Keating 2f82dd
		if (vma->vm_flags & VM_MAYSHARE)
Jesse Keating 2f82dd
			map_flags |= MAP_SHARED;
Jesse Keating 2f82dd
Jesse Keating 2f82dd
-		new_addr = get_unmapped_area(vma->vm_file, 0, new_len,
Jesse Keating 2f82dd
+		new_addr = get_unmapped_area_prot(vma->vm_file, 0, new_len,
Jesse Keating 2f82dd
 					vma->vm_pgoff +
Jesse Keating 2f82dd
 					((addr - vma->vm_start) >> PAGE_SHIFT),
Jesse Keating 2f82dd
-					map_flags);
Jesse Keating 2f82dd
+					map_flags, vma->vm_flags & VM_EXEC);
Jesse Keating 2f82dd
		if (new_addr & ~PAGE_MASK) {
Jesse Keating 2f82dd
			ret = new_addr;
Jesse Keating 2f82dd
			goto out;