Index: linux-2.6.10/arch/i386/kernel/asm-offsets.c =================================================================== --- linux-2.6.10.orig/arch/i386/kernel/asm-offsets.c 2004-12-25 05:34:31.000000000 +0800 +++ linux-2.6.10/arch/i386/kernel/asm-offsets.c 2005-04-05 16:34:18.173220992 +0800 @@ -52,6 +52,7 @@ OFFSET(TI_preempt_count, thread_info, preempt_count); OFFSET(TI_addr_limit, thread_info, addr_limit); OFFSET(TI_restart_block, thread_info, restart_block); + OFFSET(TI_sysenter_return, thread_info, sysenter_return); BLANK(); OFFSET(EXEC_DOMAIN_handler, exec_domain, handler); Index: linux-2.6.10/arch/i386/kernel/cpu/common.c =================================================================== --- linux-2.6.10.orig/arch/i386/kernel/cpu/common.c 2004-12-25 05:33:50.000000000 +0800 +++ linux-2.6.10/arch/i386/kernel/cpu/common.c 2005-04-05 16:34:18.174220840 +0800 @@ -384,6 +384,12 @@ if (disable_pse) clear_bit(X86_FEATURE_PSE, c->x86_capability); + /* hack: disable SEP for non-NX cpus; SEP breaks Execshield. */ + #ifdef CONFIG_HIGHMEM64G + if (!test_bit(X86_FEATURE_NX, c->x86_capability)) + #endif + clear_bit(X86_FEATURE_SEP, c->x86_capability); + /* If the model name is still unset, do table lookup. */ if ( !c->x86_model_id[0] ) { char *p; Index: linux-2.6.10/arch/i386/kernel/entry.S =================================================================== --- linux-2.6.10.orig/arch/i386/kernel/entry.S 2005-04-05 16:29:30.192000792 +0800 +++ linux-2.6.10/arch/i386/kernel/entry.S 2005-04-05 16:34:18.167221904 +0800 @@ -218,8 +218,12 @@ pushl %ebp pushfl pushl $(__USER_CS) - pushl $SYSENTER_RETURN - + /* + * Push current_thread_info()->sysenter_return to the stack. + * A tiny bit of offset fixup is necessary - 4*4 means the 4 words + * pushed above, and the word being pushed now: + */ + pushl (TI_sysenter_return-THREAD_SIZE+4*4)(%esp) /* * Load the potential sixth argument from user stack. * Careful about security. 
Index: linux-2.6.10/arch/i386/kernel/process.c =================================================================== --- linux-2.6.10.orig/arch/i386/kernel/process.c 2004-12-25 05:33:47.000000000 +0800 +++ linux-2.6.10/arch/i386/kernel/process.c 2005-04-05 16:34:18.173220992 +0800 @@ -36,6 +36,8 @@ #include #include #include +#include +#include #include #include @@ -565,6 +567,8 @@ /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */ __unlazy_fpu(prev_p); + if (next_p->mm) + load_user_cs_desc(cpu, next_p->mm); /* * Reload esp0, LDT and the page table pointer: @@ -812,3 +816,62 @@ return 0; } + +unsigned long arch_align_stack(unsigned long sp) +{ + if (current->flags & PF_RELOCEXEC) + sp -= ((get_random_int() % 65536) << 4); + return sp & ~0xf; +} + + +void arch_add_exec_range(struct mm_struct *mm, unsigned long limit) +{ + if (limit > mm->context.exec_limit) { + mm->context.exec_limit = limit; + set_user_cs(&mm->context.user_cs, limit); + if (mm == current->mm) + load_user_cs_desc(smp_processor_id(), mm); + } +} + +void arch_remove_exec_range(struct mm_struct *mm, unsigned long old_end) +{ + struct vm_area_struct *vma; + unsigned long limit = 0; + + if (old_end == mm->context.exec_limit) { + for (vma = mm->mmap; vma; vma = vma->vm_next) + if ((vma->vm_flags & VM_EXEC) && (vma->vm_end > limit)) + limit = vma->vm_end; + + mm->context.exec_limit = limit; + set_user_cs(&mm->context.user_cs, limit); + if (mm == current->mm) + load_user_cs_desc(smp_processor_id(), mm); + } +} + +void arch_flush_exec_range(struct mm_struct *mm) +{ + mm->context.exec_limit = 0; + set_user_cs(&mm->context.user_cs, 0); +} + +/* + * Generate random brk address between 128MB and 196MB. (if the layout + * allows it.) 
+ */ +void randomize_brk(unsigned long old_brk) +{ + unsigned long new_brk, range_start, range_end; + + range_start = 0x08000000; + if (current->mm->brk >= range_start) + range_start = current->mm->brk; + range_end = range_start + 0x02000000; + new_brk = randomize_range(range_start, range_end, 0); + if (new_brk) + current->mm->brk = new_brk; +} + Index: linux-2.6.10/arch/i386/kernel/signal.c =================================================================== --- linux-2.6.10.orig/arch/i386/kernel/signal.c 2005-04-05 16:29:23.290050048 +0800 +++ linux-2.6.10/arch/i386/kernel/signal.c 2005-04-05 16:34:18.170221448 +0800 @@ -390,7 +390,7 @@ if (err) goto give_sigsegv; - restorer = &__kernel_sigreturn; + restorer = current->mm->context.vdso + (long)&__kernel_sigreturn; if (ka->sa.sa_flags & SA_RESTORER) restorer = ka->sa.sa_restorer; @@ -487,9 +487,10 @@ goto give_sigsegv; /* Set up to return from userspace. */ - restorer = &__kernel_rt_sigreturn; + restorer = current->mm->context.vdso + (long)&__kernel_rt_sigreturn; if (ka->sa.sa_flags & SA_RESTORER) restorer = ka->sa.sa_restorer; + err |= __put_user(restorer, &frame->pretcode); /* Index: linux-2.6.10/arch/i386/kernel/smp.c =================================================================== --- linux-2.6.10.orig/arch/i386/kernel/smp.c 2005-04-05 16:29:30.198999728 +0800 +++ linux-2.6.10/arch/i386/kernel/smp.c 2005-04-05 16:34:18.172221144 +0800 @@ -22,6 +22,7 @@ #include #include +#include #include /* @@ -313,6 +314,8 @@ unsigned long cpu; cpu = get_cpu(); + if (current->active_mm) + load_user_cs_desc(cpu, current->active_mm); if (!cpu_isset(cpu, flush_cpumask)) goto out; Index: linux-2.6.10/arch/i386/kernel/sysenter.c =================================================================== --- linux-2.6.10.orig/arch/i386/kernel/sysenter.c 2004-12-25 05:35:40.000000000 +0800 +++ linux-2.6.10/arch/i386/kernel/sysenter.c 2005-04-05 16:34:18.171221296 +0800 @@ -13,6 +13,7 @@ #include #include #include +#include #include 
#include @@ -41,11 +42,14 @@ extern const char vsyscall_int80_start, vsyscall_int80_end; extern const char vsyscall_sysenter_start, vsyscall_sysenter_end; +struct page *sysenter_page; + static int __init sysenter_setup(void) { void *page = (void *)get_zeroed_page(GFP_ATOMIC); - __set_fixmap(FIX_VSYSCALL, __pa(page), PAGE_READONLY_EXEC); + __set_fixmap(FIX_VSYSCALL, __pa(page), PAGE_KERNEL_RO); + sysenter_page = virt_to_page(page); if (!boot_cpu_has(X86_FEATURE_SEP)) { memcpy(page, @@ -59,7 +63,51 @@ &vsyscall_sysenter_end - &vsyscall_sysenter_start); on_each_cpu(enable_sep_cpu, NULL, 1, 1); + return 0; } __initcall(sysenter_setup); + +extern void SYSENTER_RETURN_OFFSET; + +unsigned int vdso_enabled = 0; + +void map_vsyscall(void) +{ + struct thread_info *ti = current_thread_info(); + struct vm_area_struct *vma; + unsigned long addr; + + if (unlikely(!vdso_enabled)) { + current->mm->context.vdso = NULL; + return; + } + + /* + * Map the vDSO (it will be randomized): + */ + down_write(&current->mm->mmap_sem); + addr = do_mmap(NULL, 0, 4096, PROT_READ | PROT_EXEC, MAP_PRIVATE, 0); + current->mm->context.vdso = (void *)addr; + ti->sysenter_return = (void *)addr + (long)&SYSENTER_RETURN_OFFSET; + if (addr != -1) { + vma = find_vma(current->mm, addr); + if (vma) { + pgprot_val(vma->vm_page_prot) &= ~_PAGE_RW; + get_page(sysenter_page); + install_page(current->mm, vma, addr, + sysenter_page, vma->vm_page_prot); + + } + } + up_write(&current->mm->mmap_sem); +} + +static int __init vdso_setup(char *str) +{ + vdso_enabled = simple_strtoul(str, NULL, 0); + return 1; +} +__setup("vdso=", vdso_setup); + Index: linux-2.6.10/arch/i386/kernel/traps.c =================================================================== --- linux-2.6.10.orig/arch/i386/kernel/traps.c 2005-04-05 16:29:30.193000640 +0800 +++ linux-2.6.10/arch/i386/kernel/traps.c 2005-04-05 16:43:17.073295728 +0800 @@ -497,6 +497,10 @@ DO_ERROR(12, SIGBUS, "stack segment", stack_segment) DO_ERROR_INFO(17, SIGBUS, "alignment check", 
alignment_check, BUS_ADRALN, 0) +/* + * the original non-exec stack patch was written by + * Solar Designer . Thanks! + */ fastcall void do_general_protection(struct pt_regs * regs, long error_code) { int cpu = get_cpu(); @@ -535,6 +539,46 @@ if (!(regs->xcs & 3)) goto gp_in_kernel; + /* + * lazy-check for CS validity on exec-shield binaries: + */ + if (current->mm) { + int cpu = smp_processor_id(); + struct desc_struct *desc1, *desc2; + struct vm_area_struct *vma; + unsigned long limit = 0; + + spin_lock(&current->mm->page_table_lock); + for (vma = current->mm->mmap; vma; vma = vma->vm_next) + if ((vma->vm_flags & VM_EXEC) && (vma->vm_end > limit)) + limit = vma->vm_end; + spin_unlock(&current->mm->page_table_lock); + + current->mm->context.exec_limit = limit; + set_user_cs(&current->mm->context.user_cs, limit); + + desc1 = &current->mm->context.user_cs; + desc2 = per_cpu(cpu_gdt_table, cpu) + GDT_ENTRY_DEFAULT_USER_CS; + + /* + * The CS was not in sync - reload it and retry the + * instruction. If the instruction still faults then + * we won't hit this branch next time around. 
+ */ + if (desc1->a != desc2->a || desc1->b != desc2->b) { + if (print_fatal_signals >= 2) { + printk("#GPF fixup (%ld[seg:%lx]) at %08lx, CPU#%d.\n", error_code, error_code/8, regs->eip, smp_processor_id()); + printk(" exec_limit: %08lx, user_cs: %08lx/%08lx, CPU_cs: %08lx/%08lx.\n", current->mm->context.exec_limit, desc1->a, desc1->b, desc2->a, desc2->b); + } + load_user_cs_desc(cpu, current->mm); + return; + } + } + if (print_fatal_signals) { + printk("#GPF(%ld[seg:%lx]) at %08lx, CPU#%d.\n", error_code, error_code/8, regs->eip, smp_processor_id()); + printk(" exec_limit: %08lx, user_cs: %08lx/%08lx.\n", current->mm->context.exec_limit, current->mm->context.user_cs.a, current->mm->context.user_cs.b); + } + current->thread.error_code = error_code; current->thread.trap_no = 13; force_sig(SIGSEGV, current); Index: linux-2.6.10/arch/i386/kernel/vsyscall.lds.S =================================================================== --- linux-2.6.10.orig/arch/i386/kernel/vsyscall.lds.S 2004-12-25 05:34:31.000000000 +0800 +++ linux-2.6.10/arch/i386/kernel/vsyscall.lds.S 2005-04-05 16:34:18.169221600 +0800 @@ -7,7 +7,7 @@ SECTIONS { - . = VSYSCALL_BASE + SIZEOF_HEADERS; + . = SIZEOF_HEADERS; .hash : { *(.hash) } :text .dynsym : { *(.dynsym) } @@ -20,7 +20,7 @@ For the layouts to match, we need to skip more than enough space for the dynamic symbol table et al. If this amount is insufficient, ld -shared will barf. Just increase it here. */ - . = VSYSCALL_BASE + 0x400; + . = 0x400; .text : { *(.text) } :text =0x90909090 Index: linux-2.6.10/arch/i386/kernel/vsyscall-sysenter.S =================================================================== --- linux-2.6.10.orig/arch/i386/kernel/vsyscall-sysenter.S 2004-12-25 05:34:32.000000000 +0800 +++ linux-2.6.10/arch/i386/kernel/vsyscall-sysenter.S 2005-04-05 16:34:18.170221448 +0800 @@ -24,11 +24,11 @@ /* 7: align return point with nop's to make disassembly easier */ .space 7,0x90 - /* 14: System call restart point is here! 
(SYSENTER_RETURN - 2) */ + /* 14: System call restart point is here! (SYSENTER_RETURN_OFFSET-2) */ jmp .Lenter_kernel /* 16: System call normal return point is here! */ - .globl SYSENTER_RETURN /* Symbol used by entry.S. */ -SYSENTER_RETURN: + .globl SYSENTER_RETURN_OFFSET /* Symbol used by sysenter.c */ +SYSENTER_RETURN_OFFSET: pop %ebp .Lpop_ebp: pop %edx Index: linux-2.6.10/arch/i386/mm/init.c =================================================================== --- linux-2.6.10.orig/arch/i386/mm/init.c 2005-04-05 16:29:28.016331544 +0800 +++ linux-2.6.10/arch/i386/mm/init.c 2005-04-05 16:34:18.167221904 +0800 @@ -518,7 +518,10 @@ set_nx(); if (nx_enabled) printk("NX (Execute Disable) protection: active\n"); + else #endif + if (exec_shield) + printk("Using x86 segment limits to approximate NX protection\n"); pagetable_init(); Index: linux-2.6.10/arch/i386/mm/mmap.c =================================================================== --- linux-2.6.10.orig/arch/i386/mm/mmap.c 2004-12-25 05:34:33.000000000 +0800 +++ linux-2.6.10/arch/i386/mm/mmap.c 2005-04-05 16:43:44.365146736 +0800 @@ -26,6 +26,7 @@ #include #include +#include /* * Top of mmap area (just below the process stack). 
@@ -38,13 +39,17 @@ static inline unsigned long mmap_base(struct mm_struct *mm) { unsigned long gap = current->signal->rlim[RLIMIT_STACK].rlim_cur; + unsigned long random_factor = 0; + + if (current->flags & PF_RELOCEXEC) + random_factor = get_random_int() % (1024*1024); if (gap < MIN_GAP) gap = MIN_GAP; else if (gap > MAX_GAP) gap = MAX_GAP; - return TASK_SIZE - (gap & PAGE_MASK); + return PAGE_ALIGN(TASK_SIZE - gap - random_factor); } /* @@ -57,15 +62,17 @@ * Fall back to the standard layout if the personality * bit is set, or if the expected stack growth is unlimited: */ - if (sysctl_legacy_va_layout || + if ((exec_shield != 2) && (sysctl_legacy_va_layout || (current->personality & ADDR_COMPAT_LAYOUT) || - current->signal->rlim[RLIMIT_STACK].rlim_cur == RLIM_INFINITY) { + current->signal->rlim[RLIMIT_STACK].rlim_cur == RLIM_INFINITY)){ mm->mmap_base = TASK_UNMAPPED_BASE; mm->get_unmapped_area = arch_get_unmapped_area; mm->unmap_area = arch_unmap_area; } else { mm->mmap_base = mmap_base(mm); mm->get_unmapped_area = arch_get_unmapped_area_topdown; + if (current->flags & PF_RELOCEXEC) + mm->get_unmapped_exec_area = arch_get_unmapped_exec_area; mm->unmap_area = arch_unmap_area_topdown; } } Index: linux-2.6.10/arch/ia64/ia32/binfmt_elf32.c =================================================================== --- linux-2.6.10.orig/arch/ia64/ia32/binfmt_elf32.c 2004-12-25 05:35:28.000000000 +0800 +++ linux-2.6.10/arch/ia64/ia32/binfmt_elf32.c 2005-04-05 16:34:18.174220840 +0800 @@ -272,7 +272,7 @@ } static unsigned long -elf32_map (struct file *filep, unsigned long addr, struct elf_phdr *eppnt, int prot, int type) +elf32_map (struct file *filep, unsigned long addr, struct elf_phdr *eppnt, int prot, int type, unsigned long unused) { unsigned long pgoff = (eppnt->p_vaddr) & ~IA32_PAGE_MASK; Index: linux-2.6.10/arch/x86_64/ia32/ia32_binfmt.c =================================================================== --- linux-2.6.10.orig/arch/x86_64/ia32/ia32_binfmt.c 2004-12-25 
05:33:49.000000000 +0800 +++ linux-2.6.10/arch/x86_64/ia32/ia32_binfmt.c 2005-04-05 16:34:18.175220688 +0800 @@ -390,7 +390,7 @@ } static unsigned long -elf32_map (struct file *filep, unsigned long addr, struct elf_phdr *eppnt, int prot, int type) +elf32_map (struct file *filep, unsigned long addr, struct elf_phdr *eppnt, int prot, int type, unsigned long unused) { unsigned long map_addr; struct task_struct *me = current; Index: linux-2.6.10/drivers/char/random.c =================================================================== --- linux-2.6.10.orig/drivers/char/random.c 2005-04-05 16:29:24.214909448 +0800 +++ linux-2.6.10/drivers/char/random.c 2005-04-05 16:34:18.197217344 +0800 @@ -2469,3 +2469,37 @@ } #endif #endif /* CONFIG_INET */ + +/* + * Get a random word: + */ +unsigned int get_random_int(void) +{ + unsigned int val = 0; + + if (!exec_shield_randomize) + return 0; + +#ifdef CONFIG_X86_HAS_TSC + rdtscl(val); +#endif + val += current->pid + jiffies + (int)val; + + /* + * Use IP's RNG. It suits our purpose perfectly: it re-keys itself + * every second, from the entropy pool (and thus creates a limited + * drain on it), and uses halfMD4Transform within the second. 
We + * also spice it with the TSC (if available), jiffies, PID and the + * stack address: + */ + return secure_ip_id(val); +} + +unsigned long randomize_range(unsigned long start, unsigned long end, unsigned long len) +{ + unsigned long range = end - len - start; + if (end <= start + len) + return 0; + return PAGE_ALIGN(get_random_int() % range + start); +} + Index: linux-2.6.10/fs/binfmt_elf.c =================================================================== --- linux-2.6.10.orig/fs/binfmt_elf.c 2005-04-05 16:29:24.353888320 +0800 +++ linux-2.6.10/fs/binfmt_elf.c 2005-04-05 16:39:25.042569760 +0800 @@ -494,7 +494,7 @@ unsigned long reloc_func_desc = 0; char passed_fileno[6]; struct files_struct *files; - int have_pt_gnu_stack, executable_stack = EXSTACK_DEFAULT; + int have_pt_gnu_stack, relocexec, executable_stack = EXSTACK_DEFAULT; unsigned long def_flags = 0; struct { struct elfhdr elf_ex; @@ -660,6 +660,24 @@ } have_pt_gnu_stack = (i < loc->elf_ex.e_phnum); + relocexec = 0; + + if (current->personality == PER_LINUX) + switch (exec_shield) { + case 1: + if (executable_stack == EXSTACK_DISABLE_X) { + current->flags |= PF_RELOCEXEC; + relocexec = PF_RELOCEXEC; + } + break; + + case 2: + executable_stack = EXSTACK_DISABLE_X; + current->flags |= PF_RELOCEXEC; + relocexec = PF_RELOCEXEC; + break; + } + /* Some simple consistency checks for the interpreter */ if (elf_interpreter) { interpreter_type = INTERPRETER_ELF | INTERPRETER_AOUT; @@ -713,6 +731,15 @@ if (retval) goto out_free_dentry; + current->flags |= relocexec; +#ifdef __i386__ + /* + * Turn off the CS limit completely if exec-shield disabled or + * NX active: + */ + if (!exec_shield || executable_stack != EXSTACK_DISABLE_X || nx_enabled) + arch_add_exec_range(current->mm, -1); +#endif /* Discard our unneeded old files struct */ if (files) { steal_locks(files); @@ -731,7 +758,8 @@ /* Do this immediately, since STACK_TOP as used in setup_arg_pages may depend on the personality. 
*/ SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter); - if (elf_read_implies_exec(loc->elf_ex, have_pt_gnu_stack)) + if (exec_shield != 2 && + elf_read_implies_exec(loc->elf_ex, have_pt_gnu_stack)) current->personality |= READ_IMPLIES_EXEC; arch_pick_mmap_layout(current->mm); @@ -894,6 +922,14 @@ set_binfmt(&elf_format); + /* + * Map the vsyscall trampoline. This address is then passed via + * AT_SYSINFO. + */ +#ifdef __HAVE_ARCH_VSYSCALL + map_vsyscall(); +#endif + compute_creds(bprm); current->flags &= ~PF_FORKNOEXEC; create_elf_tables(bprm, &loc->elf_ex, (interpreter_type == INTERPRETER_AOUT), Index: linux-2.6.10/fs/exec.c =================================================================== --- linux-2.6.10.orig/fs/exec.c 2005-04-05 16:29:30.270988784 +0800 +++ linux-2.6.10/fs/exec.c 2005-04-05 16:34:18.177220384 +0800 @@ -396,7 +396,12 @@ while (i < MAX_ARG_PAGES) bprm->page[i++] = NULL; #else +#ifdef __HAVE_ARCH_ALIGN_STACK + stack_base = arch_align_stack(STACK_TOP - MAX_ARG_PAGES*PAGE_SIZE); + stack_base = PAGE_ALIGN(stack_base); +#else stack_base = STACK_TOP - MAX_ARG_PAGES * PAGE_SIZE; +#endif bprm->p += stack_base; mm->arg_start = bprm->p; arg_size = STACK_TOP - (PAGE_MASK & (unsigned long) mm->arg_start); @@ -854,6 +859,7 @@ tcomm[i] = '\0'; set_task_comm(current, tcomm); + current->flags &= ~PF_RELOCEXEC; flush_thread(); if (bprm->e_uid != current->euid || bprm->e_gid != current->egid || Index: linux-2.6.10/fs/proc/array.c =================================================================== --- linux-2.6.10.orig/fs/proc/array.c 2004-12-25 05:35:00.000000000 +0800 +++ linux-2.6.10/fs/proc/array.c 2005-04-05 16:34:18.180219928 +0800 @@ -373,8 +373,12 @@ ppid = pid_alive(task) ? 
task->group_leader->real_parent->tgid : 0; read_unlock(&tasklist_lock); - if (!whole || num_threads<2) - wchan = get_wchan(task); + if (!whole || num_threads<2) { + wchan = 0; + if (current->uid == task->uid || current->euid == task->uid || + capable(CAP_SYS_NICE)) + wchan = get_wchan(task); + } if (!whole) { min_flt = task->min_flt; maj_flt = task->maj_flt; Index: linux-2.6.10/fs/proc/base.c =================================================================== --- linux-2.6.10.orig/fs/proc/base.c 2005-04-05 16:29:24.361887104 +0800 +++ linux-2.6.10/fs/proc/base.c 2005-04-05 16:34:18.179220080 +0800 @@ -117,7 +117,7 @@ E(PROC_TGID_CMDLINE, "cmdline", S_IFREG|S_IRUGO), E(PROC_TGID_STAT, "stat", S_IFREG|S_IRUGO), E(PROC_TGID_STATM, "statm", S_IFREG|S_IRUGO), - E(PROC_TGID_MAPS, "maps", S_IFREG|S_IRUGO), + E(PROC_TGID_MAPS, "maps", S_IFREG|S_IRUSR), E(PROC_TGID_MEM, "mem", S_IFREG|S_IRUSR|S_IWUSR), E(PROC_TGID_CWD, "cwd", S_IFLNK|S_IRWXUGO), E(PROC_TGID_ROOT, "root", S_IFLNK|S_IRWXUGO), @@ -142,7 +142,7 @@ E(PROC_TID_CMDLINE, "cmdline", S_IFREG|S_IRUGO), E(PROC_TID_STAT, "stat", S_IFREG|S_IRUGO), E(PROC_TID_STATM, "statm", S_IFREG|S_IRUGO), - E(PROC_TID_MAPS, "maps", S_IFREG|S_IRUGO), + E(PROC_TID_MAPS, "maps", S_IFREG|S_IRUSR), E(PROC_TID_MEM, "mem", S_IFREG|S_IRUSR|S_IWUSR), E(PROC_TID_CWD, "cwd", S_IFLNK|S_IRWXUGO), E(PROC_TID_ROOT, "root", S_IFLNK|S_IRWXUGO), Index: linux-2.6.10/fs/proc/task_mmu.c =================================================================== --- linux-2.6.10.orig/fs/proc/task_mmu.c 2004-12-25 05:34:01.000000000 +0800 +++ linux-2.6.10/fs/proc/task_mmu.c 2005-04-05 16:41:11.796340720 +0800 @@ -14,19 +14,27 @@ buffer += sprintf(buffer, "VmSize:\t%8lu kB\n" "VmLck:\t%8lu kB\n" - "VmRSS:\t%8lu kB\n" - "VmData:\t%8lu kB\n" - "VmStk:\t%8lu kB\n" - "VmExe:\t%8lu kB\n" - "VmLib:\t%8lu kB\n" - "VmPTE:\t%8lu kB\n", - (mm->total_vm - mm->reserved_vm) << (PAGE_SHIFT-10), - mm->locked_vm << (PAGE_SHIFT-10), - mm->rss << (PAGE_SHIFT-10), - data << 
(PAGE_SHIFT-10), - mm->stack_vm << (PAGE_SHIFT-10), text, lib, - (PTRS_PER_PTE*sizeof(pte_t)*mm->nr_ptes) >> 10); - return buffer; + "VmData:\t%8lu kB\n" + "VmStk:\t%8lu kB\n" + "VmExe:\t%8lu kB\n" + "VmLib:\t%8lu kB\n" + "VmPTE:\t%8lu kB\n" + "StaBrk:\t%08lx kB\n" + "Brk:\t%08lx kB\n" + "StaStk:\t%08lx kB\n" , + (mm->total_vm - mm->reserved_vm) << (PAGE_SHIFT-10), + mm->locked_vm << (PAGE_SHIFT-10), + mm->rss << (PAGE_SHIFT-10), + data << (PAGE_SHIFT-10), + mm->stack_vm << (PAGE_SHIFT-10), text, lib, + (PTRS_PER_PTE*sizeof(pte_t)*mm->nr_ptes) >> 10, + mm->start_brk, mm->brk, mm->start_stack); +#if __i386__ + if (!nx_enabled) + buffer += sprintf(buffer, + "ExecLim:\t%08lx\n", mm->context.exec_limit); +#endif + return buffer; } unsigned long task_vsize(struct mm_struct *mm) @@ -47,6 +55,9 @@ static int show_map(struct seq_file *m, void *v) { +#ifdef __i386__ + struct task_struct *task = m->private; +#endif struct vm_area_struct *map = v; struct file *file = map->vm_file; int flags = map->vm_flags; @@ -65,7 +76,13 @@ map->vm_end, flags & VM_READ ? 'r' : '-', flags & VM_WRITE ? 'w' : '-', - flags & VM_EXEC ? 'x' : '-', + (flags & VM_EXEC +#ifdef __i386__ + || (!nx_enabled && + (map->vm_start < task->mm->context.exec_limit)) +#endif + ) + ? 'x' : '-', flags & VM_MAYSHARE ? 
's' : 'p', map->vm_pgoff << PAGE_SHIFT, MAJOR(dev), MINOR(dev), ino, &len); Index: linux-2.6.10/include/asm-i386/desc.h =================================================================== --- linux-2.6.10.orig/include/asm-i386/desc.h 2005-04-05 16:29:30.129010368 +0800 +++ linux-2.6.10/include/asm-i386/desc.h 2005-04-05 16:34:18.188218712 +0800 @@ -129,6 +129,20 @@ extern int __modify_ldt(struct mm_struct * mm, int func, void __user *ptr, unsigned long bytecount); +static inline void set_user_cs(struct desc_struct *desc, unsigned long limit) +{ + limit = (limit - 1) / PAGE_SIZE; + desc->a = limit & 0xffff; + desc->b = (limit & 0xf0000) | 0x00c0fb00; +} + +#define load_user_cs_desc(cpu, mm) \ + per_cpu(cpu_gdt_table, cpu)[GDT_ENTRY_DEFAULT_USER_CS] = (mm)->context.user_cs + +extern void arch_add_exec_range(struct mm_struct *mm, unsigned long limit); +extern void arch_remove_exec_range(struct mm_struct *mm, unsigned long limit); +extern void arch_flush_exec_range(struct mm_struct *mm); + #endif /* !__ASSEMBLY__ */ #endif Index: linux-2.6.10/include/asm-i386/elf.h =================================================================== --- linux-2.6.10.orig/include/asm-i386/elf.h 2004-12-25 05:35:15.000000000 +0800 +++ linux-2.6.10/include/asm-i386/elf.h 2005-04-05 16:34:18.188218712 +0800 @@ -9,6 +9,7 @@ #include #include #include /* for savesegment */ +#include #include @@ -133,15 +134,22 @@ #define ELF_CORE_COPY_FPREGS(tsk, elf_fpregs) dump_task_fpu(tsk, elf_fpregs) #define ELF_CORE_COPY_XFPREGS(tsk, elf_xfpregs) dump_task_extended_fpu(tsk, elf_xfpregs) -#define VSYSCALL_BASE (__fix_to_virt(FIX_VSYSCALL)) -#define VSYSCALL_EHDR ((const struct elfhdr *) VSYSCALL_BASE) -#define VSYSCALL_ENTRY ((unsigned long) &__kernel_vsyscall) extern void __kernel_vsyscall; +#define VSYSCALL_BASE ((unsigned long)current->mm->context.vdso) +#define VSYSCALL_EHDR ((const struct elfhdr *) VSYSCALL_BASE) +#define VSYSCALL_OFFSET ((unsigned long) &__kernel_vsyscall) +#define VSYSCALL_ENTRY 
(VSYSCALL_BASE + VSYSCALL_OFFSET) -#define ARCH_DLINFO \ -do { \ - NEW_AUX_ENT(AT_SYSINFO, VSYSCALL_ENTRY); \ - NEW_AUX_ENT(AT_SYSINFO_EHDR, VSYSCALL_BASE); \ +/* kernel-internal fixmap address: */ +#define __VSYSCALL_BASE (__fix_to_virt(FIX_VSYSCALL)) +#define __VSYSCALL_EHDR ((const struct elfhdr *) __VSYSCALL_BASE) + +#define ARCH_DLINFO \ +do { \ + if (VSYSCALL_BASE) { \ + NEW_AUX_ENT(AT_SYSINFO, VSYSCALL_ENTRY); \ + NEW_AUX_ENT(AT_SYSINFO_EHDR, VSYSCALL_BASE); \ + } \ } while (0) /* @@ -152,15 +160,15 @@ * Dumping its extra ELF program headers includes all the other information * a debugger needs to easily find how the vsyscall DSO was being used. */ -#define ELF_CORE_EXTRA_PHDRS (VSYSCALL_EHDR->e_phnum) +#define ELF_CORE_EXTRA_PHDRS (__VSYSCALL_EHDR->e_phnum) #define ELF_CORE_WRITE_EXTRA_PHDRS \ do { \ const struct elf_phdr *const vsyscall_phdrs = \ - (const struct elf_phdr *) (VSYSCALL_BASE \ - + VSYSCALL_EHDR->e_phoff); \ + (const struct elf_phdr *) (__VSYSCALL_BASE \ + + __VSYSCALL_EHDR->e_phoff); \ int i; \ Elf32_Off ofs = 0; \ - for (i = 0; i < VSYSCALL_EHDR->e_phnum; ++i) { \ + for (i = 0; i < __VSYSCALL_EHDR->e_phnum; ++i) { \ struct elf_phdr phdr = vsyscall_phdrs[i]; \ if (phdr.p_type == PT_LOAD) { \ BUG_ON(ofs != 0); \ @@ -178,10 +186,10 @@ #define ELF_CORE_WRITE_EXTRA_DATA \ do { \ const struct elf_phdr *const vsyscall_phdrs = \ - (const struct elf_phdr *) (VSYSCALL_BASE \ - + VSYSCALL_EHDR->e_phoff); \ + (const struct elf_phdr *) (__VSYSCALL_BASE \ + + __VSYSCALL_EHDR->e_phoff); \ int i; \ - for (i = 0; i < VSYSCALL_EHDR->e_phnum; ++i) { \ + for (i = 0; i < __VSYSCALL_EHDR->e_phnum; ++i) { \ if (vsyscall_phdrs[i].p_type == PT_LOAD) \ DUMP_WRITE((void *) vsyscall_phdrs[i].p_vaddr, \ PAGE_ALIGN(vsyscall_phdrs[i].p_memsz)); \ @@ -190,4 +198,10 @@ #endif +#define __HAVE_ARCH_RANDOMIZE_BRK +extern void randomize_brk(unsigned long old_brk); + +#define __HAVE_ARCH_VSYSCALL +extern void map_vsyscall(void); + #endif Index: 
linux-2.6.10/include/asm-i386/mmu.h =================================================================== --- linux-2.6.10.orig/include/asm-i386/mmu.h 2004-12-25 05:35:00.000000000 +0800 +++ linux-2.6.10/include/asm-i386/mmu.h 2005-04-05 16:34:18.189218560 +0800 @@ -7,11 +7,17 @@ * we put the segment information here. * * cpu_vm_mask is used to optimize ldt flushing. + * + * exec_limit is used to track the range PROT_EXEC + * mappings span. */ typedef struct { int size; struct semaphore sem; void *ldt; + struct desc_struct user_cs; + unsigned long exec_limit; + void *vdso; } mm_context_t; #endif Index: linux-2.6.10/include/asm-i386/pgalloc.h =================================================================== --- linux-2.6.10.orig/include/asm-i386/pgalloc.h 2004-12-25 05:33:50.000000000 +0800 +++ linux-2.6.10/include/asm-i386/pgalloc.h 2005-04-05 16:34:18.190218408 +0800 @@ -4,6 +4,7 @@ #include #include #include +#include #include #include /* for struct page */ Index: linux-2.6.10/include/asm-i386/processor.h =================================================================== --- linux-2.6.10.orig/include/asm-i386/processor.h 2004-12-25 05:33:50.000000000 +0800 +++ linux-2.6.10/include/asm-i386/processor.h 2005-04-05 16:34:18.189218560 +0800 @@ -296,7 +296,10 @@ /* This decides where the kernel will search for a free chunk of vm * space during mmap's. 
*/ -#define TASK_UNMAPPED_BASE (PAGE_ALIGN(TASK_SIZE / 3)) +#define TASK_UNMAPPED_BASE PAGE_ALIGN(TASK_SIZE/3) + +#define __HAVE_ARCH_ALIGN_STACK +extern unsigned long arch_align_stack(unsigned long sp); #define HAVE_ARCH_PICK_MMAP_LAYOUT @@ -478,6 +481,7 @@ regs->xcs = __USER_CS; \ regs->eip = new_eip; \ regs->esp = new_esp; \ + load_user_cs_desc(smp_processor_id(), current->mm); \ } while (0) /* Forward declaration, a strange C thing */ Index: linux-2.6.10/include/asm-i386/thread_info.h =================================================================== --- linux-2.6.10.orig/include/asm-i386/thread_info.h 2005-04-05 16:29:30.127010672 +0800 +++ linux-2.6.10/include/asm-i386/thread_info.h 2005-04-05 16:34:18.190218408 +0800 @@ -38,6 +38,7 @@ 0-0xBFFFFFFF for user-thead 0-0xFFFFFFFF for kernel-thread */ + void *sysenter_return; struct restart_block restart_block; unsigned long previous_esp; /* ESP of the previous stack in case Index: linux-2.6.10/include/asm-ia64/pgalloc.h =================================================================== --- linux-2.6.10.orig/include/asm-ia64/pgalloc.h 2004-12-25 05:33:49.000000000 +0800 +++ linux-2.6.10/include/asm-ia64/pgalloc.h 2005-04-05 16:34:18.184219320 +0800 @@ -23,6 +23,10 @@ #include #include +#define arch_add_exec_range(mm, limit) do { ; } while (0) +#define arch_flush_exec_range(mm) do { ; } while (0) +#define arch_remove_exec_range(mm, limit) do { ; } while (0) + /* * Very stupidly, we used to get new pgd's and pmd's, init their contents * to point to the NULL versions of the next level page table, later on Index: linux-2.6.10/include/asm-ppc64/pgalloc.h =================================================================== --- linux-2.6.10.orig/include/asm-ppc64/pgalloc.h 2004-12-25 05:33:50.000000000 +0800 +++ linux-2.6.10/include/asm-ppc64/pgalloc.h 2005-04-05 16:34:18.185219168 +0800 @@ -11,6 +11,11 @@ extern kmem_cache_t *zero_cache; +/* Dummy functions since we don't support execshield on ppc */ +#define 
arch_add_exec_range(mm, limit) do { ; } while (0) +#define arch_flush_exec_range(mm) do { ; } while (0) +#define arch_remove_exec_range(mm, limit) do { ; } while (0) + /* * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License Index: linux-2.6.10/include/asm-ppc/pgalloc.h =================================================================== --- linux-2.6.10.orig/include/asm-ppc/pgalloc.h 2004-12-25 05:33:48.000000000 +0800 +++ linux-2.6.10/include/asm-ppc/pgalloc.h 2005-04-05 16:34:18.183219472 +0800 @@ -40,5 +40,10 @@ #define check_pgt_cache() do { } while (0) +#define arch_add_exec_range(mm, limit) do { ; } while (0) +#define arch_flush_exec_range(mm) do { ; } while (0) +#define arch_remove_exec_range(mm, limit) do { ; } while (0) + + #endif /* _PPC_PGALLOC_H */ #endif /* __KERNEL__ */ Index: linux-2.6.10/include/asm-s390/pgalloc.h =================================================================== --- linux-2.6.10.orig/include/asm-s390/pgalloc.h 2004-12-25 05:35:00.000000000 +0800 +++ linux-2.6.10/include/asm-s390/pgalloc.h 2005-04-05 16:34:18.186219016 +0800 @@ -19,6 +19,10 @@ #include #include +#define arch_add_exec_range(mm, limit) do { ; } while (0) +#define arch_flush_exec_range(mm) do { ; } while (0) +#define arch_remove_exec_range(mm, limit) do { ; } while (0) + #define check_pgt_cache() do {} while (0) extern void diag10(unsigned long addr); Index: linux-2.6.10/include/asm-sparc64/pgalloc.h =================================================================== --- linux-2.6.10.orig/include/asm-sparc64/pgalloc.h 2004-12-25 05:35:29.000000000 +0800 +++ linux-2.6.10/include/asm-sparc64/pgalloc.h 2005-04-05 16:34:18.187218864 +0800 @@ -261,4 +261,8 @@ #define pgd_free(pgd) free_pgd_fast(pgd) #define pgd_alloc(mm) get_pgd_fast() +#define arch_add_exec_range(mm, limit) do { ; } while (0) +#define arch_flush_exec_range(mm) do { ; } while (0) +#define arch_remove_exec_range(mm, limit) do { ; } 
while (0) + #endif /* _SPARC64_PGALLOC_H */ Index: linux-2.6.10/include/asm-sparc/pgalloc.h =================================================================== --- linux-2.6.10.orig/include/asm-sparc/pgalloc.h 2004-12-25 05:33:51.000000000 +0800 +++ linux-2.6.10/include/asm-sparc/pgalloc.h 2005-04-05 16:34:18.191218256 +0800 @@ -66,4 +66,8 @@ #define pte_free(pte) BTFIXUP_CALL(pte_free)(pte) #define __pte_free_tlb(tlb, pte) pte_free(pte) +#define arch_add_exec_range(mm, limit) do { ; } while (0) +#define arch_flush_exec_range(mm) do { ; } while (0) +#define arch_remove_exec_range(mm, limit) do { ; } while (0) + #endif /* _SPARC_PGALLOC_H */ Index: linux-2.6.10/include/asm-x86_64/pgalloc.h =================================================================== --- linux-2.6.10.orig/include/asm-x86_64/pgalloc.h 2004-12-25 05:34:57.000000000 +0800 +++ linux-2.6.10/include/asm-x86_64/pgalloc.h 2005-04-05 16:34:18.185219168 +0800 @@ -7,6 +7,11 @@ #include #include +#define arch_add_exec_range(mm, limit) do { ; } while (0) +#define arch_flush_exec_range(mm) do { ; } while (0) +#define arch_remove_exec_range(mm, limit) do { ; } while (0) + + #define pmd_populate_kernel(mm, pmd, pte) \ set_pmd(pmd, __pmd(_PAGE_TABLE | __pa(pte))) #define pgd_populate(mm, pgd, pmd) \ Index: linux-2.6.10/include/linux/mm.h =================================================================== --- linux-2.6.10.orig/include/linux/mm.h 2005-04-05 16:29:30.250991824 +0800 +++ linux-2.6.10/include/linux/mm.h 2005-04-05 16:43:44.366146584 +0800 @@ -685,7 +685,14 @@ unsigned long addr, unsigned long len, pgoff_t pgoff); extern void exit_mmap(struct mm_struct *); -extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned long, unsigned long, unsigned long); +extern unsigned long get_unmapped_area_prot(struct file *, unsigned long, unsigned long, unsigned long, unsigned long, int); + + +static inline unsigned long get_unmapped_area(struct file * file, unsigned long addr, + unsigned long 
len, unsigned long pgoff, unsigned long flags) +{ + return get_unmapped_area_prot(file, addr, len, pgoff, flags, 0); +} extern unsigned long __do_mmap_pgoff(struct mm_struct *mm, struct file *file, unsigned long addr, unsigned long len, Index: linux-2.6.10/include/linux/random.h =================================================================== --- linux-2.6.10.orig/include/linux/random.h 2004-12-25 05:35:40.000000000 +0800 +++ linux-2.6.10/include/linux/random.h 2005-04-05 16:34:18.183219472 +0800 @@ -69,6 +69,9 @@ extern struct file_operations random_fops, urandom_fops; #endif +unsigned int get_random_int(void); +unsigned long randomize_range(unsigned long start, unsigned long end, unsigned long len); + #endif /* __KERNEL___ */ #endif /* _LINUX_RANDOM_H */ Index: linux-2.6.10/include/linux/resource.h =================================================================== --- linux-2.6.10.orig/include/linux/resource.h 2004-12-25 05:33:52.000000000 +0800 +++ linux-2.6.10/include/linux/resource.h 2005-04-05 16:34:18.182219624 +0800 @@ -52,8 +52,11 @@ /* * Limit the stack by to some sane default: root can always * increase this limit if needed.. 8MB seems reasonable. + * + * (2MB more to cover randomization effects.) 
*/ -#define _STK_LIM (8*1024*1024) +#define _STK_LIM (10*1024*1024) +#define EXEC_STACK_BIAS (2*1024*1024) /* * GPG wants 32kB of mlocked memory, to make sure pass phrases Index: linux-2.6.10/include/linux/sched.h =================================================================== --- linux-2.6.10.orig/include/linux/sched.h 2005-04-05 16:29:27.971338384 +0800 +++ linux-2.6.10/include/linux/sched.h 2005-04-05 16:43:44.367146432 +0800 @@ -32,6 +32,9 @@ #include struct exec_domain; +extern int exec_shield; +extern int exec_shield_randomize; +extern int print_fatal_signals; /* * cloning flags: @@ -193,6 +196,10 @@ extern unsigned long arch_get_unmapped_area(struct file *, unsigned long, unsigned long, unsigned long, unsigned long); + +extern unsigned long +arch_get_unmapped_exec_area(struct file *, unsigned long, unsigned long, + unsigned long, unsigned long); extern unsigned long arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, @@ -208,6 +215,9 @@ unsigned long (*get_unmapped_area) (struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags); + unsigned long (*get_unmapped_exec_area) (struct file *filp, + unsigned long addr, unsigned long len, + unsigned long pgoff, unsigned long flags); void (*unmap_area) (struct vm_area_struct *area); unsigned long mmap_base; /* base of mmap area */ unsigned long free_area_cache; /* first hole */ @@ -720,6 +730,7 @@ #define PF_LESS_THROTTLE 0x00100000 /* Throttle me less: I clean memory */ #define PF_SYNCWRITE 0x00200000 /* I am doing a sync write */ #define PF_BORROWED_MM 0x00400000 /* I am a kthread doing use_mm */ +#define PF_RELOCEXEC 0x00800000 /* relocate shared libraries */ #ifdef CONFIG_SMP extern int set_cpus_allowed(task_t *p, cpumask_t new_mask); Index: linux-2.6.10/kernel/signal.c =================================================================== --- linux-2.6.10.orig/kernel/signal.c 2005-04-05 16:29:27.951341424 +0800 
+++ linux-2.6.10/kernel/signal.c 2005-04-05 16:43:17.077295120 +0800 @@ -1608,6 +1608,35 @@ spin_unlock_irq(¤t->sighand->siglock); } +int print_fatal_signals = 0; + +static void print_fatal_signal(struct pt_regs *regs, int signr) +{ + int i; + unsigned char insn; + printk("%s/%d: potentially unexpected fatal signal %d.\n", + current->comm, current->pid, signr); + +#ifdef __i386__ + printk("code at %08lx: ", regs->eip); + for (i = 0; i < 16; i++) { + __get_user(insn, (unsigned char *)(regs->eip + i)); + printk("%02x ", insn); + } +#endif + printk("\n"); + show_regs(regs); +} + +static int __init setup_print_fatal_signals(char *str) +{ + get_option (&str, &print_fatal_signals); + + return 1; +} + +__setup("print-fatal-signals=", setup_print_fatal_signals); + #ifndef HAVE_ARCH_GET_SIGNAL_TO_DELIVER static void @@ -1808,6 +1837,12 @@ if (!signr) break; /* will return 0 */ + if ((signr == SIGSEGV) && print_fatal_signals) { + spin_unlock_irq(¤t->sighand->siglock); + print_fatal_signal(regs, signr); + spin_lock_irq(¤t->sighand->siglock); + } + if ((current->ptrace & PT_PTRACED) && signr != SIGKILL) { ptrace_signal_deliver(regs, cookie); @@ -1904,6 +1939,8 @@ * Anything else is fatal, maybe with a core dump. 
*/ current->flags |= PF_SIGNALED; + if (print_fatal_signals) + print_fatal_signal(regs, signr); if (sig_kernel_coredump(signr)) { /* * If it was able to dump core, this kills all Index: linux-2.6.10/kernel/sysctl.c =================================================================== --- linux-2.6.10.orig/kernel/sysctl.c 2005-04-05 16:29:24.394882088 +0800 +++ linux-2.6.10/kernel/sysctl.c 2005-04-05 16:43:17.078294968 +0800 @@ -75,6 +75,29 @@ void __user *, size_t *, loff_t *); #endif +extern unsigned int vdso_enabled; + +int exec_shield = 1; +int exec_shield_randomize = 1; + +static int __init setup_exec_shield(char *str) +{ + get_option (&str, &exec_shield); + + return 1; +} + +__setup("exec-shield=", setup_exec_shield); + +static int __init setup_exec_shield_randomize(char *str) +{ + get_option (&str, &exec_shield_randomize); + + return 1; +} + +__setup("exec-shield-randomize=", setup_exec_shield_randomize); + /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */ static int maxolduid = 65535; static int minolduid; @@ -276,6 +299,40 @@ .proc_handler = &proc_dointvec, }, { + .ctl_name = KERN_PANIC, + .procname = "exec-shield", + .data = &exec_shield, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = KERN_PANIC, + .procname = "exec-shield-randomize", + .data = &exec_shield_randomize, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = KERN_PANIC, + .procname = "print-fatal-signals", + .data = &print_fatal_signals, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, +#if __i386__ + { + .ctl_name = KERN_PANIC, + .procname = "vdso", + .data = &vdso_enabled, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, +#endif + { .ctl_name = KERN_CORE_USES_PID, .procname = "core_uses_pid", .data = &core_uses_pid, Index: linux-2.6.10/mm/mmap.c 
=================================================================== --- linux-2.6.10.orig/mm/mmap.c 2005-04-05 16:29:30.134009608 +0800 +++ linux-2.6.10/mm/mmap.c 2005-04-05 16:43:44.369146128 +0800 @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -245,6 +246,8 @@ __vma_link_list(struct mm_struct *mm, struct vm_area_struct *vma, struct vm_area_struct *prev, struct rb_node *rb_parent) { + if (vma->vm_flags & VM_EXEC) + arch_add_exec_range(mm, vma->vm_end); if (prev) { vma->vm_next = prev->vm_next; prev->vm_next = vma; @@ -347,6 +350,8 @@ rb_erase(&vma->vm_rb, &mm->mm_rb); if (mm->mmap_cache == vma) mm->mmap_cache = prev; + if (vma->vm_flags & VM_EXEC) + arch_remove_exec_range(mm, vma->vm_end); } /* @@ -642,6 +647,8 @@ } else /* cases 2, 5, 7 */ vma_adjust(prev, prev->vm_start, end, prev->vm_pgoff, NULL); + if (prev->vm_flags & VM_EXEC) + arch_add_exec_range(mm, prev->vm_end); return prev; } @@ -813,7 +820,7 @@ /* Obtain the address to map to. we verify (or select) it and ensure * that it represents a valid section of the address space. 
*/ - addr = get_unmapped_area(file, addr, len, pgoff, flags); + addr = get_unmapped_area_prot(file, addr, len, pgoff, flags, prot & PROT_EXEC); if (addr & ~PAGE_MASK) return addr; @@ -1207,9 +1214,10 @@ area->vm_mm->free_area_cache = area->vm_end; } + unsigned long -get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, - unsigned long pgoff, unsigned long flags) +get_unmapped_area_prot(struct file *file, unsigned long addr, unsigned long len, + unsigned long pgoff, unsigned long flags, int exec) { if (flags & MAP_FIXED) { unsigned long ret; @@ -1241,10 +1249,80 @@ return file->f_op->get_unmapped_area(file, addr, len, pgoff, flags); - return current->mm->get_unmapped_area(file, addr, len, pgoff, flags); + if (exec && current->mm->get_unmapped_exec_area) + return current->mm->get_unmapped_exec_area(file, addr, len, pgoff, flags); + else + return current->mm->get_unmapped_area(file, addr, len, pgoff, flags); } -EXPORT_SYMBOL(get_unmapped_area); +EXPORT_SYMBOL(get_unmapped_area_prot); + + +#define SHLIB_BASE 0x00111000 + +unsigned long arch_get_unmapped_exec_area(struct file *filp, unsigned long addr0, + unsigned long len0, unsigned long pgoff, unsigned long flags) +{ + unsigned long addr = addr0, len = len0; + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + unsigned long tmp; + + if (len > TASK_SIZE) + return -ENOMEM; + + if (!addr && !(flags & MAP_FIXED)) + addr = randomize_range(SHLIB_BASE, 0x01000000, len); + + if (addr) { + addr = PAGE_ALIGN(addr); + vma = find_vma(mm, addr); + if (TASK_SIZE - len >= addr && + (!vma || addr + len <= vma->vm_start)) { + return addr; + } + } + + addr = SHLIB_BASE; + + for (vma = find_vma(mm, addr); ; vma = vma->vm_next) { + /* At this point: (!vma || addr < vma->vm_end). 
*/ + if (TASK_SIZE - len < addr) { + return -ENOMEM; + } + if (!vma || addr + len <= vma->vm_start) { + /* + * Must not let a PROT_EXEC mapping get into the + * brk area: + */ + if (addr + len > mm->brk) + goto failed; + + /* + * Up until the brk area we randomize addresses + * as much as possible: + */ + if (addr >= 0x01000000) { + tmp = randomize_range(0x01000000, mm->brk, len); + vma = find_vma(mm, tmp); + if (TASK_SIZE - len >= tmp && + (!vma || tmp + len <= vma->vm_start)) + return tmp; + } + /* + * Ok, randomization didn't work out - return + * the result of the linear search: + */ + return addr; + } + addr = vma->vm_end; + } + +failed: + return current->mm->get_unmapped_area(filp, addr0, len0, pgoff, flags); +} + + /* Look up the first VMA which satisfies addr < vm_end, NULL if none. */ struct vm_area_struct * find_vma(struct mm_struct * mm, unsigned long addr) @@ -1319,6 +1397,14 @@ return prev ? prev->vm_next : vma; } + +static int over_stack_limit(unsigned long sz) +{ + if (sz < EXEC_STACK_BIAS) + return 0; + return (sz - EXEC_STACK_BIAS) > current->signal->rlim[RLIMIT_STACK].rlim_cur; +} + #ifdef CONFIG_STACK_GROWSUP /* * vma is the first one with address > vma->vm_end. Have to extend vma.
@@ -1358,7 +1444,7 @@ return -ENOMEM; } - if (address - vma->vm_start > current->signal->rlim[RLIMIT_STACK].rlim_cur || + if (over_stack_limit(address - vma->vm_start) || ((vma->vm_mm->total_vm + grow) << PAGE_SHIFT) > current->signal->rlim[RLIMIT_AS].rlim_cur) { anon_vma_unlock(vma); @@ -1432,7 +1518,7 @@ return -ENOMEM; } - if (vma->vm_end - address > current->signal->rlim[RLIMIT_STACK].rlim_cur || + if (over_stack_limit(vma->vm_end - address) || ((vma->vm_mm->total_vm + grow) << PAGE_SHIFT) > current->signal->rlim[RLIMIT_AS].rlim_cur) { anon_vma_unlock(vma); @@ -1668,10 +1754,14 @@ if (new->vm_ops && new->vm_ops->open) new->vm_ops->open(new); - if (new_below) + if (new_below) { + unsigned long old_end = vma->vm_end; + vma_adjust(vma, addr, vma->vm_end, vma->vm_pgoff + ((addr - new->vm_start) >> PAGE_SHIFT), new); - else + if (vma->vm_flags & VM_EXEC) + arch_remove_exec_range(mm, old_end); + } else vma_adjust(vma, vma->vm_start, addr, vma->vm_pgoff, new); return 0; @@ -1890,6 +1980,7 @@ mm->rss = 0; mm->total_vm = 0; mm->locked_vm = 0; + arch_flush_exec_range(mm); spin_unlock(&mm->page_table_lock); Index: linux-2.6.10/mm/mprotect.c =================================================================== --- linux-2.6.10.orig/mm/mprotect.c 2005-04-05 16:29:30.135009456 +0800 +++ linux-2.6.10/mm/mprotect.c 2005-04-05 16:34:18.193217952 +0800 @@ -22,6 +22,7 @@ #include #include +#include #include #include @@ -117,7 +118,7 @@ struct mm_struct * mm = vma->vm_mm; unsigned long oldflags = vma->vm_flags; long nrpages = (end - start) >> PAGE_SHIFT; - unsigned long charged = 0; + unsigned long charged = 0, old_end = vma->vm_end; pgprot_t newprot; pgoff_t pgoff; int error; @@ -179,8 +180,11 @@ * vm_flags and vm_page_prot are protected by the mmap_sem * held in write mode. 
*/ + oldflags = vma->vm_flags; vma->vm_flags = newflags; vma->vm_page_prot = newprot; + if (oldflags & VM_EXEC) + arch_remove_exec_range(current->mm, old_end); change_protection(vma, start, end, newprot); __vm_stat_account(mm, oldflags, vma->vm_file, -nrpages); __vm_stat_account(mm, newflags, vma->vm_file, nrpages); Index: linux-2.6.10/mm/mremap.c =================================================================== --- linux-2.6.10.orig/mm/mremap.c 2004-12-25 05:34:58.000000000 +0800 +++ linux-2.6.10/mm/mremap.c 2005-04-05 16:43:44.370145976 +0800 @@ -385,8 +385,8 @@ if (vma->vm_flags & VM_MAYSHARE) map_flags |= MAP_SHARED; - new_addr = get_unmapped_area(vma->vm_file, 0, new_len, - vma->vm_pgoff, map_flags); + new_addr = get_unmapped_area_prot(vma->vm_file, 0, new_len, + vma->vm_pgoff, map_flags, vma->vm_flags & VM_EXEC); ret = new_addr; if (new_addr & ~PAGE_MASK) goto out;