Whamcloud - gitweb
Revert Tom's commit. It breaks recovery.
[fs/lustre-release.git] / lustre / kernel_patches / patches / linux-2.6.10-fc3-left.patch
1 Index: linux-2.6.10/arch/i386/kernel/asm-offsets.c
2 ===================================================================
3 --- linux-2.6.10.orig/arch/i386/kernel/asm-offsets.c    2004-12-25 05:34:31.000000000 +0800
4 +++ linux-2.6.10/arch/i386/kernel/asm-offsets.c 2005-04-05 16:34:18.173220992 +0800
5 @@ -52,6 +52,7 @@
6         OFFSET(TI_preempt_count, thread_info, preempt_count);
7         OFFSET(TI_addr_limit, thread_info, addr_limit);
8         OFFSET(TI_restart_block, thread_info, restart_block);
9 +       OFFSET(TI_sysenter_return, thread_info, sysenter_return);
10         BLANK();
11  
12         OFFSET(EXEC_DOMAIN_handler, exec_domain, handler);
13 Index: linux-2.6.10/arch/i386/kernel/cpu/common.c
14 ===================================================================
15 --- linux-2.6.10.orig/arch/i386/kernel/cpu/common.c     2004-12-25 05:33:50.000000000 +0800
16 +++ linux-2.6.10/arch/i386/kernel/cpu/common.c  2005-04-05 16:34:18.174220840 +0800
17 @@ -384,6 +384,12 @@
18         if (disable_pse)
19                 clear_bit(X86_FEATURE_PSE, c->x86_capability);
20  
21 +       /* hack: disable SEP for non-NX cpus; SEP breaks Execshield. */
22 +       #ifdef CONFIG_HIGHMEM64G
23 +       if (!test_bit(X86_FEATURE_NX, c->x86_capability)) 
24 +       #endif
25 +               clear_bit(X86_FEATURE_SEP, c->x86_capability);
26 +
27         /* If the model name is still unset, do table lookup. */
28         if ( !c->x86_model_id[0] ) {
29                 char *p;
30 Index: linux-2.6.10/arch/i386/kernel/entry.S
31 ===================================================================
32 --- linux-2.6.10.orig/arch/i386/kernel/entry.S  2005-04-05 16:29:30.192000792 +0800
33 +++ linux-2.6.10/arch/i386/kernel/entry.S       2005-04-05 16:34:18.167221904 +0800
34 @@ -218,8 +218,12 @@
35         pushl %ebp
36         pushfl
37         pushl $(__USER_CS)
38 -       pushl $SYSENTER_RETURN
39 -
40 +       /*
41 +        * Push current_thread_info()->sysenter_return to the stack.
42 +        * A tiny bit of offset fixup is necessary - 4*4 means the 4 words
43 +        * pushed above, and the word being pushed now:
44 +        */
45 +       pushl (TI_sysenter_return-THREAD_SIZE+4*4)(%esp)
46  /*
47   * Load the potential sixth argument from user stack.
48   * Careful about security.
49 Index: linux-2.6.10/arch/i386/kernel/process.c
50 ===================================================================
51 --- linux-2.6.10.orig/arch/i386/kernel/process.c        2004-12-25 05:33:47.000000000 +0800
52 +++ linux-2.6.10/arch/i386/kernel/process.c     2005-04-05 16:34:18.173220992 +0800
53 @@ -36,6 +36,8 @@
54  #include <linux/module.h>
55  #include <linux/kallsyms.h>
56  #include <linux/ptrace.h>
57 +#include <linux/mman.h>
58 +#include <linux/random.h>
59  
60  #include <asm/uaccess.h>
61  #include <asm/pgtable.h>
62 @@ -565,6 +567,8 @@
63         /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */
64  
65         __unlazy_fpu(prev_p);
66 +       if (next_p->mm)
67 +               load_user_cs_desc(cpu, next_p->mm);
68  
69         /*
70          * Reload esp0, LDT and the page table pointer:
71 @@ -812,3 +816,62 @@
72         return 0;
73  }
74  
75 +
76 +unsigned long arch_align_stack(unsigned long sp)
77 +{
78 +       if (current->flags & PF_RELOCEXEC)
79 +               sp -= ((get_random_int() % 65536) << 4);
80 +       return sp & ~0xf;
81 +}
82 +
83 +
84 +void arch_add_exec_range(struct mm_struct *mm, unsigned long limit)
85 +{
86 +       if (limit > mm->context.exec_limit) {
87 +               mm->context.exec_limit = limit;
88 +               set_user_cs(&mm->context.user_cs, limit);
89 +               if (mm == current->mm)
90 +                       load_user_cs_desc(smp_processor_id(), mm);
91 +       }
92 +}
93 +
94 +void arch_remove_exec_range(struct mm_struct *mm, unsigned long old_end)
95 +{
96 +       struct vm_area_struct *vma;
97 +       unsigned long limit = 0;
98 +
99 +       if (old_end == mm->context.exec_limit) {
100 +               for (vma = mm->mmap; vma; vma = vma->vm_next)
101 +                       if ((vma->vm_flags & VM_EXEC) && (vma->vm_end > limit))
102 +                               limit = vma->vm_end;
103 +
104 +               mm->context.exec_limit = limit;
105 +               set_user_cs(&mm->context.user_cs, limit);
106 +               if (mm == current->mm)
107 +                       load_user_cs_desc(smp_processor_id(), mm);
108 +       }
109 +}
110 +
111 +void arch_flush_exec_range(struct mm_struct *mm)
112 +{
113 +       mm->context.exec_limit = 0;
114 +       set_user_cs(&mm->context.user_cs, 0);
115 +}
116 +
117 +/*
118 + * Pick a random brk inside a 32MB window starting at 128MB, or at the
119 + * current brk if that is already higher. brk is kept if none is found.
120 + */
121 +void randomize_brk(unsigned long old_brk)
122 +{
123 +       unsigned long new_brk, range_start, range_end;
124 +
125 +       range_start = 0x08000000;
126 +       if (current->mm->brk >= range_start)
127 +               range_start = current->mm->brk;
128 +       range_end = range_start + 0x02000000;
129 +       new_brk = randomize_range(range_start, range_end, 0);
130 +       if (new_brk)
131 +               current->mm->brk = new_brk;
132 +}
133 +
134 Index: linux-2.6.10/arch/i386/kernel/signal.c
135 ===================================================================
136 --- linux-2.6.10.orig/arch/i386/kernel/signal.c 2005-04-05 16:29:23.290050048 +0800
137 +++ linux-2.6.10/arch/i386/kernel/signal.c      2005-04-05 16:34:18.170221448 +0800
138 @@ -390,7 +390,7 @@
139         if (err)
140                 goto give_sigsegv;
141  
142 -       restorer = &__kernel_sigreturn;
143 +       restorer = current->mm->context.vdso + (long)&__kernel_sigreturn;
144         if (ka->sa.sa_flags & SA_RESTORER)
145                 restorer = ka->sa.sa_restorer;
146  
147 @@ -487,9 +487,10 @@
148                 goto give_sigsegv;
149  
150         /* Set up to return from userspace.  */
151 -       restorer = &__kernel_rt_sigreturn;
152 +       restorer = current->mm->context.vdso + (long)&__kernel_rt_sigreturn;
153         if (ka->sa.sa_flags & SA_RESTORER)
154                 restorer = ka->sa.sa_restorer;
155 +
156         err |= __put_user(restorer, &frame->pretcode);
157          
158         /*
159 Index: linux-2.6.10/arch/i386/kernel/smp.c
160 ===================================================================
161 --- linux-2.6.10.orig/arch/i386/kernel/smp.c    2005-04-05 16:29:30.198999728 +0800
162 +++ linux-2.6.10/arch/i386/kernel/smp.c 2005-04-05 16:34:18.172221144 +0800
163 @@ -22,6 +22,7 @@
164  
165  #include <asm/mtrr.h>
166  #include <asm/tlbflush.h>
167 +#include <asm/desc.h>
168  #include <mach_apic.h>
169  
170  /*
171 @@ -313,6 +314,8 @@
172         unsigned long cpu;
173  
174         cpu = get_cpu();
175 +       if (current->active_mm)
176 +               load_user_cs_desc(cpu, current->active_mm);
177  
178         if (!cpu_isset(cpu, flush_cpumask))
179                 goto out;
180 Index: linux-2.6.10/arch/i386/kernel/sysenter.c
181 ===================================================================
182 --- linux-2.6.10.orig/arch/i386/kernel/sysenter.c       2004-12-25 05:35:40.000000000 +0800
183 +++ linux-2.6.10/arch/i386/kernel/sysenter.c    2005-04-05 16:34:18.171221296 +0800
184 @@ -13,6 +13,7 @@
185  #include <linux/gfp.h>
186  #include <linux/string.h>
187  #include <linux/elf.h>
188 +#include <linux/mman.h>
189  
190  #include <asm/cpufeature.h>
191  #include <asm/msr.h>
192 @@ -41,11 +42,14 @@
193  extern const char vsyscall_int80_start, vsyscall_int80_end;
194  extern const char vsyscall_sysenter_start, vsyscall_sysenter_end;
195  
196 +struct page *sysenter_page;
197 +
198  static int __init sysenter_setup(void)
199  {
200         void *page = (void *)get_zeroed_page(GFP_ATOMIC);
201  
202 -       __set_fixmap(FIX_VSYSCALL, __pa(page), PAGE_READONLY_EXEC);
203 +       __set_fixmap(FIX_VSYSCALL, __pa(page), PAGE_KERNEL_RO);
204 +       sysenter_page = virt_to_page(page);
205  
206         if (!boot_cpu_has(X86_FEATURE_SEP)) {
207                 memcpy(page,
208 @@ -59,7 +63,51 @@
209                &vsyscall_sysenter_end - &vsyscall_sysenter_start);
210  
211         on_each_cpu(enable_sep_cpu, NULL, 1, 1);
212 +
213         return 0;
214  }
215  
216  __initcall(sysenter_setup);
217 +
218 +extern void SYSENTER_RETURN_OFFSET;
219 +
220 +unsigned int vdso_enabled = 0;
221 +
222 +void map_vsyscall(void)
223 +{
224 +       struct thread_info *ti = current_thread_info();
225 +       struct vm_area_struct *vma;
226 +       unsigned long addr;
227 +
228 +       /* No vDSO unless it is enabled and the mapping below succeeds. */
229 +       current->mm->context.vdso = NULL;
230 +       if (unlikely(!vdso_enabled))
231 +               return;
232 +
233 +       /*
234 +        * Map the vDSO (it will be randomized). do_mmap() reports failure
235 +        * as a non-page-aligned -errno value, not only as -1:
236 +        */
237 +       down_write(&current->mm->mmap_sem);
238 +       addr = do_mmap(NULL, 0, 4096, PROT_READ | PROT_EXEC, MAP_PRIVATE, 0);
239 +       if (!(addr & ~PAGE_MASK)) {
240 +               current->mm->context.vdso = (void *)addr;
241 +               ti->sysenter_return = (void *)addr + (long)&SYSENTER_RETURN_OFFSET;
242 +               vma = find_vma(current->mm, addr);
243 +               if (vma) {
244 +                       pgprot_val(vma->vm_page_prot) &= ~_PAGE_RW;
245 +                       get_page(sysenter_page);
246 +                       install_page(current->mm, vma, addr,
247 +                                       sysenter_page, vma->vm_page_prot);
248 +               }
249 +       }
250 +       up_write(&current->mm->mmap_sem);
251 +}
252 +
253 +static int __init vdso_setup(char *str)
254 +{
255 +        vdso_enabled = simple_strtoul(str, NULL, 0);
256 +        return 1;
257 +}
258 +__setup("vdso=", vdso_setup);
259 +
260 Index: linux-2.6.10/arch/i386/kernel/traps.c
261 ===================================================================
262 --- linux-2.6.10.orig/arch/i386/kernel/traps.c  2005-04-05 16:29:30.193000640 +0800
263 +++ linux-2.6.10/arch/i386/kernel/traps.c       2005-04-05 16:43:17.073295728 +0800
264 @@ -497,6 +497,10 @@
265  DO_ERROR(12, SIGBUS,  "stack segment", stack_segment)
266  DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0)
267  
268 +/*
269 + * the original non-exec stack patch was written by
270 + * Solar Designer <solar at openwall.com>. Thanks!
271 + */
272  fastcall void do_general_protection(struct pt_regs * regs, long error_code)
273  {
274         int cpu = get_cpu();
275 @@ -535,6 +539,46 @@
276         if (!(regs->xcs & 3))
277                 goto gp_in_kernel;
278  
279 +       /*
280 +        * lazy-check for CS validity on exec-shield binaries:
281 +        */
282 +       if (current->mm) {
283 +               int cpu = smp_processor_id();
284 +               struct desc_struct *desc1, *desc2;
285 +               struct vm_area_struct *vma;
286 +               unsigned long limit = 0;
287 +               
288 +               spin_lock(&current->mm->page_table_lock);
289 +               for (vma = current->mm->mmap; vma; vma = vma->vm_next)
290 +                       if ((vma->vm_flags & VM_EXEC) && (vma->vm_end > limit))
291 +                               limit = vma->vm_end;
292 +               spin_unlock(&current->mm->page_table_lock);
293 +
294 +               current->mm->context.exec_limit = limit;
295 +               set_user_cs(&current->mm->context.user_cs, limit);
296 +
297 +               desc1 = &current->mm->context.user_cs;
298 +               desc2 = per_cpu(cpu_gdt_table, cpu) + GDT_ENTRY_DEFAULT_USER_CS;
299 +
300 +               /*
301 +                * The CS was not in sync - reload it and retry the
302 +                * instruction. If the instruction still faults then
303 +                * we won't hit this branch next time around.
304 +                */
305 +               if (desc1->a != desc2->a || desc1->b != desc2->b) {
306 +                       if (print_fatal_signals >= 2) {
307 +                               printk("#GPF fixup (%ld[seg:%lx]) at %08lx, CPU#%d.\n", error_code, error_code/8, regs->eip, smp_processor_id());
308 +                               printk(" exec_limit: %08lx, user_cs: %08lx/%08lx, CPU_cs: %08lx/%08lx.\n", current->mm->context.exec_limit, desc1->a, desc1->b, desc2->a, desc2->b);
309 +                       }
310 +                       load_user_cs_desc(cpu, current->mm);
311 +                       return;
312 +               }
313 +       }
314 +       if (print_fatal_signals) {
315 +               printk("#GPF(%ld[seg:%lx]) at %08lx, CPU#%d.\n", error_code, error_code/8, regs->eip, smp_processor_id());
316 +               printk(" exec_limit: %08lx, user_cs: %08lx/%08lx.\n", current->mm->context.exec_limit, current->mm->context.user_cs.a, current->mm->context.user_cs.b);
317 +       }
318 +
319         current->thread.error_code = error_code;
320         current->thread.trap_no = 13;
321         force_sig(SIGSEGV, current);
322 Index: linux-2.6.10/arch/i386/kernel/vsyscall.lds.S
323 ===================================================================
324 --- linux-2.6.10.orig/arch/i386/kernel/vsyscall.lds.S   2004-12-25 05:34:31.000000000 +0800
325 +++ linux-2.6.10/arch/i386/kernel/vsyscall.lds.S        2005-04-05 16:34:18.169221600 +0800
326 @@ -7,7 +7,7 @@
327  
328  SECTIONS
329  {
330 -  . = VSYSCALL_BASE + SIZEOF_HEADERS;
331 +  . = SIZEOF_HEADERS;
332  
333    .hash           : { *(.hash) }               :text
334    .dynsym         : { *(.dynsym) }
335 @@ -20,7 +20,7 @@
336       For the layouts to match, we need to skip more than enough
337       space for the dynamic symbol table et al.  If this amount
338       is insufficient, ld -shared will barf.  Just increase it here.  */
339 -  . = VSYSCALL_BASE + 0x400;
340 +  . = 0x400;
341  
342    .text           : { *(.text) }               :text =0x90909090
343  
344 Index: linux-2.6.10/arch/i386/kernel/vsyscall-sysenter.S
345 ===================================================================
346 --- linux-2.6.10.orig/arch/i386/kernel/vsyscall-sysenter.S      2004-12-25 05:34:32.000000000 +0800
347 +++ linux-2.6.10/arch/i386/kernel/vsyscall-sysenter.S   2005-04-05 16:34:18.170221448 +0800
348 @@ -24,11 +24,11 @@
349         /* 7: align return point with nop's to make disassembly easier */
350         .space 7,0x90
351  
352 -       /* 14: System call restart point is here! (SYSENTER_RETURN - 2) */
353 +       /* 14: System call restart point is here! (SYSENTER_RETURN_OFFSET-2) */
354         jmp .Lenter_kernel
355         /* 16: System call normal return point is here! */
356 -       .globl SYSENTER_RETURN  /* Symbol used by entry.S.  */
357 -SYSENTER_RETURN:
358 +       .globl SYSENTER_RETURN_OFFSET   /* Symbol used by sysenter.c  */
359 +SYSENTER_RETURN_OFFSET:
360         pop %ebp
361  .Lpop_ebp:
362         pop %edx
363 Index: linux-2.6.10/arch/i386/mm/init.c
364 ===================================================================
365 --- linux-2.6.10.orig/arch/i386/mm/init.c       2005-04-05 16:29:28.016331544 +0800
366 +++ linux-2.6.10/arch/i386/mm/init.c    2005-04-05 16:34:18.167221904 +0800
367 @@ -518,7 +518,10 @@
368         set_nx();
369         if (nx_enabled)
370                 printk("NX (Execute Disable) protection: active\n");
371 +       else
372  #endif
373 +       if (exec_shield)
374 +               printk("Using x86 segment limits to approximate NX protection\n");
375  
376         pagetable_init();
377  
378 Index: linux-2.6.10/arch/i386/mm/mmap.c
379 ===================================================================
380 --- linux-2.6.10.orig/arch/i386/mm/mmap.c       2004-12-25 05:34:33.000000000 +0800
381 +++ linux-2.6.10/arch/i386/mm/mmap.c    2005-04-05 16:43:44.365146736 +0800
382 @@ -26,6 +26,7 @@
383  
384  #include <linux/personality.h>
385  #include <linux/mm.h>
386 +#include <linux/random.h>
387  
388  /*
389   * Top of mmap area (just below the process stack).
390 @@ -38,13 +39,17 @@
391  static inline unsigned long mmap_base(struct mm_struct *mm)
392  {
393         unsigned long gap = current->signal->rlim[RLIMIT_STACK].rlim_cur;
394 +       unsigned long random_factor = 0;
395 +
396 +       if (current->flags & PF_RELOCEXEC)
397 +               random_factor = get_random_int() % (1024*1024);
398  
399         if (gap < MIN_GAP)
400                 gap = MIN_GAP;
401         else if (gap > MAX_GAP)
402                 gap = MAX_GAP;
403  
404 -       return TASK_SIZE - (gap & PAGE_MASK);
405 +       return PAGE_ALIGN(TASK_SIZE - gap - random_factor);
406  }
407  
408  /*
409 @@ -57,15 +62,17 @@
410          * Fall back to the standard layout if the personality
411          * bit is set, or if the expected stack growth is unlimited:
412          */
413 -       if (sysctl_legacy_va_layout ||
414 +       if ((exec_shield != 2) && (sysctl_legacy_va_layout ||
415                         (current->personality & ADDR_COMPAT_LAYOUT) ||
416 -                       current->signal->rlim[RLIMIT_STACK].rlim_cur == RLIM_INFINITY) {
417 +                       current->signal->rlim[RLIMIT_STACK].rlim_cur == RLIM_INFINITY)){
418                 mm->mmap_base = TASK_UNMAPPED_BASE;
419                 mm->get_unmapped_area = arch_get_unmapped_area;
420                 mm->unmap_area = arch_unmap_area;
421         } else {
422                 mm->mmap_base = mmap_base(mm);
423                 mm->get_unmapped_area = arch_get_unmapped_area_topdown;
424 +               if (current->flags & PF_RELOCEXEC)
425 +                       mm->get_unmapped_exec_area = arch_get_unmapped_exec_area;
426                 mm->unmap_area = arch_unmap_area_topdown;
427         }
428  }
429 Index: linux-2.6.10/arch/ia64/ia32/binfmt_elf32.c
430 ===================================================================
431 --- linux-2.6.10.orig/arch/ia64/ia32/binfmt_elf32.c     2004-12-25 05:35:28.000000000 +0800
432 +++ linux-2.6.10/arch/ia64/ia32/binfmt_elf32.c  2005-04-05 16:34:18.174220840 +0800
433 @@ -272,7 +272,7 @@
434  }
435  
436  static unsigned long
437 -elf32_map (struct file *filep, unsigned long addr, struct elf_phdr *eppnt, int prot, int type)
438 +elf32_map (struct file *filep, unsigned long addr, struct elf_phdr *eppnt, int prot, int type, unsigned long unused)
439  {
440         unsigned long pgoff = (eppnt->p_vaddr) & ~IA32_PAGE_MASK;
441  
442 Index: linux-2.6.10/arch/x86_64/ia32/ia32_binfmt.c
443 ===================================================================
444 --- linux-2.6.10.orig/arch/x86_64/ia32/ia32_binfmt.c    2004-12-25 05:33:49.000000000 +0800
445 +++ linux-2.6.10/arch/x86_64/ia32/ia32_binfmt.c 2005-04-05 16:34:18.175220688 +0800
446 @@ -390,7 +390,7 @@
447  }
448  
449  static unsigned long
450 -elf32_map (struct file *filep, unsigned long addr, struct elf_phdr *eppnt, int prot, int type)
451 +elf32_map (struct file *filep, unsigned long addr, struct elf_phdr *eppnt, int prot, int type, unsigned long unused)
452  {
453         unsigned long map_addr;
454         struct task_struct *me = current; 
455 Index: linux-2.6.10/drivers/char/random.c
456 ===================================================================
457 --- linux-2.6.10.orig/drivers/char/random.c     2005-04-05 16:29:24.214909448 +0800
458 +++ linux-2.6.10/drivers/char/random.c  2005-04-05 16:34:18.197217344 +0800
459 @@ -2469,3 +2469,37 @@
460  }
461  #endif
462  #endif /* CONFIG_INET */
463 +
464 +/*
465 + * Get a random word (always 0 when exec_shield_randomize is off):
466 + */
467 +unsigned int get_random_int(void)
468 +{
469 +       unsigned int val = 0;
470 +
471 +       if (!exec_shield_randomize)
472 +               return 0;
473 +
474 +#ifdef CONFIG_X86_HAS_TSC
475 +       rdtscl(val);
476 +#endif
477 +       val += current->pid + jiffies + (unsigned long)&val;
478 +
479 +       /*
480 +        * Use IP's RNG. It suits our purpose perfectly: it re-keys itself
481 +        * every second, from the entropy pool (and thus creates a limited
482 +        * drain on it), and uses halfMD4Transform within the second. We
483 +        * also spice it with the TSC (if available), jiffies, PID and the
484 +        * stack address:
485 +        */
486 +       return secure_ip_id(val);
487 +}
488 +
489 +unsigned long randomize_range(unsigned long start, unsigned long end, unsigned long len)
490 +{
491 +       /* No room for a 'len'-sized area inside [start, end): return 0. */
492 +       if (start + len >= end)
493 +               return 0;
494 +       return PAGE_ALIGN(start + get_random_int() % (end - len - start));
495 +}
496 +
497 Index: linux-2.6.10/fs/binfmt_elf.c
498 ===================================================================
499 --- linux-2.6.10.orig/fs/binfmt_elf.c   2005-04-05 16:29:24.353888320 +0800
500 +++ linux-2.6.10/fs/binfmt_elf.c        2005-04-05 16:39:25.042569760 +0800
501 @@ -494,7 +494,7 @@
502         unsigned long reloc_func_desc = 0;
503         char passed_fileno[6];
504         struct files_struct *files;
505 -       int have_pt_gnu_stack, executable_stack = EXSTACK_DEFAULT;
506 +       int have_pt_gnu_stack, relocexec, executable_stack = EXSTACK_DEFAULT;
507         unsigned long def_flags = 0;
508         struct {
509                 struct elfhdr elf_ex;
510 @@ -660,6 +660,24 @@
511                 }
512         have_pt_gnu_stack = (i < loc->elf_ex.e_phnum);
513  
514 +        relocexec = 0;
515 +
516 +        if (current->personality == PER_LINUX)
517 +        switch (exec_shield) {
518 +        case 1:
519 +                if (executable_stack == EXSTACK_DISABLE_X) {
520 +                        current->flags |= PF_RELOCEXEC;
521 +                        relocexec = PF_RELOCEXEC;
522 +                }
523 +                break;
524 +
525 +        case 2:
526 +                executable_stack = EXSTACK_DISABLE_X;
527 +                current->flags |= PF_RELOCEXEC;
528 +                relocexec = PF_RELOCEXEC;
529 +                break;
530 +        }
531 +
532         /* Some simple consistency checks for the interpreter */
533         if (elf_interpreter) {
534                 interpreter_type = INTERPRETER_ELF | INTERPRETER_AOUT;
535 @@ -713,6 +731,15 @@
536         if (retval)
537                 goto out_free_dentry;
538  
539 +        current->flags |= relocexec;
540 +#ifdef __i386__
541 +        /*
542 +         * Turn off the CS limit completely if exec-shield disabled or
543 +         * NX active:
544 +         */
545 +        if (!exec_shield || executable_stack != EXSTACK_DISABLE_X || nx_enabled)
546 +                arch_add_exec_range(current->mm, -1);
547 +#endif
548         /* Discard our unneeded old files struct */
549         if (files) {
550                 steal_locks(files);
551 @@ -731,7 +758,8 @@
552         /* Do this immediately, since STACK_TOP as used in setup_arg_pages
553            may depend on the personality.  */
554         SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter);
555 -       if (elf_read_implies_exec(loc->elf_ex, have_pt_gnu_stack))
556 +       if (exec_shield != 2 &&
557 +                       elf_read_implies_exec(loc->elf_ex, have_pt_gnu_stack))
558                 current->personality |= READ_IMPLIES_EXEC;
559  
560         arch_pick_mmap_layout(current->mm);
561 @@ -894,6 +922,14 @@
562  
563         set_binfmt(&elf_format);
564  
565 +        /*
566 +         * Map the vsyscall trampoline. This address is then passed via
567 +         * AT_SYSINFO.
568 +         */
569 +#ifdef __HAVE_ARCH_VSYSCALL
570 +        map_vsyscall();
571 +#endif
572 +
573         compute_creds(bprm);
574         current->flags &= ~PF_FORKNOEXEC;
575         create_elf_tables(bprm, &loc->elf_ex, (interpreter_type == INTERPRETER_AOUT),
576 Index: linux-2.6.10/fs/exec.c
577 ===================================================================
578 --- linux-2.6.10.orig/fs/exec.c 2005-04-05 16:29:30.270988784 +0800
579 +++ linux-2.6.10/fs/exec.c      2005-04-05 16:34:18.177220384 +0800
580 @@ -396,7 +396,12 @@
581         while (i < MAX_ARG_PAGES)
582                 bprm->page[i++] = NULL;
583  #else
584 +#ifdef __HAVE_ARCH_ALIGN_STACK
585 +       stack_base = arch_align_stack(STACK_TOP - MAX_ARG_PAGES*PAGE_SIZE);
586 +       stack_base = PAGE_ALIGN(stack_base);
587 +#else
588         stack_base = STACK_TOP - MAX_ARG_PAGES * PAGE_SIZE;
589 +#endif
590         bprm->p += stack_base;
591         mm->arg_start = bprm->p;
592         arg_size = STACK_TOP - (PAGE_MASK & (unsigned long) mm->arg_start);
593 @@ -854,6 +859,7 @@
594         tcomm[i] = '\0';
595         set_task_comm(current, tcomm);
596  
597 +       current->flags &= ~PF_RELOCEXEC;
598         flush_thread();
599  
600         if (bprm->e_uid != current->euid || bprm->e_gid != current->egid || 
601 Index: linux-2.6.10/fs/proc/array.c
602 ===================================================================
603 --- linux-2.6.10.orig/fs/proc/array.c   2004-12-25 05:35:00.000000000 +0800
604 +++ linux-2.6.10/fs/proc/array.c        2005-04-05 16:34:18.180219928 +0800
605 @@ -373,8 +373,12 @@
606         ppid = pid_alive(task) ? task->group_leader->real_parent->tgid : 0;
607         read_unlock(&tasklist_lock);
608  
609 -       if (!whole || num_threads<2)
610 -               wchan = get_wchan(task);
611 +       if (!whole || num_threads<2) {
612 +               wchan = 0;
613 +               if (current->uid == task->uid || current->euid == task->uid ||
614 +                                               capable(CAP_SYS_NICE))
615 +                       wchan = get_wchan(task);
616 +       }
617         if (!whole) {
618                 min_flt = task->min_flt;
619                 maj_flt = task->maj_flt;
620 Index: linux-2.6.10/fs/proc/base.c
621 ===================================================================
622 --- linux-2.6.10.orig/fs/proc/base.c    2005-04-05 16:29:24.361887104 +0800
623 +++ linux-2.6.10/fs/proc/base.c 2005-04-05 16:34:18.179220080 +0800
624 @@ -117,7 +117,7 @@
625         E(PROC_TGID_CMDLINE,   "cmdline", S_IFREG|S_IRUGO),
626         E(PROC_TGID_STAT,      "stat",    S_IFREG|S_IRUGO),
627         E(PROC_TGID_STATM,     "statm",   S_IFREG|S_IRUGO),
628 -       E(PROC_TGID_MAPS,      "maps",    S_IFREG|S_IRUGO),
629 +       E(PROC_TGID_MAPS,      "maps",    S_IFREG|S_IRUSR),
630         E(PROC_TGID_MEM,       "mem",     S_IFREG|S_IRUSR|S_IWUSR),
631         E(PROC_TGID_CWD,       "cwd",     S_IFLNK|S_IRWXUGO),
632         E(PROC_TGID_ROOT,      "root",    S_IFLNK|S_IRWXUGO),
633 @@ -142,7 +142,7 @@
634         E(PROC_TID_CMDLINE,    "cmdline", S_IFREG|S_IRUGO),
635         E(PROC_TID_STAT,       "stat",    S_IFREG|S_IRUGO),
636         E(PROC_TID_STATM,      "statm",   S_IFREG|S_IRUGO),
637 -       E(PROC_TID_MAPS,       "maps",    S_IFREG|S_IRUGO),
638 +       E(PROC_TID_MAPS,       "maps",    S_IFREG|S_IRUSR),
639         E(PROC_TID_MEM,        "mem",     S_IFREG|S_IRUSR|S_IWUSR),
640         E(PROC_TID_CWD,        "cwd",     S_IFLNK|S_IRWXUGO),
641         E(PROC_TID_ROOT,       "root",    S_IFLNK|S_IRWXUGO),
642 Index: linux-2.6.10/fs/proc/task_mmu.c
643 ===================================================================
644 --- linux-2.6.10.orig/fs/proc/task_mmu.c        2004-12-25 05:34:01.000000000 +0800
645 +++ linux-2.6.10/fs/proc/task_mmu.c     2005-04-05 16:41:11.796340720 +0800
646 @@ -14,19 +14,28 @@
647         buffer += sprintf(buffer,
648                 "VmSize:\t%8lu kB\n"
649                 "VmLck:\t%8lu kB\n"
650                 "VmRSS:\t%8lu kB\n"
651 -               "VmData:\t%8lu kB\n"
652 -               "VmStk:\t%8lu kB\n"
653 -               "VmExe:\t%8lu kB\n"
654 -               "VmLib:\t%8lu kB\n"
655 -               "VmPTE:\t%8lu kB\n",
656 -               (mm->total_vm - mm->reserved_vm) << (PAGE_SHIFT-10),
657 -               mm->locked_vm << (PAGE_SHIFT-10),
658 -               mm->rss << (PAGE_SHIFT-10),
659 -               data << (PAGE_SHIFT-10),
660 -               mm->stack_vm << (PAGE_SHIFT-10), text, lib,
661 -               (PTRS_PER_PTE*sizeof(pte_t)*mm->nr_ptes) >> 10);
662 -       return buffer;
663 +                "VmData:\t%8lu kB\n"
664 +                "VmStk:\t%8lu kB\n"
665 +                "VmExe:\t%8lu kB\n"
666 +                "VmLib:\t%8lu kB\n"
667 +                "VmPTE:\t%8lu kB\n"
668 +                "StaBrk:\t%08lx kB\n"
669 +                "Brk:\t%08lx kB\n"
670 +                "StaStk:\t%08lx kB\n" ,
671 +                (mm->total_vm - mm->reserved_vm) << (PAGE_SHIFT-10),
672 +                mm->locked_vm << (PAGE_SHIFT-10),
673 +                mm->rss << (PAGE_SHIFT-10),
674 +                data << (PAGE_SHIFT-10),
675 +                mm->stack_vm << (PAGE_SHIFT-10), text, lib,
676 +                (PTRS_PER_PTE*sizeof(pte_t)*mm->nr_ptes) >> 10,
677 +                mm->start_brk, mm->brk, mm->start_stack);
678 +#ifdef __i386__
679 +        if (!nx_enabled)
680 +                buffer += sprintf(buffer,
681 +                                "ExecLim:\t%08lx\n", mm->context.exec_limit);
682 +#endif
683 +        return buffer;
684  }
685  
686  unsigned long task_vsize(struct mm_struct *mm)
687 @@ -47,6 +56,9 @@
688  
689  static int show_map(struct seq_file *m, void *v)
690  {
691 +#ifdef __i386__
692 +       struct task_struct *task = m->private;
693 +#endif
694         struct vm_area_struct *map = v;
695         struct file *file = map->vm_file;
696         int flags = map->vm_flags;
697 @@ -65,7 +77,13 @@
698                         map->vm_end,
699                         flags & VM_READ ? 'r' : '-',
700                         flags & VM_WRITE ? 'w' : '-',
701 -                       flags & VM_EXEC ? 'x' : '-',
702 +                       (flags & VM_EXEC
703 +#ifdef __i386__
704 +                               || (!nx_enabled &&
705 +                               (map->vm_start < task->mm->context.exec_limit))
706 +#endif
707 +                       )
708 +                               ? 'x' : '-',
709                         flags & VM_MAYSHARE ? 's' : 'p',
710                         map->vm_pgoff << PAGE_SHIFT,
711                         MAJOR(dev), MINOR(dev), ino, &len);
712 Index: linux-2.6.10/include/asm-i386/desc.h
713 ===================================================================
714 --- linux-2.6.10.orig/include/asm-i386/desc.h   2005-04-05 16:29:30.129010368 +0800
715 +++ linux-2.6.10/include/asm-i386/desc.h        2005-04-05 16:34:18.188218712 +0800
716 @@ -129,6 +129,20 @@
717  extern int __modify_ldt(struct mm_struct * mm, int func, void __user *ptr,
718                       unsigned long bytecount);
719  
720 +static inline void set_user_cs(struct desc_struct *desc, unsigned long limit)
721 +{
722 +       limit = (limit - 1) / PAGE_SIZE;
723 +       desc->a = limit & 0xffff;
724 +       desc->b = (limit & 0xf0000) | 0x00c0fb00;
725 +}
726 +
727 +#define load_user_cs_desc(cpu, mm) \
728 +       per_cpu(cpu_gdt_table, cpu)[GDT_ENTRY_DEFAULT_USER_CS] = (mm)->context.user_cs
729 +
730 +extern void arch_add_exec_range(struct mm_struct *mm, unsigned long limit);
731 +extern void arch_remove_exec_range(struct mm_struct *mm, unsigned long limit);
732 +extern void arch_flush_exec_range(struct mm_struct *mm);
733 +
734  #endif /* !__ASSEMBLY__ */
735  
736  #endif
737 Index: linux-2.6.10/include/asm-i386/elf.h
738 ===================================================================
739 --- linux-2.6.10.orig/include/asm-i386/elf.h    2004-12-25 05:35:15.000000000 +0800
740 +++ linux-2.6.10/include/asm-i386/elf.h 2005-04-05 16:34:18.188218712 +0800
741 @@ -9,6 +9,7 @@
742  #include <asm/user.h>
743  #include <asm/processor.h>
744  #include <asm/system.h>                /* for savesegment */
745 +#include <asm/desc.h>
746  
747  #include <linux/utsname.h>
748  
749 @@ -133,15 +134,22 @@
750  #define ELF_CORE_COPY_FPREGS(tsk, elf_fpregs) dump_task_fpu(tsk, elf_fpregs)
751  #define ELF_CORE_COPY_XFPREGS(tsk, elf_xfpregs) dump_task_extended_fpu(tsk, elf_xfpregs)
752  
753 -#define VSYSCALL_BASE  (__fix_to_virt(FIX_VSYSCALL))
754 -#define VSYSCALL_EHDR  ((const struct elfhdr *) VSYSCALL_BASE)
755 -#define VSYSCALL_ENTRY ((unsigned long) &__kernel_vsyscall)
756  extern void __kernel_vsyscall;
757 +#define VSYSCALL_BASE  ((unsigned long)current->mm->context.vdso)
758 +#define VSYSCALL_EHDR  ((const struct elfhdr *) VSYSCALL_BASE)
759 +#define VSYSCALL_OFFSET        ((unsigned long) &__kernel_vsyscall)
760 +#define VSYSCALL_ENTRY (VSYSCALL_BASE + VSYSCALL_OFFSET)
761  
762 -#define ARCH_DLINFO                                            \
763 -do {                                                           \
764 -               NEW_AUX_ENT(AT_SYSINFO, VSYSCALL_ENTRY);        \
765 -               NEW_AUX_ENT(AT_SYSINFO_EHDR, VSYSCALL_BASE);    \
766 +/* kernel-internal fixmap address: */
767 +#define __VSYSCALL_BASE        (__fix_to_virt(FIX_VSYSCALL))
768 +#define __VSYSCALL_EHDR        ((const struct elfhdr *) __VSYSCALL_BASE)
769 +
770 +#define ARCH_DLINFO                                                    \
771 +do {                                                                   \
772 +       if (VSYSCALL_BASE) {                                            \
773 +               NEW_AUX_ENT(AT_SYSINFO, VSYSCALL_ENTRY);                \
774 +               NEW_AUX_ENT(AT_SYSINFO_EHDR, VSYSCALL_BASE);            \
775 +       }                                                               \
776  } while (0)
777  
778  /*
779 @@ -152,15 +160,15 @@
780   * Dumping its extra ELF program headers includes all the other information
781   * a debugger needs to easily find how the vsyscall DSO was being used.
782   */
783 -#define ELF_CORE_EXTRA_PHDRS           (VSYSCALL_EHDR->e_phnum)
784 +#define ELF_CORE_EXTRA_PHDRS           (__VSYSCALL_EHDR->e_phnum)
785  #define ELF_CORE_WRITE_EXTRA_PHDRS                                           \
786  do {                                                                         \
787         const struct elf_phdr *const vsyscall_phdrs =                         \
788 -               (const struct elf_phdr *) (VSYSCALL_BASE                      \
789 -                                          + VSYSCALL_EHDR->e_phoff);         \
790 +               (const struct elf_phdr *) (__VSYSCALL_BASE                    \
791 +                                          + __VSYSCALL_EHDR->e_phoff);       \
792         int i;                                                                \
793         Elf32_Off ofs = 0;                                                    \
794 -       for (i = 0; i < VSYSCALL_EHDR->e_phnum; ++i) {                        \
795 +       for (i = 0; i < __VSYSCALL_EHDR->e_phnum; ++i) {                      \
796                 struct elf_phdr phdr = vsyscall_phdrs[i];                     \
797                 if (phdr.p_type == PT_LOAD) {                                 \
798                         BUG_ON(ofs != 0);                                     \
799 @@ -178,10 +186,10 @@
800  #define ELF_CORE_WRITE_EXTRA_DATA                                            \
801  do {                                                                         \
802         const struct elf_phdr *const vsyscall_phdrs =                         \
803 -               (const struct elf_phdr *) (VSYSCALL_BASE                      \
804 -                                          + VSYSCALL_EHDR->e_phoff);         \
805 +               (const struct elf_phdr *) (__VSYSCALL_BASE                    \
806 +                                          + __VSYSCALL_EHDR->e_phoff);       \
807         int i;                                                                \
808 -       for (i = 0; i < VSYSCALL_EHDR->e_phnum; ++i) {                        \
809 +       for (i = 0; i < __VSYSCALL_EHDR->e_phnum; ++i) {                      \
810                 if (vsyscall_phdrs[i].p_type == PT_LOAD)                      \
811                         DUMP_WRITE((void *) vsyscall_phdrs[i].p_vaddr,        \
812                                    PAGE_ALIGN(vsyscall_phdrs[i].p_memsz));    \
813 @@ -190,4 +198,10 @@
814  
815  #endif
816  
817 +#define __HAVE_ARCH_RANDOMIZE_BRK
818 +extern void randomize_brk(unsigned long old_brk);
819 +
820 +#define __HAVE_ARCH_VSYSCALL
821 +extern void map_vsyscall(void);
822 +
823  #endif
824 Index: linux-2.6.10/include/asm-i386/mmu.h
825 ===================================================================
826 --- linux-2.6.10.orig/include/asm-i386/mmu.h    2004-12-25 05:35:00.000000000 +0800
827 +++ linux-2.6.10/include/asm-i386/mmu.h 2005-04-05 16:34:18.189218560 +0800
828 @@ -7,11 +7,17 @@
829   * we put the segment information here.
830   *
831   * cpu_vm_mask is used to optimize ldt flushing.
832 + *
833 + * exec_limit is used to track the range PROT_EXEC
834 + * mappings span.
835   */
836  typedef struct { 
837         int size;
838         struct semaphore sem;
839         void *ldt;
840 +       struct desc_struct user_cs;
841 +       unsigned long exec_limit;
842 +       void *vdso;
843  } mm_context_t;
844  
845  #endif
846 Index: linux-2.6.10/include/asm-i386/pgalloc.h
847 ===================================================================
848 --- linux-2.6.10.orig/include/asm-i386/pgalloc.h        2004-12-25 05:33:50.000000000 +0800
849 +++ linux-2.6.10/include/asm-i386/pgalloc.h     2005-04-05 16:34:18.190218408 +0800
850 @@ -4,6 +4,7 @@
851  #include <linux/config.h>
852  #include <asm/processor.h>
853  #include <asm/fixmap.h>
854 +#include <asm/desc.h>
855  #include <linux/threads.h>
856  #include <linux/mm.h>          /* for struct page */
857  
858 Index: linux-2.6.10/include/asm-i386/processor.h
859 ===================================================================
860 --- linux-2.6.10.orig/include/asm-i386/processor.h      2004-12-25 05:33:50.000000000 +0800
861 +++ linux-2.6.10/include/asm-i386/processor.h   2005-04-05 16:34:18.189218560 +0800
862 @@ -296,7 +296,10 @@
863  /* This decides where the kernel will search for a free chunk of vm
864   * space during mmap's.
865   */
866 -#define TASK_UNMAPPED_BASE     (PAGE_ALIGN(TASK_SIZE / 3))
867 +#define TASK_UNMAPPED_BASE     PAGE_ALIGN(TASK_SIZE/3)
868 +
869 +#define __HAVE_ARCH_ALIGN_STACK
870 +extern unsigned long arch_align_stack(unsigned long sp);
871  
872  #define HAVE_ARCH_PICK_MMAP_LAYOUT
873  
874 @@ -478,6 +481,7 @@
875         regs->xcs = __USER_CS;                                  \
876         regs->eip = new_eip;                                    \
877         regs->esp = new_esp;                                    \
878 +       load_user_cs_desc(smp_processor_id(), current->mm);     \
879  } while (0)
880  
881  /* Forward declaration, a strange C thing */
882 Index: linux-2.6.10/include/asm-i386/thread_info.h
883 ===================================================================
884 --- linux-2.6.10.orig/include/asm-i386/thread_info.h    2005-04-05 16:29:30.127010672 +0800
885 +++ linux-2.6.10/include/asm-i386/thread_info.h 2005-04-05 16:34:18.190218408 +0800
886 @@ -38,6 +38,7 @@
887                                                    0-0xBFFFFFFF for user-thead
888                                                    0-0xFFFFFFFF for kernel-thread
889                                                 */
890 +       void                    *sysenter_return;
891         struct restart_block    restart_block;
892  
893         unsigned long           previous_esp;   /* ESP of the previous stack in case
894 Index: linux-2.6.10/include/asm-ia64/pgalloc.h
895 ===================================================================
896 --- linux-2.6.10.orig/include/asm-ia64/pgalloc.h        2004-12-25 05:33:49.000000000 +0800
897 +++ linux-2.6.10/include/asm-ia64/pgalloc.h     2005-04-05 16:34:18.184219320 +0800
898 @@ -23,6 +23,10 @@
899  #include <asm/mmu_context.h>
900  #include <asm/processor.h>
901  
902 +#define arch_add_exec_range(mm, limit)         do { ; } while (0)
903 +#define arch_flush_exec_range(mm)              do { ; } while (0)
904 +#define arch_remove_exec_range(mm, limit)      do { ; } while (0)
905 +
906  /*
907   * Very stupidly, we used to get new pgd's and pmd's, init their contents
908   * to point to the NULL versions of the next level page table, later on
909 Index: linux-2.6.10/include/asm-ppc64/pgalloc.h
910 ===================================================================
911 --- linux-2.6.10.orig/include/asm-ppc64/pgalloc.h       2004-12-25 05:33:50.000000000 +0800
912 +++ linux-2.6.10/include/asm-ppc64/pgalloc.h    2005-04-05 16:34:18.185219168 +0800
913 @@ -11,6 +11,11 @@
914  
915  extern kmem_cache_t *zero_cache;
916  
917 +/* No-op stub macros: exec-shield is not supported on ppc64 */
918 +#define arch_add_exec_range(mm, limit) do { ; } while (0)
919 +#define arch_flush_exec_range(mm)      do { ; } while (0)
920 +#define arch_remove_exec_range(mm, limit) do { ; } while (0)
921 +
922  /*
923   * This program is free software; you can redistribute it and/or
924   * modify it under the terms of the GNU General Public License
925 Index: linux-2.6.10/include/asm-ppc/pgalloc.h
926 ===================================================================
927 --- linux-2.6.10.orig/include/asm-ppc/pgalloc.h 2004-12-25 05:33:48.000000000 +0800
928 +++ linux-2.6.10/include/asm-ppc/pgalloc.h      2005-04-05 16:34:18.183219472 +0800
929 @@ -40,5 +40,10 @@
930  
931  #define check_pgt_cache()      do { } while (0)
932  
933 +#define arch_add_exec_range(mm, limit)         do { ; } while (0)
934 +#define arch_flush_exec_range(mm)              do { ; } while (0)
935 +#define arch_remove_exec_range(mm, limit)      do { ; } while (0)
936 +
937 +
938  #endif /* _PPC_PGALLOC_H */
939  #endif /* __KERNEL__ */
940 Index: linux-2.6.10/include/asm-s390/pgalloc.h
941 ===================================================================
942 --- linux-2.6.10.orig/include/asm-s390/pgalloc.h        2004-12-25 05:35:00.000000000 +0800
943 +++ linux-2.6.10/include/asm-s390/pgalloc.h     2005-04-05 16:34:18.186219016 +0800
944 @@ -19,6 +19,10 @@
945  #include <linux/gfp.h>
946  #include <linux/mm.h>
947  
948 +#define arch_add_exec_range(mm, limit) do { ; } while (0)
949 +#define arch_flush_exec_range(mm)      do { ; } while (0)
950 +#define arch_remove_exec_range(mm, limit) do { ; } while (0)
951 +
952  #define check_pgt_cache()      do {} while (0)
953  
954  extern void diag10(unsigned long addr);
955 Index: linux-2.6.10/include/asm-sparc64/pgalloc.h
956 ===================================================================
957 --- linux-2.6.10.orig/include/asm-sparc64/pgalloc.h     2004-12-25 05:35:29.000000000 +0800
958 +++ linux-2.6.10/include/asm-sparc64/pgalloc.h  2005-04-05 16:34:18.187218864 +0800
959 @@ -261,4 +261,8 @@
960  #define pgd_free(pgd)          free_pgd_fast(pgd)
961  #define pgd_alloc(mm)          get_pgd_fast()
962  
963 +#define arch_add_exec_range(mm, limit)         do { ; } while (0)
964 +#define arch_flush_exec_range(mm)              do { ; } while (0)
965 +#define arch_remove_exec_range(mm, limit)      do { ; } while (0)
966 +
967  #endif /* _SPARC64_PGALLOC_H */
968 Index: linux-2.6.10/include/asm-sparc/pgalloc.h
969 ===================================================================
970 --- linux-2.6.10.orig/include/asm-sparc/pgalloc.h       2004-12-25 05:33:51.000000000 +0800
971 +++ linux-2.6.10/include/asm-sparc/pgalloc.h    2005-04-05 16:34:18.191218256 +0800
972 @@ -66,4 +66,8 @@
973  #define pte_free(pte)          BTFIXUP_CALL(pte_free)(pte)
974  #define __pte_free_tlb(tlb, pte)       pte_free(pte)
975  
976 +#define arch_add_exec_range(mm, limit)         do { ; } while (0)
977 +#define arch_flush_exec_range(mm)              do { ; } while (0)
978 +#define arch_remove_exec_range(mm, limit)      do { ; } while (0)
979 +
980  #endif /* _SPARC_PGALLOC_H */
981 Index: linux-2.6.10/include/asm-x86_64/pgalloc.h
982 ===================================================================
983 --- linux-2.6.10.orig/include/asm-x86_64/pgalloc.h      2004-12-25 05:34:57.000000000 +0800
984 +++ linux-2.6.10/include/asm-x86_64/pgalloc.h   2005-04-05 16:34:18.185219168 +0800
985 @@ -7,6 +7,11 @@
986  #include <linux/threads.h>
987  #include <linux/mm.h>
988  
989 +#define arch_add_exec_range(mm, limit) do { ; } while (0)
990 +#define arch_flush_exec_range(mm)      do { ; } while (0)
991 +#define arch_remove_exec_range(mm, limit) do { ; } while (0)
992 +
993 +
994  #define pmd_populate_kernel(mm, pmd, pte) \
995                 set_pmd(pmd, __pmd(_PAGE_TABLE | __pa(pte)))
996  #define pgd_populate(mm, pgd, pmd) \
997 Index: linux-2.6.10/include/linux/mm.h
998 ===================================================================
999 --- linux-2.6.10.orig/include/linux/mm.h        2005-04-05 16:29:30.250991824 +0800
1000 +++ linux-2.6.10/include/linux/mm.h     2005-04-05 16:43:44.366146584 +0800
1001 @@ -685,7 +685,14 @@
1002         unsigned long addr, unsigned long len, pgoff_t pgoff);
1003  extern void exit_mmap(struct mm_struct *);
1004  
1005 -extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
1006 +extern unsigned long get_unmapped_area_prot(struct file *, unsigned long, unsigned long, unsigned long, unsigned long, int);
1007 +
1008 +
1009 +static inline unsigned long get_unmapped_area(struct file * file, unsigned long addr, 
1010 +               unsigned long len, unsigned long pgoff, unsigned long flags)
1011 +{
1012 +       return get_unmapped_area_prot(file, addr, len, pgoff, flags, 0);        
1013 +}
1014  
1015  extern unsigned long __do_mmap_pgoff(struct mm_struct *mm, struct file *file,
1016                                    unsigned long addr, unsigned long len,
1017 Index: linux-2.6.10/include/linux/random.h
1018 ===================================================================
1019 --- linux-2.6.10.orig/include/linux/random.h    2004-12-25 05:35:40.000000000 +0800
1020 +++ linux-2.6.10/include/linux/random.h 2005-04-05 16:34:18.183219472 +0800
1021 @@ -69,6 +69,9 @@
1022  extern struct file_operations random_fops, urandom_fops;
1023  #endif
1024  
1025 +unsigned int get_random_int(void);
1026 +unsigned long randomize_range(unsigned long start, unsigned long end, unsigned long len);
1027 +
1028  #endif /* __KERNEL___ */
1029  
1030  #endif /* _LINUX_RANDOM_H */
1031 Index: linux-2.6.10/include/linux/resource.h
1032 ===================================================================
1033 --- linux-2.6.10.orig/include/linux/resource.h  2004-12-25 05:33:52.000000000 +0800
1034 +++ linux-2.6.10/include/linux/resource.h       2005-04-05 16:34:18.182219624 +0800
1035 @@ -52,8 +52,11 @@
1036  /*
1037   * Limit the stack by to some sane default: root can always
1038   * increase this limit if needed..  8MB seems reasonable.
1039 + *
1040 + * (2MB more, see EXEC_STACK_BIAS, to cover randomization effects.)
1041   */
1042 -#define _STK_LIM       (8*1024*1024)
1043 +#define _STK_LIM       (10*1024*1024)
1044 +#define EXEC_STACK_BIAS        (2*1024*1024)
1045  
1046  /*
1047   * GPG wants 32kB of mlocked memory, to make sure pass phrases
1048 Index: linux-2.6.10/include/linux/sched.h
1049 ===================================================================
1050 --- linux-2.6.10.orig/include/linux/sched.h     2005-04-05 16:29:27.971338384 +0800
1051 +++ linux-2.6.10/include/linux/sched.h  2005-04-05 16:43:44.367146432 +0800
1052 @@ -32,6 +32,9 @@
1053  #include <linux/topology.h>
1054  
1055  struct exec_domain;
1056 +extern int exec_shield;
1057 +extern int exec_shield_randomize;
1058 +extern int print_fatal_signals;
1059  
1060  /*
1061   * cloning flags:
1062 @@ -193,6 +196,10 @@
1063  extern unsigned long
1064  arch_get_unmapped_area(struct file *, unsigned long, unsigned long,
1065                        unsigned long, unsigned long);
1066 +
1067 +extern unsigned long
1068 +arch_get_unmapped_exec_area(struct file *, unsigned long, unsigned long,
1069 +                      unsigned long, unsigned long);
1070  extern unsigned long
1071  arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr,
1072                           unsigned long len, unsigned long pgoff,
1073 @@ -208,6 +215,9 @@
1074         unsigned long (*get_unmapped_area) (struct file *filp,
1075                                 unsigned long addr, unsigned long len,
1076                                 unsigned long pgoff, unsigned long flags);
1077 +       unsigned long (*get_unmapped_exec_area) (struct file *filp,
1078 +                               unsigned long addr, unsigned long len,
1079 +                               unsigned long pgoff, unsigned long flags);
1080         void (*unmap_area) (struct vm_area_struct *area);
1081         unsigned long mmap_base;                /* base of mmap area */
1082         unsigned long free_area_cache;          /* first hole */
1083 @@ -720,6 +730,7 @@
1084  #define PF_LESS_THROTTLE 0x00100000    /* Throttle me less: I clean memory */
1085  #define PF_SYNCWRITE   0x00200000      /* I am doing a sync write */
1086  #define PF_BORROWED_MM 0x00400000      /* I am a kthread doing use_mm */
1087 +#define PF_RELOCEXEC   0x00800000      /* relocate shared libraries */
1088  
1089  #ifdef CONFIG_SMP
1090  extern int set_cpus_allowed(task_t *p, cpumask_t new_mask);
1091 Index: linux-2.6.10/kernel/signal.c
1092 ===================================================================
1093 --- linux-2.6.10.orig/kernel/signal.c   2005-04-05 16:29:27.951341424 +0800
1094 +++ linux-2.6.10/kernel/signal.c        2005-04-05 16:43:17.077295120 +0800
1095 @@ -1608,6 +1608,35 @@
1096         spin_unlock_irq(&current->sighand->siglock);
1097  }
1098  
1099 +int print_fatal_signals = 0;
1100 +
1101 +static void print_fatal_signal(struct pt_regs *regs, int signr)
1102 +{
1103 +       int i;
1104 +       unsigned char insn;
1105 +       printk("%s/%d: potentially unexpected fatal signal %d.\n",
1106 +               current->comm, current->pid, signr);
1107 +
1108 +#ifdef __i386__
1109 +       printk("code at %08lx: ", regs->eip);
1110 +       for (i = 0; i < 16; i++) {
1111 +               __get_user(insn, (unsigned char *)(regs->eip + i));
1112 +               printk("%02x ", insn);
1113 +       }
1114 +#endif
1115 +       printk("\n");
1116 +       show_regs(regs);
1117 +}
1118 +
1119 +static int __init setup_print_fatal_signals(char *str)
1120 +{
1121 +       get_option (&str, &print_fatal_signals);
1122 +
1123 +       return 1;
1124 +}
1125 +
1126 +__setup("print-fatal-signals=", setup_print_fatal_signals);
1127 +
1128  #ifndef HAVE_ARCH_GET_SIGNAL_TO_DELIVER
1129  
1130  static void
1131 @@ -1808,6 +1837,12 @@
1132                 if (!signr)
1133                         break; /* will return 0 */
1134  
1135 +               if ((signr == SIGSEGV) && print_fatal_signals) {
1136 +                       spin_unlock_irq(&current->sighand->siglock);
1137 +                       print_fatal_signal(regs, signr);
1138 +                       spin_lock_irq(&current->sighand->siglock);
1139 +               }
1140 +
1141                 if ((current->ptrace & PT_PTRACED) && signr != SIGKILL) {
1142                         ptrace_signal_deliver(regs, cookie);
1143  
1144 @@ -1904,6 +1939,8 @@
1145                  * Anything else is fatal, maybe with a core dump.
1146                  */
1147                 current->flags |= PF_SIGNALED;
1148 +               if (print_fatal_signals)
1149 +                       print_fatal_signal(regs, signr);
1150                 if (sig_kernel_coredump(signr)) {
1151                         /*
1152                          * If it was able to dump core, this kills all
1153 Index: linux-2.6.10/kernel/sysctl.c
1154 ===================================================================
1155 --- linux-2.6.10.orig/kernel/sysctl.c   2005-04-05 16:29:24.394882088 +0800
1156 +++ linux-2.6.10/kernel/sysctl.c        2005-04-05 16:43:17.078294968 +0800
1157 @@ -75,6 +75,29 @@
1158                                   void __user *, size_t *, loff_t *);
1159  #endif
1160  
1161 +extern unsigned int vdso_enabled;
1162 +
1163 +int exec_shield = 1;
1164 +int exec_shield_randomize = 1;
1165 +
1166 +static int __init setup_exec_shield(char *str)
1167 +{
1168 +        get_option (&str, &exec_shield);
1169 +
1170 +        return 1;
1171 +}
1172 +
1173 +__setup("exec-shield=", setup_exec_shield);
1174 +
1175 +static int __init setup_exec_shield_randomize(char *str)
1176 +{
1177 +        get_option (&str, &exec_shield_randomize);
1178 +
1179 +        return 1;
1180 +}
1181 +
1182 +__setup("exec-shield-randomize=", setup_exec_shield_randomize);
1183 +
1184  /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
1185  static int maxolduid = 65535;
1186  static int minolduid;
1187 @@ -276,6 +299,40 @@
1188                 .proc_handler   = &proc_dointvec,
1189         },
1190         {
1191 +               .ctl_name       = KERN_PANIC,
1192 +               .procname       = "exec-shield",
1193 +               .data           = &exec_shield,
1194 +               .maxlen         = sizeof(int),
1195 +               .mode           = 0644,
1196 +               .proc_handler   = &proc_dointvec,
1197 +       },
1198 +       {
1199 +               .ctl_name       = KERN_PANIC,
1200 +               .procname       = "exec-shield-randomize",
1201 +               .data           = &exec_shield_randomize,
1202 +               .maxlen         = sizeof(int),
1203 +               .mode           = 0644,
1204 +               .proc_handler   = &proc_dointvec,
1205 +       },
1206 +       {
1207 +               .ctl_name       = KERN_PANIC,
1208 +               .procname       = "print-fatal-signals",
1209 +               .data           = &print_fatal_signals,
1210 +               .maxlen         = sizeof(int),
1211 +               .mode           = 0644,
1212 +               .proc_handler   = &proc_dointvec,
1213 +       },
1214 +#if __i386__
1215 +       {
1216 +               .ctl_name       = KERN_PANIC,
1217 +               .procname       = "vdso",
1218 +               .data           = &vdso_enabled,
1219 +               .maxlen         = sizeof(int),
1220 +               .mode           = 0644,
1221 +               .proc_handler   = &proc_dointvec,
1222 +       },
1223 +#endif
1224 +       {
1225                 .ctl_name       = KERN_CORE_USES_PID,
1226                 .procname       = "core_uses_pid",
1227                 .data           = &core_uses_pid,
1228 Index: linux-2.6.10/mm/mmap.c
1229 ===================================================================
1230 --- linux-2.6.10.orig/mm/mmap.c 2005-04-05 16:29:30.134009608 +0800
1231 +++ linux-2.6.10/mm/mmap.c      2005-04-05 16:43:44.369146128 +0800
1232 @@ -23,6 +23,7 @@
1233  #include <linux/mount.h>
1234  #include <linux/mempolicy.h>
1235  #include <linux/rmap.h>
1236 +#include <linux/random.h>
1237  
1238  #include <asm/uaccess.h>
1239  #include <asm/cacheflush.h>
1240 @@ -245,6 +246,8 @@
1241  __vma_link_list(struct mm_struct *mm, struct vm_area_struct *vma,
1242                 struct vm_area_struct *prev, struct rb_node *rb_parent)
1243  {
1244 +       if (vma->vm_flags & VM_EXEC)
1245 +               arch_add_exec_range(mm, vma->vm_end);
1246         if (prev) {
1247                 vma->vm_next = prev->vm_next;
1248                 prev->vm_next = vma;
1249 @@ -347,6 +350,8 @@
1250         rb_erase(&vma->vm_rb, &mm->mm_rb);
1251         if (mm->mmap_cache == vma)
1252                 mm->mmap_cache = prev;
1253 +       if (vma->vm_flags & VM_EXEC)
1254 +               arch_remove_exec_range(mm, vma->vm_end);
1255  }
1256  
1257  /*
1258 @@ -642,6 +647,8 @@
1259                 } else                                  /* cases 2, 5, 7 */
1260                         vma_adjust(prev, prev->vm_start,
1261                                 end, prev->vm_pgoff, NULL);
1262 +               if (prev->vm_flags & VM_EXEC)
1263 +                       arch_add_exec_range(mm, prev->vm_end);
1264                 return prev;
1265         }
1266  
1267 @@ -813,7 +820,7 @@
1268         /* Obtain the address to map to. we verify (or select) it and ensure
1269          * that it represents a valid section of the address space.
1270          */
1271 -       addr = get_unmapped_area(file, addr, len, pgoff, flags);
1272 +       addr = get_unmapped_area_prot(file, addr, len, pgoff, flags, prot & PROT_EXEC);
1273         if (addr & ~PAGE_MASK)
1274                 return addr;
1275  
1276 @@ -1207,9 +1214,10 @@
1277                 area->vm_mm->free_area_cache = area->vm_end;
1278  }
1279  
1280 +
1281  unsigned long
1282 -get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
1283 -               unsigned long pgoff, unsigned long flags)
1284 +get_unmapped_area_prot(struct file *file, unsigned long addr, unsigned long len,
1285 +               unsigned long pgoff, unsigned long flags, int exec)
1286  {
1287         if (flags & MAP_FIXED) {
1288                 unsigned long ret;
1289 @@ -1241,10 +1249,80 @@
1290                 return file->f_op->get_unmapped_area(file, addr, len,
1291                                                 pgoff, flags);
1292  
1293 -       return current->mm->get_unmapped_area(file, addr, len, pgoff, flags);
1294 +       if (exec && current->mm->get_unmapped_exec_area)
1295 +               return current->mm->get_unmapped_exec_area(file, addr, len, pgoff, flags);
1296 +       else
1297 +               return current->mm->get_unmapped_area(file, addr, len, pgoff, flags);
1298  }
1299  
1300 -EXPORT_SYMBOL(get_unmapped_area);
1301 +EXPORT_SYMBOL(get_unmapped_area_prot);
1302 +
1303 +
1304 +#define SHLIB_BASE             0x00111000
1305 +
1306 +unsigned long arch_get_unmapped_exec_area(struct file *filp, unsigned long addr0,
1307 +               unsigned long len0, unsigned long pgoff, unsigned long flags)
1308 +{
1309 +       unsigned long addr = addr0, len = len0;
1310 +       struct mm_struct *mm = current->mm;
1311 +       struct vm_area_struct *vma;
1312 +       unsigned long tmp;
1313 +
1314 +       if (len > TASK_SIZE)
1315 +               return -ENOMEM;
1316 +               
1317 +       if (!addr && !(flags & MAP_FIXED))
1318 +               addr = randomize_range(SHLIB_BASE, 0x01000000, len);
1319 +
1320 +       if (addr) {
1321 +               addr = PAGE_ALIGN(addr);
1322 +               vma = find_vma(mm, addr);
1323 +               if (TASK_SIZE - len >= addr &&
1324 +                   (!vma || addr + len <= vma->vm_start)) {
1325 +                       return addr;
1326 +               }
1327 +       }
1328 +
1329 +       addr = SHLIB_BASE;
1330 +
1331 +       for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
1332 +               /* At this point:  (!vma || addr < vma->vm_end). */
1333 +               if (TASK_SIZE - len < addr) {
1334 +                       return -ENOMEM;
1335 +               }
1336 +               if (!vma || addr + len <= vma->vm_start) {
1337 +                       /*
1338 +                        * Must not let a PROT_EXEC mapping get into the
1339 +                        * brk area:
1340 +                        */
1341 +                       if (addr + len > mm->brk)
1342 +                               goto failed;
1343 +                       
1344 +                       /*
1345 +                        * Up until the brk area we randomize addresses
1346 +                        * as much as possible:
1347 +                        */
1348 +                       if (addr >= 0x01000000) {
1349 +                               tmp = randomize_range(0x01000000, mm->brk, len);
1350 +                               vma = find_vma(mm, tmp);
1351 +                               if (TASK_SIZE - len >= tmp &&
1352 +                                   (!vma || tmp + len <= vma->vm_start))
1353 +                                       return tmp;
1354 +                       }
1355 +                       /*
1356 +                        * OK, randomization didn't work out - return
1357 +                        * the result of the linear search:
1358 +                        */
1359 +                       return addr;
1360 +               }
1361 +               addr = vma->vm_end;
1362 +       }
1363 +       
1364 +failed:
1365 +       return current->mm->get_unmapped_area(filp, addr0, len0, pgoff, flags);
1366 +}
1367 +
1368 +
1369  
1370  /* Look up the first VMA which satisfies  addr < vm_end,  NULL if none. */
1371  struct vm_area_struct * find_vma(struct mm_struct * mm, unsigned long addr)
1372 @@ -1319,6 +1397,14 @@
1373         return prev ? prev->vm_next : vma;
1374  }
1375  
1376 +
1377 +static int over_stack_limit(unsigned long sz)
1378 +{
1379 +       if (sz < EXEC_STACK_BIAS)
1380 +               return 0;
1381 +       return (sz - EXEC_STACK_BIAS) > current->signal->rlim[RLIMIT_STACK].rlim_cur;
1382 +}
1383 +
1384  #ifdef CONFIG_STACK_GROWSUP
1385  /*
1386   * vma is the first one with address > vma->vm_end.  Have to extend vma.
1387 @@ -1358,7 +1444,7 @@
1388                 return -ENOMEM;
1389         }
1390         
1391 -       if (address - vma->vm_start > current->signal->rlim[RLIMIT_STACK].rlim_cur ||
1392 +       if (over_stack_limit(address - vma->vm_start) ||
1393                         ((vma->vm_mm->total_vm + grow) << PAGE_SHIFT) >
1394                         current->signal->rlim[RLIMIT_AS].rlim_cur) {
1395                 anon_vma_unlock(vma);
1396 @@ -1432,7 +1518,7 @@
1397                 return -ENOMEM;
1398         }
1399         
1400 -       if (vma->vm_end - address > current->signal->rlim[RLIMIT_STACK].rlim_cur ||
1401 +       if (over_stack_limit(vma->vm_end - address) ||
1402                         ((vma->vm_mm->total_vm + grow) << PAGE_SHIFT) >
1403                         current->signal->rlim[RLIMIT_AS].rlim_cur) {
1404                 anon_vma_unlock(vma);
1405 @@ -1668,10 +1754,14 @@
1406         if (new->vm_ops && new->vm_ops->open)
1407                 new->vm_ops->open(new);
1408  
1409 -       if (new_below)
1410 +       if (new_below) {
1411 +               unsigned long old_end = vma->vm_end;
1412 +
1413                 vma_adjust(vma, addr, vma->vm_end, vma->vm_pgoff +
1414                         ((addr - new->vm_start) >> PAGE_SHIFT), new);
1415 -       else
1416 +               if (vma->vm_flags & VM_EXEC)
1417 +                       arch_remove_exec_range(mm, old_end);
1418 +       } else
1419                 vma_adjust(vma, vma->vm_start, addr, vma->vm_pgoff, new);
1420  
1421         return 0;
1422 @@ -1890,6 +1980,7 @@
1423         mm->rss = 0;
1424         mm->total_vm = 0;
1425         mm->locked_vm = 0;
1426 +       arch_flush_exec_range(mm);
1427  
1428         spin_unlock(&mm->page_table_lock);
1429  
1430 Index: linux-2.6.10/mm/mprotect.c
1431 ===================================================================
1432 --- linux-2.6.10.orig/mm/mprotect.c     2005-04-05 16:29:30.135009456 +0800
1433 +++ linux-2.6.10/mm/mprotect.c  2005-04-05 16:34:18.193217952 +0800
1434 @@ -22,6 +22,7 @@
1435  
1436  #include <asm/uaccess.h>
1437  #include <asm/pgtable.h>
1438 +#include <asm/pgalloc.h>
1439  #include <asm/cacheflush.h>
1440  #include <asm/tlbflush.h>
1441  
1442 @@ -117,7 +118,7 @@
1443         struct mm_struct * mm = vma->vm_mm;
1444         unsigned long oldflags = vma->vm_flags;
1445         long nrpages = (end - start) >> PAGE_SHIFT;
1446 -       unsigned long charged = 0;
1447 +       unsigned long charged = 0, old_end = vma->vm_end;
1448         pgprot_t newprot;
1449         pgoff_t pgoff;
1450         int error;
1451 @@ -179,8 +180,11 @@
1452          * vm_flags and vm_page_prot are protected by the mmap_sem
1453          * held in write mode.
1454          */
1455 +       oldflags = vma->vm_flags;
1456         vma->vm_flags = newflags;
1457         vma->vm_page_prot = newprot;
1458 +       if (oldflags & VM_EXEC)
1459 +               arch_remove_exec_range(current->mm, old_end);
1460         change_protection(vma, start, end, newprot);
1461         __vm_stat_account(mm, oldflags, vma->vm_file, -nrpages);
1462         __vm_stat_account(mm, newflags, vma->vm_file, nrpages);
1463 Index: linux-2.6.10/mm/mremap.c
1464 ===================================================================
1465 --- linux-2.6.10.orig/mm/mremap.c       2004-12-25 05:34:58.000000000 +0800
1466 +++ linux-2.6.10/mm/mremap.c    2005-04-05 16:43:44.370145976 +0800
1467 @@ -385,8 +385,8 @@
1468                         if (vma->vm_flags & VM_MAYSHARE)
1469                                 map_flags |= MAP_SHARED;
1470  
1471 -                       new_addr = get_unmapped_area(vma->vm_file, 0, new_len,
1472 -                                               vma->vm_pgoff, map_flags);
1473 +                       new_addr = get_unmapped_area_prot(vma->vm_file, 0, new_len, 
1474 +                               vma->vm_pgoff, map_flags, vma->vm_flags & VM_EXEC);
1475                         ret = new_addr;
1476                         if (new_addr & ~PAGE_MASK)
1477                                 goto out;