1 Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/MAINTAINERS
2 ===================================================================
3 --- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/MAINTAINERS 2004-11-18 20:59:11.000000000 -0500
4 +++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/MAINTAINERS 2004-11-18 23:25:15.000000000 -0500
6 W: http://www.cse.unsw.edu.au/~neilb/patches/linux-devel/
12 +M: ebiederm@xmission.com
14 +W: http://www.xmission.com/~ebiederm/files/kexec/
15 +W: http://developer.osdl.org/rddunlap/kexec/
16 +L: linux-kernel@vger.kernel.org
20 LANMEDIA WAN CARD DRIVER
21 P: Andrew Stanley-Jones
23 Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/arch/x86_64/Kconfig
24 ===================================================================
25 --- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/arch/x86_64/Kconfig 2004-11-18 20:59:11.000000000 -0500
26 +++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/arch/x86_64/Kconfig 2004-11-18 23:25:15.000000000 -0500
28 depends on IA32_EMULATION
32 + bool "kexec system call (EXPERIMENTAL)"
33 + depends on EXPERIMENTAL
35 + kexec is a system call that implements the ability to shutdown your
36 + current kernel, and to start another kernel. It is like a reboot
37 + but it is independent of the system firmware. And like a reboot
38 + you can start any kernel with it, not just Linux.
40 + The name comes from the similarity to the exec system call.
42 + It is an ongoing process to be certain the hardware in a machine
43 + is properly shutdown, so do not be surprised if this code does not
44 + initially work for you. It may help to enable device hotplugging
45 + support. As of this writing the exact hardware interface is
46 + strongly in flux, so no good recommendation can be made.
50 source drivers/Kconfig
51 Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/arch/x86_64/kernel/Makefile
52 ===================================================================
53 --- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/arch/x86_64/kernel/Makefile 2004-11-11 10:28:46.000000000 -0500
54 +++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/arch/x86_64/kernel/Makefile 2004-11-18 23:26:29.000000000 -0500
56 obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o
57 obj-$(CONFIG_X86_IO_APIC) += io_apic.o mpparse.o \
58 genapic.o genapic_cluster.o genapic_flat.o
59 +obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o
60 obj-$(CONFIG_PM) += suspend.o
61 obj-$(CONFIG_SOFTWARE_SUSPEND) += suspend_asm.o
62 obj-$(CONFIG_CPU_FREQ) += cpufreq/
63 Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/arch/x86_64/kernel/apic.c
64 ===================================================================
65 --- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/arch/x86_64/kernel/apic.c 2004-11-11 10:28:46.000000000 -0500
66 +++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/arch/x86_64/kernel/apic.c 2004-11-18 23:25:15.000000000 -0500
72 + /* Go back to Virtual Wire compatibility mode */
73 + unsigned long value;
75 + /* For the spurious interrupt use vector F, and enable it */
76 + value = apic_read(APIC_SPIV);
77 + value &= ~APIC_VECTOR_MASK;
78 + value |= APIC_SPIV_APIC_ENABLED;
80 + apic_write_around(APIC_SPIV, value);
82 + /* For LVT0 make it edge triggered, active high, external and enabled */
83 + value = apic_read(APIC_LVT0);
84 + value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
85 + APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
86 + APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED );
87 + value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
88 + value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXINT);
89 + apic_write_around(APIC_LVT0, value);
91 + /* For LVT1 make it edge triggered, active high, nmi and enabled */
92 + value = apic_read(APIC_LVT1);
94 + APIC_MODE_MASK | APIC_SEND_PENDING |
95 + APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
96 + APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
97 + value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
98 + value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI);
99 + apic_write_around(APIC_LVT1, value);
103 void disable_local_APIC(void)
104 Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/arch/x86_64/kernel/e820.c
105 ===================================================================
106 --- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/arch/x86_64/kernel/e820.c 2004-04-03 22:36:53.000000000 -0500
107 +++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/arch/x86_64/kernel/e820.c 2004-11-18 23:25:15.000000000 -0500
110 for (i = 0; i < e820.nr_map; i++) {
111 struct resource *res;
112 - if (e820.map[i].addr + e820.map[i].size > 0x100000000ULL)
114 res = alloc_bootmem_low(sizeof(struct resource));
115 switch (e820.map[i].type) {
116 case E820_RAM: res->name = "System RAM"; break;
117 Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/arch/x86_64/kernel/i8259.c
118 ===================================================================
119 --- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/arch/x86_64/kernel/i8259.c 2004-11-18 20:59:11.000000000 -0500
120 +++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/arch/x86_64/kernel/i8259.c 2004-11-18 23:25:15.000000000 -0500
125 +static int i8259A_resume(struct sys_device *dev)
131 +static int i8259A_shutdown(struct sys_device *dev)
133 + /* Put the i8259A into a quiescent state that
134 + * the kernel initialization code can get it
137 + outb(0xff, 0x21); /* mask all of 8259A-1 */
138 + outb(0xff, 0xA1); /* mask all of 8259A-2 */
142 +static struct sysdev_class i8259_sysdev_class = {
143 + set_kset_name("i8259"),
144 + .resume = i8259A_resume,
145 + .shutdown = i8259A_shutdown,
148 +static struct sys_device device_i8259A = {
150 + .cls = &i8259_sysdev_class,
153 +static int __init i8259A_init_sysfs(void)
155 + int error = sysdev_class_register(&i8259_sysdev_class);
157 + error = sysdev_register(&device_i8259A);
161 +device_initcall(i8259A_init_sysfs);
163 void __init init_8259A(int auto_eoi)
166 Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/arch/x86_64/kernel/io_apic.c
167 ===================================================================
168 --- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/arch/x86_64/kernel/io_apic.c 2004-11-11 10:28:46.000000000 -0500
169 +++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/arch/x86_64/kernel/io_apic.c 2004-11-18 23:25:15.000000000 -0500
172 * Find the pin to which IRQ[irq] (ISA) is connected
174 -static int __init find_isa_irq_pin(int irq, int type)
175 +static int find_isa_irq_pin(int irq, int type)
179 @@ -1112,11 +1112,43 @@
181 void disable_IO_APIC(void)
185 * Clear the IO-APIC before rebooting:
190 + * If the i8259 is routed through an IOAPIC
191 + * Put that IOAPIC in virtual wire mode
192 + * so legacy interrupts can be delivered.
194 + pin = find_isa_irq_pin(0, mp_ExtINT);
196 + struct IO_APIC_route_entry entry;
197 + unsigned long flags;
199 + memset(&entry, 0, sizeof(entry));
200 + entry.mask = 0; /* Enabled */
201 + entry.trigger = 0; /* Edge */
203 + entry.polarity = 0; /* High */
204 + entry.delivery_status = 0;
205 + entry.dest_mode = 0; /* Physical */
206 + entry.delivery_mode = 7; /* ExtInt */
208 + entry.dest.physical.physical_dest = 0;
212 + * Add it to the IO-APIC irq-routing table:
214 + spin_lock_irqsave(&ioapic_lock, flags);
215 + io_apic_write(0, 0x11+2*pin, *(((int *)&entry)+1));
216 + io_apic_write(0, 0x10+2*pin, *(((int *)&entry)+0));
217 + spin_unlock_irqrestore(&ioapic_lock, flags);
220 disconnect_bsp_APIC();
223 Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/arch/x86_64/kernel/machine_kexec.c
224 ===================================================================
225 --- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/arch/x86_64/kernel/machine_kexec.c 1969-12-31 19:00:00.000000000 -0500
226 +++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/arch/x86_64/kernel/machine_kexec.c 2004-11-18 23:25:15.000000000 -0500
229 + * machine_kexec.c - handle transition of Linux booting another kernel
230 + * Copyright (C) 2002-2004 Eric Biederman <ebiederm@xmission.com>
232 + * This source code is licensed under the GNU General Public License,
233 + * Version 2. See the file COPYING for more details.
236 +#include <linux/mm.h>
237 +#include <linux/kexec.h>
238 +#include <linux/delay.h>
239 +#include <linux/string.h>
240 +#include <linux/reboot.h>
241 +#include <asm/pda.h>
242 +#include <asm/pgtable.h>
243 +#include <asm/pgalloc.h>
244 +#include <asm/tlbflush.h>
245 +#include <asm/mmu_context.h>
247 +#include <asm/apic.h>
248 +#include <asm/cpufeature.h>
249 +#include <asm/hw_irq.h>
251 +#define LEVEL0_SIZE (1UL << 12UL)
252 +#define LEVEL1_SIZE (1UL << 21UL)
253 +#define LEVEL2_SIZE (1UL << 30UL)
254 +#define LEVEL3_SIZE (1UL << 39UL)
255 +#define LEVEL4_SIZE (1UL << 48UL)
257 +#define L0_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
258 +#define L1_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_PSE)
259 +#define L2_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
260 +#define L3_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
262 +static void init_level2_page(
263 + uint64_t *level2p, unsigned long addr)
265 + unsigned long end_addr;
267 + end_addr = addr + LEVEL2_SIZE;
268 + while(addr < end_addr) {
269 + *(level2p++) = addr | L1_ATTR;
270 + addr += LEVEL1_SIZE;
274 +static int init_level3_page(struct kimage *image,
275 + uint64_t *level3p, unsigned long addr, unsigned long last_addr)
277 + unsigned long end_addr;
281 + end_addr = addr + LEVEL3_SIZE;
282 + while((addr < last_addr) && (addr < end_addr)) {
285 + page = kimage_alloc_control_pages(image, 0);
290 + level2p = (uint64_t *)page_address(page);
291 + init_level2_page(level2p, addr);
292 + *(level3p++) = __pa(level2p) | L2_ATTR;
293 + addr += LEVEL2_SIZE;
295 + /* clear the unused entries */
296 + while(addr < end_addr) {
298 + addr += LEVEL2_SIZE;
305 +static int init_level4_page(struct kimage *image,
306 + uint64_t *level4p, unsigned long addr, unsigned long last_addr)
308 + unsigned long end_addr;
312 + end_addr = addr + LEVEL4_SIZE;
313 + while((addr < last_addr) && (addr < end_addr)) {
316 + page = kimage_alloc_control_pages(image, 0);
321 + level3p = (uint64_t *)page_address(page);
322 + result = init_level3_page(image, level3p, addr, last_addr);
326 + *(level4p++) = __pa(level3p) | L3_ATTR;
327 + addr += LEVEL3_SIZE;
329 + /* clear the unused entries */
330 + while(addr < end_addr) {
332 + addr += LEVEL3_SIZE;
339 +static int init_pgtable(struct kimage *image, unsigned long start_pgtable)
342 + level4p = (uint64_t *)__va(start_pgtable);
343 + return init_level4_page(image, level4p, 0, end_pfn << PAGE_SHIFT);
346 +static void set_idt(void *newidt, __u16 limit)
348 + unsigned char curidt[10];
350 + /* x86-64 supports unaligned loads & stores */
351 + (*(__u16 *)(curidt)) = limit;
352 + (*(__u64 *)(curidt +2)) = (unsigned long)(newidt);
354 + __asm__ __volatile__ (
361 +static void set_gdt(void *newgdt, __u16 limit)
363 + unsigned char curgdt[10];
365 + /* x86-64 supports unaligned loads & stores */
366 + (*(__u16 *)(curgdt)) = limit;
367 + (*(__u64 *)(curgdt +2)) = (unsigned long)(newgdt);
369 + __asm__ __volatile__ (
375 +static void load_segments(void)
377 + __asm__ __volatile__ (
378 + "\tmovl $"STR(__KERNEL_DS)",%eax\n"
379 + "\tmovl %eax,%ds\n"
380 + "\tmovl %eax,%es\n"
381 + "\tmovl %eax,%ss\n"
382 + "\tmovl %eax,%fs\n"
383 + "\tmovl %eax,%gs\n"
389 +typedef void (*relocate_new_kernel_t)(
390 + unsigned long indirection_page, unsigned long control_code_buffer,
391 + unsigned long start_address, unsigned long pgtable);
393 +const extern unsigned char relocate_new_kernel[];
394 +extern void relocate_new_kernel_end(void);
395 +const extern unsigned long relocate_new_kernel_size;
397 +int machine_kexec_prepare(struct kimage *image)
399 + unsigned long start_pgtable, control_code_buffer;
402 + /* Calculate the offsets */
403 + start_pgtable = page_to_pfn(image->control_code_page) << PAGE_SHIFT;
404 + control_code_buffer = start_pgtable + 4096UL;
406 + /* Setup the identity mapped 64bit page table */
407 + result = init_pgtable(image, start_pgtable);
412 + /* Place the code in the reboot code buffer */
413 + memcpy(__va(control_code_buffer), relocate_new_kernel, relocate_new_kernel_size);
418 +void machine_kexec_cleanup(struct kimage *image)
424 + * Do not allocate memory (or fail in any way) in machine_kexec().
425 + * We are past the point of no return, committed to rebooting now.
427 +void machine_kexec(struct kimage *image)
429 + unsigned long indirection_page;
430 + unsigned long control_code_buffer;
431 + unsigned long start_pgtable;
432 + relocate_new_kernel_t rnk;
434 + /* Interrupts aren't acceptable while we reboot */
435 + local_irq_disable();
437 + /* Calculate the offsets */
438 + indirection_page = image->head & PAGE_MASK;
439 + start_pgtable = page_to_pfn(image->control_code_page) << PAGE_SHIFT;
440 + control_code_buffer = start_pgtable + 4096UL;
442 + /* Set the low half of the page table to my identity mapped
443 + * page table for kexec. Leave the high half pointing at the
444 + * kernel pages. Don't bother to flush the global pages
445 + * as that will happen when I fully switch to my identity mapped
446 + * page table anyway.
448 + memcpy((void *)read_pda(level4_pgt), __va(start_pgtable), PAGE_SIZE/2);
452 + /* The segment registers are funny things, they are
453 + * automatically loaded from a table, in memory wherever you
454 + * set them to a specific selector, but this table is never
455 + * accessed again unless you set the segment to a different selector.
457 + * The more common model is caches where the behind
458 + * the scenes work is done, but is also dropped at arbitrary
461 + * I take advantage of this here by force loading the
462 + * segments, before I zap the gdt with an invalid value.
465 + /* The gdt & idt are now invalid.
466 + * If you want to load them you must set up your own idt & gdt.
468 + set_gdt(phys_to_virt(0),0);
469 + set_idt(phys_to_virt(0),0);
471 + rnk = (relocate_new_kernel_t) control_code_buffer;
472 + (*rnk)(indirection_page, control_code_buffer, image->start, start_pgtable);
474 Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/arch/x86_64/kernel/reboot.c
475 ===================================================================
476 --- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/arch/x86_64/kernel/reboot.c 2004-04-03 22:37:59.000000000 -0500
477 +++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/arch/x86_64/kernel/reboot.c 2004-11-18 23:25:15.000000000 -0500
479 [target] "b" (WARMBOOT_TRAMP));
483 -static void smp_halt(void)
485 - int cpuid = safe_smp_processor_id();
486 - static int first_entry = 1;
490 - smp_call_function((void *)machine_restart, NULL, 1, 0);
495 - /* AP calling this. Just halt */
496 - if (cpuid != boot_cpu_id) {
501 - /* Wait for all other CPUs to have run smp_stop_cpu */
502 - while (!cpus_empty(cpu_online_map))
507 static inline void kb_wait(void)
510 @@ -125,23 +100,45 @@
514 -void machine_restart(char * __unused)
515 +void machine_shutdown(void)
519 + /* Stop the cpus and apics */
525 + /* The boot cpu is always logical cpu 0 */
528 + /* Make certain the cpu I'm about to reboot on is online */
529 + if (!cpu_isset(reboot_cpu_id, cpu_online_map)) {
530 + reboot_cpu_id = smp_processor_id();
533 + /* Make certain I only run on the appropriate processor */
534 + set_cpus_allowed(current, cpumask_of_cpu(reboot_cpu_id));
536 + /* O.K Now that I'm on the appropriate processor,
537 + * stop all of the others.
546 disable_local_APIC();
555 +void machine_restart(char * __unused)
559 + machine_shutdown();
561 /* Tell the BIOS if we want cold or warm reboot */
562 *((unsigned short *)__va(0x472)) = reboot_mode;
563 Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/arch/x86_64/kernel/relocate_kernel.S
564 ===================================================================
565 --- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/arch/x86_64/kernel/relocate_kernel.S 1969-12-31 19:00:00.000000000 -0500
566 +++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/arch/x86_64/kernel/relocate_kernel.S 2004-11-18 23:25:15.000000000 -0500
569 + * relocate_kernel.S - put the kernel image in place to boot
570 + * Copyright (C) 2002-2004 Eric Biederman <ebiederm@xmission.com>
572 + * This source code is licensed under the GNU General Public License,
573 + * Version 2. See the file COPYING for more details.
576 +#include <linux/linkage.h>
579 + * Must be relocatable PIC code callable as a C function, that once
580 + * it starts can not use the previous process's stack.
582 + .globl relocate_new_kernel
584 +relocate_new_kernel:
585 + /* %rdi indirection_page
586 + * %rsi reboot_code_buffer
587 + * %rdx start address
593 + /* zero out flags, and disable interrupts */
597 + /* set a new stack at the bottom of our page... */
598 + lea 4096(%rsi), %rsp
600 + /* store the parameters back on the stack */
601 + pushq %rdx /* store the start address */
603 + /* Set cr0 to a known state:
604 + * 31 1 == Paging enabled
605 + * 18 0 == Alignment check disabled
606 + * 16 0 == Write protect disabled
607 + * 3 0 == No task switch
608 + * 2 0 == Don't do FP software emulation.
609 + * 0 1 == Protected mode enabled
612 + andq $~((1<<18)|(1<<16)|(1<<3)|(1<<2)), %rax
613 + orl $((1<<31)|(1<<0)), %eax
616 + /* Set cr4 to a known state:
617 + * 10 0 == xmm exceptions disabled
618 + * 9 0 == xmm registers instructions disabled
619 + * 8 0 == performance monitoring counter disabled
620 + * 7 0 == page global disabled
621 + * 6 0 == machine check exceptions disabled
622 + * 5 1 == physical address extension enabled
623 + * 4 0 == page size extensions disabled
624 + * 3 0 == Debug extensions disabled
625 + * 2 0 == Time stamp disable (disabled)
626 + * 1 0 == Protected mode virtual interrupts disabled
627 + * 0 0 == VME disabled
630 + movq $((1<<5)), %rax
636 + /* Switch to the identity mapped page tables,
637 + * and flush the TLB.
641 + /* Do the copies */
642 + movq %rdi, %rbx /* Put the indirection page in %rbx */
646 +0: /* top, read another word for the indirection page */
650 + testq $0x1, %rcx /* is it a destination page? */
653 + andq $0xfffffffffffff000, %rdi
656 + testq $0x2, %rcx /* is it an indirection page? */
659 + andq $0xfffffffffffff000, %rbx
662 + testq $0x4, %rcx /* is it the done indicator? */
666 + testq $0x8, %rcx /* is it the source indicator? */
667 + jz 0b /* Ignore it otherwise */
668 + movq %rcx, %rsi /* For every source page do a copy */
669 + andq $0xfffffffffffff000, %rsi
676 + /* To be certain of avoiding problems with self-modifying code
677 + * I need to execute a serializing instruction here.
678 + * So I flush the TLB by reloading %cr3 here, it's handy,
679 + * and not processor dependent.
684 + /* set all of the registers to known values */
685 + /* leave %rsp alone */
704 +relocate_new_kernel_end:
706 + .globl relocate_new_kernel_size
707 +relocate_new_kernel_size:
708 + .quad relocate_new_kernel_end - relocate_new_kernel
709 Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/include/asm-x86_64/apicdef.h
710 ===================================================================
711 --- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/include/asm-x86_64/apicdef.h 2004-11-11 10:28:46.000000000 -0500
712 +++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/include/asm-x86_64/apicdef.h 2004-11-18 23:26:05.000000000 -0500
714 #define SET_APIC_LOGICAL_ID(x) (((x)<<24))
715 #define APIC_ALL_CPUS 0xFFu
716 #define APIC_DFR 0xE0
717 -#define APIC_DFR_CLUSTER 0x0FFFFFFFu
718 -#define APIC_DFR_FLAT 0xFFFFFFFFu
719 +#define APIC_DFR_CLUSTER 0x0FFFFFFFul
720 +#define APIC_DFR_FLAT 0xFFFFFFFFul
721 #define APIC_SPIV 0xF0
722 #define APIC_SPIV_FOCUS_DISABLED (1<<9)
723 #define APIC_SPIV_APIC_ENABLED (1<<8)
725 #define APIC_LVT_REMOTE_IRR (1<<14)
726 #define APIC_INPUT_POLARITY (1<<13)
727 #define APIC_SEND_PENDING (1<<12)
728 +#define APIC_MODE_MASK 0x700
729 #define GET_APIC_DELIVERY_MODE(x) (((x)>>8)&0x7)
730 #define SET_APIC_DELIVERY_MODE(x,y) (((x)&~0x700)|((y)<<8))
731 #define APIC_MODE_FIXED 0x0
732 Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/include/asm-x86_64/kexec.h
733 ===================================================================
734 --- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/include/asm-x86_64/kexec.h 1969-12-31 19:00:00.000000000 -0500
735 +++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/include/asm-x86_64/kexec.h 2004-11-18 23:25:15.000000000 -0500
737 +#ifndef _X86_64_KEXEC_H
738 +#define _X86_64_KEXEC_H
740 +#include <asm/page.h>
741 +#include <asm/proto.h>
744 + * KEXEC_SOURCE_MEMORY_LIMIT maximum page get_free_page can return.
745 + * I.e. Maximum page that is mapped directly into kernel memory,
746 + * and kmap is not required.
748 + * So far x86_64 is limited to 40 physical address bits.
751 +/* Maximum physical address we can use pages from */
752 +#define KEXEC_SOURCE_MEMORY_LIMIT (0xFFFFFFFFFFUL)
753 +/* Maximum address we can reach in physical address mode */
754 +#define KEXEC_DESTINATION_MEMORY_LIMIT (0xFFFFFFFFFFUL)
755 +/* Maximum address we can use for the control pages */
756 +#define KEXEC_CONTROL_MEMORY_LIMIT (0xFFFFFFFFFFUL)
758 +/* Allocate one page for the pdp and the second for the code */
759 +#define KEXEC_CONTROL_CODE_SIZE (4096UL + 4096UL)
761 +#endif /* _X86_64_KEXEC_H */
762 Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/include/asm-x86_64/unistd.h
763 ===================================================================
764 --- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/include/asm-x86_64/unistd.h 2004-11-11 10:28:49.000000000 -0500
765 +++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/include/asm-x86_64/unistd.h 2004-11-18 23:27:18.000000000 -0500
767 #define __NR_mq_getsetattr 245
768 __SYSCALL(__NR_mq_getsetattr, sys_mq_getsetattr)
770 -#define __NR_syscall_max __NR_mq_getsetattr
771 +#define __NR_mq_open 240
772 +__SYSCALL(__NR_mq_open, sys_ni_syscall)
773 +#define __NR_mq_unlink 241
774 +__SYSCALL(__NR_mq_unlink, sys_ni_syscall)
775 +#define __NR_mq_timedsend 242
776 +__SYSCALL(__NR_mq_timedsend, sys_ni_syscall)
777 +#define __NR_mq_timedreceive 243
778 +__SYSCALL(__NR_mq_timedreceive, sys_ni_syscall)
779 +#define __NR_mq_notify 244
780 +__SYSCALL(__NR_mq_notify, sys_ni_syscall)
781 +#define __NR_mq_getsetattr 245
782 +__SYSCALL(__NR_mq_getsetattr, sys_ni_syscall)
783 +#define __NR_kexec_load 246
784 +__SYSCALL(__NR_kexec_load, sys_kexec_load)
786 +#define __NR_syscall_max __NR_kexec_load
789 /* user-visible error numbers are in the range -1 - -4095 */
790 Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/include/linux/kexec.h
791 ===================================================================
792 --- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/include/linux/kexec.h 1969-12-31 19:00:00.000000000 -0500
793 +++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/include/linux/kexec.h 2004-11-18 23:25:15.000000000 -0500
795 +#ifndef LINUX_KEXEC_H
796 +#define LINUX_KEXEC_H
799 +#include <linux/types.h>
800 +#include <linux/list.h>
801 +#include <asm/kexec.h>
804 + * This structure is used to hold the arguments that are used when loading
808 +typedef unsigned long kimage_entry_t;
809 +#define IND_DESTINATION 0x1
810 +#define IND_INDIRECTION 0x2
811 +#define IND_DONE 0x4
812 +#define IND_SOURCE 0x8
814 +#define KEXEC_SEGMENT_MAX 8
815 +struct kexec_segment {
823 + kimage_entry_t head;
824 + kimage_entry_t *entry;
825 + kimage_entry_t *last_entry;
827 + unsigned long destination;
829 + unsigned long start;
830 + struct page *control_code_page;
832 + unsigned long nr_segments;
833 + struct kexec_segment segment[KEXEC_SEGMENT_MAX];
835 + struct list_head control_pages;
836 + struct list_head dest_pages;
837 + struct list_head unuseable_pages;
841 +/* kexec interface functions */
842 +extern void machine_kexec(struct kimage *image);
843 +extern int machine_kexec_prepare(struct kimage *image);
844 +extern void machine_kexec_cleanup(struct kimage *image);
845 +extern asmlinkage long sys_kexec(unsigned long entry, long nr_segments,
846 + struct kexec_segment *segments);
847 +extern struct page *kimage_alloc_control_pages(struct kimage *image, unsigned int order);
848 +extern struct kimage *kexec_image;
850 +#endif /* LINUX_KEXEC_H */
851 Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/include/linux/reboot.h
852 ===================================================================
853 --- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/include/linux/reboot.h 2004-04-03 22:38:27.000000000 -0500
854 +++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/include/linux/reboot.h 2004-11-18 23:25:15.000000000 -0500
856 * POWER_OFF Stop OS and remove all power from system, if possible.
857 * RESTART2 Restart system using given command string.
858 * SW_SUSPEND Suspend system using software suspend if compiled in.
859 + * KEXEC Restart system using a previously loaded Linux kernel
862 #define LINUX_REBOOT_CMD_RESTART 0x01234567
864 #define LINUX_REBOOT_CMD_POWER_OFF 0x4321FEDC
865 #define LINUX_REBOOT_CMD_RESTART2 0xA1B2C3D4
866 #define LINUX_REBOOT_CMD_SW_SUSPEND 0xD000FCE2
867 +#define LINUX_REBOOT_CMD_KEXEC 0x45584543
872 extern void machine_halt(void);
873 extern void machine_power_off(void);
875 +extern void machine_shutdown(void);
879 #endif /* _LINUX_REBOOT_H */
880 Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/include/linux/syscalls.h
881 ===================================================================
882 --- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/include/linux/syscalls.h 2004-11-11 10:28:49.000000000 -0500
883 +++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/include/linux/syscalls.h 2004-11-18 23:25:15.000000000 -0500
888 +struct kexec_segment;
890 struct linux_dirent64;
893 asmlinkage long sys_reboot(int magic1, int magic2, unsigned int cmd,
895 asmlinkage long sys_restart_syscall(void);
896 +asmlinkage long sys_kexec_load(void *entry, unsigned long nr_segments,
897 + struct kexec_segment *segments, unsigned long flags);
899 asmlinkage long sys_exit(int error_code);
900 asmlinkage void sys_exit_group(int error_code);
901 Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/kernel/Makefile
902 ===================================================================
903 --- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/kernel/Makefile 2004-11-11 10:28:43.000000000 -0500
904 +++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/kernel/Makefile 2004-11-18 23:25:15.000000000 -0500
906 obj-$(CONFIG_KALLSYMS) += kallsyms.o
907 obj-$(CONFIG_PM) += power/
908 obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o
909 +obj-$(CONFIG_KEXEC) += kexec.o
910 obj-$(CONFIG_COMPAT) += compat.o
911 obj-$(CONFIG_PAGG) += pagg.o
912 obj-$(CONFIG_IKCONFIG) += configs.o
913 Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/kernel/kexec.c
914 ===================================================================
915 --- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/kernel/kexec.c 1969-12-31 19:00:00.000000000 -0500
916 +++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/kernel/kexec.c 2004-11-18 23:25:15.000000000 -0500
919 + * kexec.c - kexec system call
920 + * Copyright (C) 2002-2004 Eric Biederman <ebiederm@xmission.com>
922 + * This source code is licensed under the GNU General Public License,
923 + * Version 2. See the file COPYING for more details.
926 +#include <linux/mm.h>
927 +#include <linux/file.h>
928 +#include <linux/slab.h>
929 +#include <linux/fs.h>
930 +#include <linux/kexec.h>
931 +#include <linux/spinlock.h>
932 +#include <linux/list.h>
933 +#include <linux/highmem.h>
934 +#include <net/checksum.h>
935 +#include <asm/page.h>
936 +#include <asm/uaccess.h>
938 +#include <asm/system.h>
941 + * When kexec transitions to the new kernel there is a one-to-one
942 + * mapping between physical and virtual addresses. On processors
943 + * where you can disable the MMU this is trivial, and easy. For
944 + * others it is still a simple predictable page table to setup.
946 + * In that environment kexec copies the new kernel to its final
947 + * resting place. This means I can only support memory whose
948 + * physical address can fit in an unsigned long. In particular
949 + * addresses where (pfn << PAGE_SHIFT) > ULONG_MAX cannot be handled.
950 + * If the assembly stub has more restrictive requirements
951 + * KEXEC_SOURCE_MEMORY_LIMIT and KEXEC_DESTINATION_MEMORY_LIMIT can be
952 + * defined more restrictively in <asm/kexec.h>.
954 + * The code for the transition from the current kernel to the
955 + * new kernel is placed in the control_code_buffer, whose size
956 + * is given by KEXEC_CONTROL_CODE_SIZE. In the best case only a single
957 + * page of memory is necessary, but some architectures require more.
958 + * Because this memory must be identity mapped in the transition from
959 + * virtual to physical addresses it must live in the range
960 + * 0 - TASK_SIZE, as only the user space mappings are arbitrarily
963 + * The assembly stub in the control code buffer is passed a linked list
964 + * of descriptor pages detailing the source pages of the new kernel,
965 + * and the destination addresses of those source pages. As this data
966 + * structure is not used in the context of the current OS, it must
967 + * be self-contained.
969 + * The code has been made to work with highmem pages and will use a
970 + * destination page in its final resting place (if it happens
971 + * to allocate it). The end product of this is that most of the
972 + * physical address space, and most of RAM can be used.
974 + * Future directions include:
975 + * - allocating a page table with the control code buffer identity
976 + * mapped, to simplify machine_kexec and make kexec_on_panic more
981 + * KIMAGE_NO_DEST is an impossible destination address..., for
982 + * allocating pages whose destination address we do not care about.
984 +#define KIMAGE_NO_DEST (-1UL)
986 +static int kimage_is_destination_range(
987 + struct kimage *image, unsigned long start, unsigned long end);
988 +static struct page *kimage_alloc_page(struct kimage *image, unsigned int gfp_mask, unsigned long dest);
991 +static int kimage_alloc(struct kimage **rimage,
992 + unsigned long nr_segments, struct kexec_segment *segments)
995 + struct kimage *image;
996 + size_t segment_bytes;
999 + /* Allocate a controlling structure */
1001 + image = kmalloc(sizeof(*image), GFP_KERNEL);
1005 + memset(image, 0, sizeof(*image));
1007 + image->entry = &image->head;
1008 + image->last_entry = &image->head;
1010 + /* Initialize the list of control pages */
1011 + INIT_LIST_HEAD(&image->control_pages);
1013 + /* Initialize the list of destination pages */
1014 + INIT_LIST_HEAD(&image->dest_pages);
1016 + /* Initialize the list of unuseable pages */
1017 + INIT_LIST_HEAD(&image->unuseable_pages);
1019 + /* Read in the segments */
1020 + image->nr_segments = nr_segments;
1021 + segment_bytes = nr_segments * sizeof*segments;
1022 + result = copy_from_user(image->segment, segments, segment_bytes);
1027 + * Verify we have good destination addresses. The caller is
1028 + * responsible for making certain we don't attempt to load
1029 + * the new image into invalid or reserved areas of RAM. This
1030 + * just verifies it is an address we can use.
1032 + result = -EADDRNOTAVAIL;
1033 + for (i = 0; i < nr_segments; i++) {
1034 + unsigned long mend;
1035 + mend = ((unsigned long)(image->segment[i].mem)) +
1036 + image->segment[i].memsz;
1037 + if (mend >= KEXEC_DESTINATION_MEMORY_LIMIT)
1042 + * Find a location for the control code buffer, and add it to
1043 + * the vector of segments so that its pages will also be
1044 + * counted as destination pages.
1047 + image->control_code_page = kimage_alloc_control_pages(image,
1048 + get_order(KEXEC_CONTROL_CODE_SIZE));
1049 + if (!image->control_code_page) {
1050 + printk(KERN_ERR "Could not allocate control_code_buffer\n");
1056 + if (result == 0) {
1064 +static int kimage_is_destination_range(
1065 + struct kimage *image, unsigned long start, unsigned long end)
1069 + for (i = 0; i < image->nr_segments; i++) {
1070 + unsigned long mstart, mend;
1071 + mstart = (unsigned long)image->segment[i].mem;
1072 + mend = mstart + image->segment[i].memsz;
1073 + if ((end > mstart) && (start < mend)) {
1080 +static struct page *kimage_alloc_pages(unsigned int gfp_mask, unsigned int order)
1082 + struct page *pages;
1083 + pages = alloc_pages(gfp_mask, order);
1085 + unsigned int count, i;
1086 + pages->mapping = NULL;
1087 + pages->private = order;
1088 + count = 1 << order;
1089 + for(i = 0; i < count; i++) {
1090 + SetPageReserved(pages + i);
1096 +static void kimage_free_pages(struct page *page)
1098 + unsigned int order, count, i;
1099 + order = page->private;
1100 + count = 1 << order;
1101 + for(i = 0; i < count; i++) {
1102 + ClearPageReserved(page + i);
1104 + __free_pages(page, order);
1107 +static void kimage_free_page_list(struct list_head *list)
1109 + struct list_head *pos, *next;
1110 + list_for_each_safe(pos, next, list) {
1111 + struct page *page;
1113 + page = list_entry(pos, struct page, lru);
1114 + list_del(&page->lru);
1116 + kimage_free_pages(page);
1120 +struct page *kimage_alloc_control_pages(struct kimage *image, unsigned int order)
1122 + /* Control pages are special, they are the intermediaries
1123 + * that are needed while we copy the rest of the pages
1124 + * to their final resting place. As such they must
1125 + * not conflict with either the destination addresses
1126 + * or memory the kernel is already using.
1128 + * The only case where we really need more than one of
1129 + * these is for architectures where we cannot disable
1130 + * the MMU and must instead generate an identity mapped
1131 + * page table for all of the memory.
1133 + * At worst this runs in O(N) of the image size.
1135 + struct list_head extra_pages;
1136 + struct page *pages;
1137 + unsigned int count;
1139 + count = 1 << order;
1140 + INIT_LIST_HEAD(&extra_pages);
1142 + /* Loop while I can allocate a page and the page allocated
1143 + * is a destination page.
1146 + unsigned long pfn, epfn, addr, eaddr;
1147 + pages = kimage_alloc_pages(GFP_KERNEL, order);
1150 + pfn = page_to_pfn(pages);
1151 + epfn = pfn + count;
1152 + addr = pfn << PAGE_SHIFT;
1153 + eaddr = epfn << PAGE_SHIFT;
1154 + if ((epfn >= (KEXEC_CONTROL_MEMORY_LIMIT >> PAGE_SHIFT)) ||
1155 + kimage_is_destination_range(image, addr, eaddr))
1157 + list_add(&pages->lru, &extra_pages);
1162 + /* Remember the allocated page... */
1163 + list_add(&pages->lru, &image->control_pages);
1165 + /* Because the page is already in its destination
1166 + * location we will never allocate another page at
1167 + * that address. Therefore kimage_alloc_pages
1168 + * will not return it (again) and we don't need
1169 + * to give it an entry in image->segment[].
1172 + /* Deal with the destination pages I have inadvertently allocated.
1174 + * Ideally I would convert multi-page allocations into single
1175 + * page allocations, and add everything to image->dest_pages.
1177 + * For now it is simpler to just free the pages.
1179 + kimage_free_page_list(&extra_pages);
1184 +static int kimage_add_entry(struct kimage *image, kimage_entry_t entry)
1186 + if (*image->entry != 0) {
1189 + if (image->entry == image->last_entry) {
1190 + kimage_entry_t *ind_page;
1191 + struct page *page;
1192 + page = kimage_alloc_page(image, GFP_KERNEL, KIMAGE_NO_DEST);
1196 + ind_page = page_address(page);
1197 + *image->entry = virt_to_phys(ind_page) | IND_INDIRECTION;
1198 + image->entry = ind_page;
1199 + image->last_entry =
1200 + ind_page + ((PAGE_SIZE/sizeof(kimage_entry_t)) - 1);
1202 + *image->entry = entry;
1204 + *image->entry = 0;
1208 +static int kimage_set_destination(
1209 + struct kimage *image, unsigned long destination)
1213 + destination &= PAGE_MASK;
1214 + result = kimage_add_entry(image, destination | IND_DESTINATION);
1215 + if (result == 0) {
1216 + image->destination = destination;
1222 +static int kimage_add_page(struct kimage *image, unsigned long page)
1226 + page &= PAGE_MASK;
1227 + result = kimage_add_entry(image, page | IND_SOURCE);
1228 + if (result == 0) {
1229 + image->destination += PAGE_SIZE;
1235 +static void kimage_free_extra_pages(struct kimage *image)
1237 + /* Walk through and free any extra destination pages I may have */
1238 + kimage_free_page_list(&image->dest_pages);
1240 + /* Walk through and free any unuseable pages I have cached */
1241 + kimage_free_page_list(&image->unuseable_pages);
1244 +static int kimage_terminate(struct kimage *image)
1248 + result = kimage_add_entry(image, IND_DONE);
1249 + if (result == 0) {
1250 + /* Point at the terminating element */
1252 + kimage_free_extra_pages(image);
1257 +#define for_each_kimage_entry(image, ptr, entry) /* walk the entry list; stops at a zero entry or IND_DONE */ \
1258 + for (ptr = &(image)->head; (entry = *(ptr)) && !((entry) & IND_DONE); \
1259 + ptr = ((entry) & IND_INDIRECTION)? /* an indirection entry links to the next entry page */ \
1260 + phys_to_virt(((entry) & PAGE_MASK)): (ptr) + 1)
1262 +static void kimage_free_entry(kimage_entry_t entry)
1264 + struct page *page;
1266 + page = pfn_to_page(entry >> PAGE_SHIFT);
1267 + kimage_free_pages(page);
1270 +static void kimage_free(struct kimage *image)
1272 + kimage_entry_t *ptr, entry;
1273 + kimage_entry_t ind = 0;
1277 + kimage_free_extra_pages(image);
1278 + for_each_kimage_entry(image, ptr, entry) {
1279 + if (entry & IND_INDIRECTION) {
1280 + /* Free the previous indirection page */
1281 + if (ind & IND_INDIRECTION) {
1282 + kimage_free_entry(ind);
1284 + /* Save this indirection page until we are
1289 + else if (entry & IND_SOURCE) {
1290 + kimage_free_entry(entry);
1293 + /* Free the final indirection page */
1294 + if (ind & IND_INDIRECTION) {
1295 + kimage_free_entry(ind);
1298 + /* Handle any machine specific cleanup */
1299 + machine_kexec_cleanup(image);
1301 + /* Free the kexec control pages... */
1302 + kimage_free_page_list(&image->control_pages);
1306 +static kimage_entry_t *kimage_dst_used(struct kimage *image, unsigned long page)
1308 + kimage_entry_t *ptr, entry;
1309 + unsigned long destination = 0;
1311 + for_each_kimage_entry(image, ptr, entry) {
1312 + if (entry & IND_DESTINATION) {
1313 + destination = entry & PAGE_MASK;
1315 + else if (entry & IND_SOURCE) {
1316 + if (page == destination) {
1319 + destination += PAGE_SIZE;
1325 +static struct page *kimage_alloc_page(struct kimage *image, unsigned int gfp_mask, unsigned long destination)
1328 + * Here we implement safeguards to ensure that a source page
1329 + * is not copied to its destination page before the data on
1330 + * the destination page is no longer useful.
1332 + * To do this we maintain the invariant that a source page is
1333 + * either its own destination page, or it is not a
1334 + * destination page at all.
1336 + * That is slightly stronger than required, but the proof
1337 + * that no problems will occur is trivial, and the
1338 + * implementation is simple to verify.
1340 + * When allocating all pages normally this algorithm will run
1341 + * in O(N) time, but in the worst case it will run in O(N^2)
1342 + * time. If the runtime is a problem the data structures can
1345 + struct page *page;
1346 + unsigned long addr;
1349 + * Walk through the list of destination pages, and see if I
1352 + list_for_each_entry(page, &image->dest_pages, lru) {
1353 + addr = page_to_pfn(page) << PAGE_SHIFT;
1354 + if (addr == destination) {
1355 + list_del(&page->lru);
1361 + kimage_entry_t *old;
1363 + /* Allocate a page; if we run out of memory, give up */
1364 + page = kimage_alloc_pages(gfp_mask, 0);
1368 + /* If the page cannot be used, file it away */
1369 + if (page_to_pfn(page) > (KEXEC_SOURCE_MEMORY_LIMIT >> PAGE_SHIFT)) {
1370 + list_add(&page->lru, &image->unuseable_pages);
1373 + addr = page_to_pfn(page) << PAGE_SHIFT;
1375 + /* If it is the destination page we want, use it */
1376 + if (addr == destination)
1379 + /* If the page is not a destination page, use it */
1380 + if (!kimage_is_destination_range(image, addr, addr + PAGE_SIZE))
1384 + * I know that the page is someone's destination page.
1385 + * See if there is already a source page for this
1386 + * destination page. And if so swap the source pages.
1388 + old = kimage_dst_used(image, addr);
1390 + /* If so move it */
1391 + unsigned long old_addr;
1392 + struct page *old_page;
1394 + old_addr = *old & PAGE_MASK;
1395 + old_page = pfn_to_page(old_addr >> PAGE_SHIFT);
1396 + copy_highpage(page, old_page);
1397 + *old = addr | (*old & ~PAGE_MASK);
1399 + /* The old page I have found cannot be a
1400 + * destination page, so return it.
1407 + /* Place the page on the destination list; I
1408 + * will use it later.
1410 + list_add(&page->lru, &image->dest_pages);
1416 +static int kimage_load_segment(struct kimage *image,
1417 + struct kexec_segment *segment)
1419 + unsigned long mstart;
1421 + unsigned long offset;
1422 + unsigned long offset_end;
1423 + unsigned char *buf;
1426 + buf = segment->buf;
1427 + mstart = (unsigned long)segment->mem;
1429 + offset_end = segment->memsz;
1431 + result = kimage_set_destination(image, mstart);
1435 + for (offset = 0; offset < segment->memsz; offset += PAGE_SIZE) {
1436 + struct page *page;
1438 + size_t size, leader;
1439 + page = kimage_alloc_page(image, GFP_HIGHUSER, mstart + offset);
1444 + result = kimage_add_page(image, page_to_pfn(page) << PAGE_SHIFT);
1449 + if (segment->bufsz < offset) {
1450 + /* We are past the end; zero the whole page */
1451 + memset(ptr, 0, PAGE_SIZE);
1457 + if ((offset == 0)) {
1458 + leader = mstart & ~PAGE_MASK;
1461 + /* We are on the first page; zero the unused portion */
1462 + memset(ptr, 0, leader);
1466 + if (size > (segment->bufsz - offset)) {
1467 + size = segment->bufsz - offset;
1469 + if (size < (PAGE_SIZE - leader)) {
1470 + /* zero the trailing part of the page */
1471 + memset(ptr + size, 0, (PAGE_SIZE - leader) - size);
1473 + result = copy_from_user(ptr, buf + offset, size);
1476 + result = (result < 0) ? result : -EIO;
1485 + * Exec Kernel system call: for obvious reasons only root may call it.
1487 + * This call breaks up into three pieces.
1488 + * - A generic part which loads the new kernel from the current
1489 + * address space, and very carefully places the data in the
1490 + * allocated pages.
1492 + * - A generic part that interacts with the kernel and tells all of
1493 + * the devices to shut down, preventing on-going DMAs and placing
1494 + * the devices in a consistent state so a later kernel can
1495 + * reinitialize them.
1497 + * - A machine specific part that includes the syscall number
1498 + * and then copies the image to its final destination, and
1499 + * jumps into the image at entry.
1501 + * kexec does not sync or unmount filesystems, so if you need
1502 + * that to happen you need to do that yourself.
1504 +struct kimage *kexec_image = NULL;
1506 +asmlinkage long sys_kexec_load(unsigned long entry, unsigned long nr_segments,
1507 + struct kexec_segment *segments, unsigned long flags)
1509 + struct kimage *image;
1512 + /* We only trust the superuser with rebooting the system. */
1513 + if (!capable(CAP_SYS_BOOT))
1517 + * In case we need just a little bit of special behavior for
1518 + * reboot on panic.
1523 + if (nr_segments > KEXEC_SEGMENT_MAX)
1529 + if (nr_segments > 0) {
1531 + result = kimage_alloc(&image, nr_segments, segments);
1535 + result = machine_kexec_prepare(image);
1539 + image->start = entry;
1540 + for (i = 0; i < nr_segments; i++) {
1541 + result = kimage_load_segment(image, &image->segment[i]);
1546 + result = kimage_terminate(image);
1552 + image = xchg(&kexec_image, image);
1555 + kimage_free(image);
1558 Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/kernel/sys.c
1559 ===================================================================
1560 --- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/kernel/sys.c 2004-11-11 10:28:49.000000000 -0500
1561 +++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/kernel/sys.c 2004-11-18 23:25:15.000000000 -0500
1563 #include <linux/init.h>
1564 #include <linux/highuid.h>
1565 #include <linux/fs.h>
1566 +#include <linux/kernel.h>
1567 +#include <linux/kexec.h>
1568 #include <linux/workqueue.h>
1569 #include <linux/device.h>
1570 #include <linux/times.h>
1572 cond_syscall(sys_lookup_dcookie)
1573 cond_syscall(sys_swapon)
1574 cond_syscall(sys_swapoff)
1575 +cond_syscall(sys_kexec_load)
1576 cond_syscall(sys_init_module)
1577 cond_syscall(sys_delete_module)
1578 cond_syscall(sys_socketpair)
1579 @@ -505,6 +508,24 @@
1580 machine_restart(buffer);
1583 +#ifdef CONFIG_KEXEC
1584 + case LINUX_REBOOT_CMD_KEXEC:
1586 + struct kimage *image;
1587 + image = xchg(&kexec_image, 0);
1592 + notifier_call_chain(&reboot_notifier_list, SYS_RESTART, NULL);
1593 + system_state = SYSTEM_BOOTING;
1594 + device_shutdown();
1595 + printk(KERN_EMERG "Starting new kernel\n");
1596 + machine_shutdown();
1597 + machine_kexec(image);
1601 #ifdef CONFIG_SOFTWARE_SUSPEND
1602 case LINUX_REBOOT_CMD_SW_SUSPEND: