From 06777424a97a75bb59cb20239fcfaa8f20de0bee Mon Sep 17 00:00:00 2001 From: wangdi Date: Thu, 9 Oct 2003 14:51:34 +0000 Subject: [PATCH] add 2.6.0-test6-mm4.patch --- .../kernel_patches/patches/2.6.0-test6-mm4.patch | 113290 ++++++++++++++++++ 1 file changed, 113290 insertions(+) create mode 100644 lustre/kernel_patches/patches/2.6.0-test6-mm4.patch diff --git a/lustre/kernel_patches/patches/2.6.0-test6-mm4.patch b/lustre/kernel_patches/patches/2.6.0-test6-mm4.patch new file mode 100644 index 0000000..9f8569d --- /dev/null +++ b/lustre/kernel_patches/patches/2.6.0-test6-mm4.patch @@ -0,0 +1,113290 @@ +--- linux-2.6.0-test6/arch/alpha/Kconfig 2003-09-27 18:57:43.000000000 -0700 ++++ 25/arch/alpha/Kconfig 2003-10-05 00:36:11.000000000 -0700 +@@ -471,21 +471,6 @@ config EISA + bool + depends on ALPHA_GENERIC || ALPHA_JENSEN || ALPHA_ALCOR || ALPHA_MIKASA || ALPHA_SABLE || ALPHA_LYNX || ALPHA_NORITAKE || ALPHA_RAWHIDE + default y +- ---help--- +- The Extended Industry Standard Architecture (EISA) bus was +- developed as an open alternative to the IBM MicroChannel bus. +- +- The EISA bus provided some of the features of the IBM MicroChannel +- bus while maintaining backward compatibility with cards made for +- the older ISA bus. The EISA bus saw limited use between 1988 and +- 1995 when it was made obsolete by the PCI bus. +- +- Say Y here if you are building a kernel for an EISA-based machine. +- +- Otherwise, say N. +- +-config EISA_ALWAYS +- def_bool EISA + + config SMP + bool "Symmetric multi-processing support" +--- linux-2.6.0-test6/arch/alpha/kernel/core_irongate.c 2003-06-14 12:18:25.000000000 -0700 ++++ 25/arch/alpha/kernel/core_irongate.c 2003-10-05 00:33:23.000000000 -0700 +@@ -391,7 +391,7 @@ irongate_ioremap(unsigned long addr, uns + cur_gatt = phys_to_virt(GET_GATT(baddr) & ~1); + pte = cur_gatt[GET_GATT_OFF(baddr)] & ~1; + +- if (__alpha_remap_area_pages(VMALLOC_VMADDR(vaddr), ++ if (__alpha_remap_area_pages(vaddr, + pte, PAGE_SIZE, 0)) { + printk("AGP ioremap: FAILED to map...\n"); + vfree(area->addr); +--- linux-2.6.0-test6/arch/alpha/kernel/core_marvel.c 2003-08-22 19:23:39.000000000 -0700 ++++ 25/arch/alpha/kernel/core_marvel.c 2003-10-05 00:33:23.000000000 -0700 +@@ -696,7 +696,7 @@ marvel_ioremap(unsigned long addr, unsig + } + pfn >>= 1; /* make it a true pfn */ + +- if (__alpha_remap_area_pages(VMALLOC_VMADDR(vaddr), ++ if (__alpha_remap_area_pages(vaddr, + pfn << PAGE_SHIFT, + PAGE_SIZE, 0)) { + printk("FAILED to map...\n"); +--- linux-2.6.0-test6/arch/alpha/kernel/core_titan.c 2003-09-08 13:58:55.000000000 -0700 ++++ 25/arch/alpha/kernel/core_titan.c 2003-10-05 00:33:23.000000000 -0700 +@@ -534,7 +534,7 @@ titan_ioremap(unsigned long addr, unsign + } + pfn >>= 1; /* make it a true pfn */ + +- if (__alpha_remap_area_pages(VMALLOC_VMADDR(vaddr), ++ if (__alpha_remap_area_pages(vaddr, + pfn << PAGE_SHIFT, + PAGE_SIZE, 0)) { + printk("FAILED to map...\n"); +--- linux-2.6.0-test6/arch/alpha/kernel/setup.c 2003-08-22 19:23:39.000000000 -0700 ++++ 25/arch/alpha/kernel/setup.c 2003-10-05 00:36:11.000000000 -0700 +@@ -33,6 +33,7 @@ + #include + #include + #include ++#include + #ifdef CONFIG_MAGIC_SYSRQ + #include + #include +@@ -680,6 +681,11 @@ setup_arch(char **cmdline_p) + /* Default root filesystem to sda2. */ + ROOT_DEV = Root_SDA2; + ++#ifdef CONFIG_EISA ++ /* FIXME: only set this when we actually have EISA in this box? */ ++ EISA_bus = 1; ++#endif ++ + /* + * Check ASN in HWRPB for validity, report if bad. + * FIXME: how was this failing? 
Should we trust it instead, +@@ -1203,7 +1209,7 @@ show_cpuinfo(struct seq_file *f, void *s + platform_string(), nr_processors); + + #ifdef CONFIG_SMP +- seq_printf(f, "cpus active\t\t: %d\n" ++ seq_printf(f, "cpus active\t\t: %ld\n" + "cpu active mask\t\t: %016lx\n", + num_online_cpus(), cpu_present_mask); + #endif +--- linux-2.6.0-test6/arch/alpha/kernel/smp.c 2003-08-22 19:23:40.000000000 -0700 ++++ 25/arch/alpha/kernel/smp.c 2003-10-05 00:33:23.000000000 -0700 +@@ -597,7 +597,7 @@ smp_cpus_done(unsigned int max_cpus) + if (cpu_online(cpu)) + bogosum += cpu_data[cpu].loops_per_jiffy; + +- printk(KERN_INFO "SMP: Total of %d processors activated " ++ printk(KERN_INFO "SMP: Total of %ld processors activated " + "(%lu.%02lu BogoMIPS).\n", + num_online_cpus(), + (bogosum + 2500) / (500000/HZ), +--- linux-2.6.0-test6/arch/alpha/kernel/time.c 2003-08-08 22:55:10.000000000 -0700 ++++ 25/arch/alpha/kernel/time.c 2003-10-05 00:33:23.000000000 -0700 +@@ -89,6 +89,16 @@ static inline __u32 rpcc(void) + return result; + } + ++/* ++ * Scheduler clock - returns current time in nanosec units. ++ * ++ * Copied from ARM code for expediency... ;-} ++ */ ++unsigned long long sched_clock(void) ++{ ++ return (unsigned long long)jiffies * (1000000000 / HZ); ++} ++ + + /* + * timer_interrupt() needs to keep up the real-time clock, +@@ -239,8 +249,9 @@ validate_cc_value(unsigned long cc) + * arch/i386/time.c. + */ + +-#define CALIBRATE_LATCH (52 * LATCH) +-#define CALIBRATE_TIME (52 * 1000020 / HZ) ++#define PIC_TICK_RATE 1193180UL ++#define CALIBRATE_LATCH 0xffff ++#define TIMEOUT_COUNT 0x100000 + + static unsigned long __init + calibrate_cc_with_pic(void) +@@ -263,19 +274,15 @@ calibrate_cc_with_pic(void) + + cc = rpcc(); + do { +- count+=100; /* by 1 takes too long to timeout from 0 */ +- } while ((inb(0x61) & 0x20) == 0 && count > 0); ++ count++; ++ } while ((inb(0x61) & 0x20) == 0 && count < TIMEOUT_COUNT); + cc = rpcc() - cc; + + /* Error: ECTCNEVERSET or ECPUTOOFAST. */ +- if (count <= 100) +- return 0; +- +- /* Error: ECPUTOOSLOW. */ +- if (cc <= CALIBRATE_TIME) ++ if (count <= 1 || count == TIMEOUT_COUNT) + return 0; + +- return (cc * 1000000UL) / CALIBRATE_TIME; ++ return ((long)cc * PIC_TICK_RATE) / (CALIBRATE_LATCH + 1); + } + + /* The Linux interpretation of the CMOS clock register contents: +--- linux-2.6.0-test6/arch/alpha/mm/init.c 2003-07-27 12:14:38.000000000 -0700 ++++ 25/arch/alpha/mm/init.c 2003-10-05 00:33:23.000000000 -0700 +@@ -210,7 +210,8 @@ callback_init(void * kernel_end) + /* Allocate one PGD and one PMD. In the case of SRM, we'll need + these to actually remap the console. There is an assumption + here that only one of each is needed, and this allows for 8MB. +- Currently (late 1999), big consoles are still under 4MB. ++ On systems with larger consoles, additional pages will be ++ allocated as needed during the mapping process. + + In the case of not SRM, but not CONFIG_ALPHA_LARGE_VMALLOC, + we need to allocate the PGD we use for vmalloc before we start +@@ -237,6 +238,15 @@ callback_init(void * kernel_end) + unsigned long pfn = crb->map[i].pa >> PAGE_SHIFT; + crb->map[i].va = vaddr; + for (j = 0; j < crb->map[i].count; ++j) { ++ /* Newer console's (especially on larger ++ systems) may require more pages of ++ PTEs. Grab additional pages as needed. 
*/ ++ if (pmd != pmd_offset(pgd, vaddr)) { ++ memset(kernel_end, 0, PAGE_SIZE); ++ pmd = pmd_offset(pgd, vaddr); ++ pmd_set(pmd, (pte_t *)kernel_end); ++ kernel_end += PAGE_SIZE; ++ } + set_pte(pte_offset_kernel(pmd, vaddr), + pfn_pte(pfn, PAGE_KERNEL)); + pfn++; +--- linux-2.6.0-test6/arch/arm/boot/compressed/head.S 2003-06-14 12:18:33.000000000 -0700 ++++ 25/arch/arm/boot/compressed/head.S 2003-10-05 00:33:23.000000000 -0700 +@@ -477,6 +477,12 @@ proc_types: + @ b __arm6_cache_off + @ b __armv3_cache_flush + ++ .word 0x00000000 @ old ARM ID ++ .word 0x0000f000 ++ mov pc, lr ++ mov pc, lr ++ mov pc, lr ++ + .word 0x41007000 @ ARM7/710 + .word 0xfff8fe00 + b __arm7_cache_off +@@ -489,6 +495,14 @@ proc_types: + b __armv4_cache_off + mov pc, lr + ++ .word 0x00007000 @ ARM7 IDs ++ .word 0x0000f000 ++ mov pc, lr ++ mov pc, lr ++ mov pc, lr ++ ++ @ Everything from here on will be the new ID system. ++ + .word 0x41129200 @ ARM920T + .word 0xff00fff0 + b __armv4_cache_on +@@ -507,8 +521,16 @@ proc_types: + b __armv4_cache_off + b __armv4_cache_flush + +- .word 0x69050000 @ xscale +- .word 0xffff0000 ++ @ These match on the architecture ID ++ ++ .word 0x00050000 @ ARMv5TE ++ .word 0x000f0000 ++ b __armv4_cache_on ++ b __armv4_cache_off ++ b __armv4_cache_flush ++ ++ .word 0x00060000 @ ARMv5TEJ ++ .word 0x000f0000 + b __armv4_cache_on + b __armv4_cache_off + b __armv4_cache_flush +--- linux-2.6.0-test6/arch/arm/Kconfig 2003-09-27 18:57:43.000000000 -0700 ++++ 25/arch/arm/Kconfig 2003-10-05 00:33:23.000000000 -0700 +@@ -239,7 +239,7 @@ config DISCONTIGMEM + + # Now handle the bus types + config PCI +- bool "PCI support" if ARCH_INTEGRATOR ++ bool "PCI support" if ARCH_INTEGRATOR_AP + default y if ARCH_FTVPCI || ARCH_SHARK || FOOTBRIDGE_HOST || ARCH_IOP3XX + help + Find out whether you have a PCI motherboard. PCI is the name of a +@@ -645,8 +645,6 @@ source "drivers/misc/Kconfig" + + source "drivers/usb/Kconfig" + +-source "net/bluetooth/Kconfig" +- + + menu "Kernel hacking" + +--- linux-2.6.0-test6/arch/arm/kernel/apm.c 2003-09-08 13:58:55.000000000 -0700 ++++ 25/arch/arm/kernel/apm.c 2003-10-05 00:33:23.000000000 -0700 +@@ -179,13 +179,10 @@ static void queue_event(apm_event_t even + wake_up_interruptible(&apm_waitqueue); + } + +-/* defined in pm.c */ +-extern int suspend(void); +- + static int apm_suspend(void) + { + struct list_head *l; +- int err = suspend(); ++ int err = pm_suspend(PM_SUSPEND_MEM); + + /* + * Anyone on the APM queues will think we're still suspended. 
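[Editor's note, not part of the patch: the apm.c hunk above belongs to a broader conversion in this patch set, where the ARM-private suspend() entry point is retired in favour of the generic pm_suspend() interface (see also the arch/arm/kernel/pm.c and mach-pxa/mach-sa1100 pm.c hunks below). A minimal sketch of what a caller looks like after the conversion, using only symbols the patch itself relies on (pm_suspend, PM_SUSPEND_MEM); the function name and error reporting are illustrative:

	#include <linux/kernel.h>
	#include <linux/pm.h>

	/*
	 * Illustrative only -- mirrors the apm_suspend() conversion above.
	 * pm_suspend() now drives the whole sequence (device suspend,
	 * sysdev suspend, platform enter/finish) that the old ARM-private
	 * suspend() used to open-code.
	 */
	static int example_suspend(void)
	{
		int err = pm_suspend(PM_SUSPEND_MEM);	/* suspend-to-RAM */

		if (err)
			printk(KERN_WARNING "suspend failed: %d\n", err);
		return err;
	}

The same pm_suspend(PM_SUSPEND_MEM) call replaces the deprecated sysctl path in arch/arm/kernel/pm.c later in this patch.]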
+--- linux-2.6.0-test6/arch/arm/kernel/entry-armv.S 2003-09-27 18:57:43.000000000 -0700 ++++ 25/arch/arm/kernel/entry-armv.S 2003-10-05 00:33:23.000000000 -0700 +@@ -439,20 +439,25 @@ ENTRY(soft_irq_mask) + + .macro get_irqnr_and_base, irqnr, irqstat, base, tmp + /* FIXME: should not be using soo many LDRs here */ +- ldr \irqnr, =IO_ADDRESS(INTEGRATOR_IC_BASE) +- ldr \irqstat, [\irqnr, #IRQ_STATUS] @ get masked status +- ldr \irqnr, =IO_ADDRESS(INTEGRATOR_HDR_BASE) +- ldr \irqnr, [\irqnr, #(INTEGRATOR_HDR_IC_OFFSET+IRQ_STATUS)] +- orr \irqstat, \irqstat, \irqnr, lsl #INTEGRATOR_CM_INT0 ++ ldr \base, =IO_ADDRESS(INTEGRATOR_IC_BASE) ++ mov \irqnr, #IRQ_PIC_START ++ ldr \irqstat, [\base, #IRQ_STATUS] @ get masked status ++ ldr \base, =IO_ADDRESS(INTEGRATOR_HDR_BASE) ++ teq \irqstat, #0 ++ ldreq \irqstat, [\base, #(INTEGRATOR_HDR_IC_OFFSET+IRQ_STATUS)] ++ moveq \irqnr, #IRQ_CIC_START + +- mov \irqnr, #0 +-1001: tst \irqstat, #1 ++1001: tst \irqstat, #15 + bne 1002f ++ add \irqnr, \irqnr, #4 ++ movs \irqstat, \irqstat, lsr #4 ++ bne 1001b ++1002: tst \irqstat, #1 ++ bne 1003f + add \irqnr, \irqnr, #1 +- mov \irqstat, \irqstat, lsr #1 +- cmp \irqnr, #22 +- bcc 1001b +-1002: /* EQ will be set if we reach 22 */ ++ movs \irqstat, \irqstat, lsr #1 ++ bne 1002b ++1003: /* EQ will be set if no irqs pending */ + .endm + + .macro irq_prio_table +--- linux-2.6.0-test6/arch/arm/kernel/pm.c 2003-09-08 13:58:55.000000000 -0700 ++++ 25/arch/arm/kernel/pm.c 2003-10-05 00:33:23.000000000 -0700 +@@ -9,68 +9,18 @@ + * sleep. + */ + #include ++#include ++#include + #include +-#include +-#include + #include + #include + +-#include +-#include +- +-/* +- * Tell the linker that pm_do_suspend may not be present. +- */ +-extern int pm_do_suspend(void) __attribute__((weak)); +- +-int suspend(void) +-{ +- int ret; +- +- if (!pm_do_suspend) +- return -ENOSYS; +- +- /* +- * Suspend "legacy" devices. +- */ +- ret = pm_send_all(PM_SUSPEND, (void *)3); +- if (ret != 0) +- goto out; +- +- ret = device_suspend(3); +- if (ret) +- goto resume_legacy; +- +- local_irq_disable(); +- leds_event(led_stop); +- +- sysdev_suspend(3); +- +- ret = pm_do_suspend(); +- +- sysdev_resume(); +- +- leds_event(led_start); +- local_irq_enable(); +- +- device_resume(); +- +- resume_legacy: +- pm_send_all(PM_RESUME, (void *)0); +- +- out: +- return ret; +-} +- + #ifdef CONFIG_SYSCTL + /* + * We really want this to die. It's a disgusting hack using unallocated + * sysctl numbers. We should be using a real interface. 
+ */ + +-#include +-#include +- + static int + pm_sysctl_proc_handler(ctl_table *ctl, int write, struct file *filp, + void *buffer, size_t *lenp) +@@ -79,7 +29,7 @@ pm_sysctl_proc_handler(ctl_table *ctl, i + printk("PM: task %s (pid %d) uses deprecated sysctl PM interface\n", + current->comm, current->pid); + if (write) +- ret = suspend(); ++ ret = pm_suspend(PM_SUSPEND_MEM); + return ret; + } + +--- linux-2.6.0-test6/arch/arm/kernel/process.c 2003-09-27 18:57:43.000000000 -0700 ++++ 25/arch/arm/kernel/process.c 2003-10-05 00:33:23.000000000 -0700 +@@ -117,12 +117,10 @@ __setup("reboot=", reboot_setup); + + void machine_halt(void) + { +- leds_event(led_halted); + } + + void machine_power_off(void) + { +- leds_event(led_halted); + if (pm_power_off) + pm_power_off(); + } +--- linux-2.6.0-test6/arch/arm/kernel/setup.c 2003-09-27 18:57:43.000000000 -0700 ++++ 25/arch/arm/kernel/setup.c 2003-10-05 00:33:23.000000000 -0700 +@@ -182,7 +182,7 @@ static const char *proc_arch[] = { + "5", + "5T", + "5TE", +- "?(8)", ++ "5TEJ", + "?(9)", + "?(10)", + "?(11)", +--- linux-2.6.0-test6/arch/arm/kernel/signal.c 2003-06-14 12:17:56.000000000 -0700 ++++ 25/arch/arm/kernel/signal.c 2003-10-05 00:33:23.000000000 -0700 +@@ -21,6 +21,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -539,6 +540,11 @@ static int do_signal(sigset_t *oldset, s + if (!user_mode(regs)) + return 0; + ++ if (current->flags & PF_FREEZE) { ++ refrigerator(0); ++ goto no_signal; ++ } ++ + if (current->ptrace & PT_SINGLESTEP) + ptrace_cancel_bpt(current); + +@@ -550,6 +556,7 @@ static int do_signal(sigset_t *oldset, s + return 1; + } + ++ no_signal: + /* + * No signal to deliver to the process - restart the syscall. + */ +--- linux-2.6.0-test6/arch/arm/kernel/time.c 2003-09-27 18:57:43.000000000 -0700 ++++ 25/arch/arm/kernel/time.c 2003-10-05 00:33:23.000000000 -0700 +@@ -26,6 +26,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -72,8 +73,6 @@ unsigned long (*gettimeoffset)(void) = d + */ + unsigned long long sched_clock(void) + { +- unsigned long long this_offset; +- + return (unsigned long long)jiffies * (1000000000 / HZ); + } + +@@ -137,6 +136,47 @@ static void dummy_leds_event(led_event_t + + void (*leds_event)(led_event_t) = dummy_leds_event; + ++static int leds_suspend(struct sys_device *dev, u32 state) ++{ ++ leds_event(led_stop); ++ return 0; ++} ++ ++static int leds_resume(struct sys_device *dev) ++{ ++ leds_event(led_start); ++ return 0; ++} ++ ++static int leds_shutdown(struct sys_device *dev) ++{ ++ leds_event(led_halted); ++ return 0; ++} ++ ++static struct sysdev_class leds_sysclass = { ++ set_kset_name("leds"), ++ .shutdown = leds_shutdown, ++ .suspend = leds_suspend, ++ .resume = leds_resume, ++}; ++ ++static struct sys_device leds_device = { ++ .id = 0, ++ .cls = &leds_sysclass, ++}; ++ ++static int __init leds_init(void) ++{ ++ int ret; ++ ret = sysdev_class_register(&leds_sysclass); ++ if (ret == 0) ++ ret = sys_device_register(&leds_device); ++ return ret; ++} ++ ++device_initcall(leds_init); ++ + EXPORT_SYMBOL(leds_event); + #endif + +--- linux-2.6.0-test6/arch/arm/kernel/traps.c 2003-08-22 19:23:40.000000000 -0700 ++++ 25/arch/arm/kernel/traps.c 2003-10-05 00:33:23.000000000 -0700 +@@ -212,10 +212,10 @@ NORET_TYPE void die(const char *str, str + printk("CPU: %d\n", smp_processor_id()); + show_regs(regs); + printk("Process %s (pid: %d, stack limit = 0x%p)\n", +- current->comm, current->pid, tsk->thread_info + 1); ++ tsk->comm, tsk->pid, tsk->thread_info + 1); + + 
if (!user_mode(regs) || in_interrupt()) { +- dump_mem("Stack: ", (unsigned long)(regs + 1), 8192+(unsigned long)tsk->thread_info); ++ dump_mem("Stack: ", regs->ARM_sp, 8192+(unsigned long)tsk->thread_info); + dump_backtrace(regs, tsk); + dump_instr(regs); + } +--- linux-2.6.0-test6/arch/arm/lib/io-readsl-armv4.S 2003-06-14 12:18:30.000000000 -0700 ++++ 25/arch/arm/lib/io-readsl-armv4.S 2003-10-05 00:33:23.000000000 -0700 +@@ -9,7 +9,6 @@ + */ + #include + #include +-#include + + /* + * Note that some reads can be aligned on half-word boundaries. +@@ -31,6 +30,10 @@ ENTRY(__raw_readsl) + blt 4f + bgt 6f + ++#ifndef __ARMEB__ ++ ++ /* little endian code */ ++ + strh ip, [r1], #2 + mov ip, ip, lsr #16 + 3: subs r2, r2, #1 +@@ -68,3 +71,48 @@ ENTRY(__raw_readsl) + strb ip, [r1] + mov pc, lr + ++#else ++ ++ /* big endian code */ ++ ++ ++ mov r3, ip, lsr #16 ++ strh r3, [r1], #2 ++3: mov r3, ip, lsl #16 ++ subs r2, r2, #1 ++ ldrne ip, [r0] ++ orrne r3, r3, ip, lsr #16 ++ strne r3, [r1], #4 ++ bne 3b ++ strh ip, [r1], #2 ++ mov pc, lr ++ ++4: mov r3, ip, lsr #24 ++ strb r3, [r1], #1 ++ mov r3, ip, lsr #8 ++ strh r3, [r1], #2 ++5: mov r3, ip, lsl #24 ++ subs r2, r2, #1 ++ ldrne ip, [r0] ++ orrne r3, r3, ip, lsr #8 ++ strne r3, [r1], #4 ++ bne 5b ++ strb ip, [r1], #1 ++ mov pc, lr ++ ++6: mov r3, ip, lsr #24 ++ strb r3, [r1], #1 ++7: mov r3, ip, lsl #8 ++ subs r2, r2, #1 ++ ldrne ip, [r0] ++ orrne r3, r3, ip, lsr #24 ++ strne r3, [r1], #4 ++ bne 7b ++ mov r3, ip, lsr #8 ++ strh r3, [r1], #2 ++ strb ip, [r1], #1 ++ mov pc, lr ++ ++#endif ++ ++ +--- linux-2.6.0-test6/arch/arm/lib/io-readsw-armv4.S 2003-06-14 12:18:23.000000000 -0700 ++++ 25/arch/arm/lib/io-readsw-armv4.S 2003-10-05 00:33:23.000000000 -0700 +@@ -9,7 +9,14 @@ + */ + #include + #include +-#include ++ ++ .macro pack, rd, hw1, hw2 ++#ifndef __ARMEB__ ++ orr \rd, \hw1, \hw2, lsl #16 ++#else ++ orr \rd, \hw2, \hw1, lsl #16 ++#endif ++ .endm + + .insw_bad_alignment: + adr r0, .insw_bad_align_msg +@@ -41,19 +48,19 @@ ENTRY(__raw_readsw) + + .insw_8_lp: ldrh r3, [r0] + ldrh r4, [r0] +- orr r3, r3, r4, lsl #16 ++ pack r3, r3, r4 + + ldrh r4, [r0] + ldrh r5, [r0] +- orr r4, r4, r5, lsl #16 ++ pack r4, r4, r5 + + ldrh r5, [r0] + ldrh ip, [r0] +- orr r5, r5, ip, lsl #16 ++ pack r5, r5, ip + + ldrh ip, [r0] + ldrh lr, [r0] +- orr ip, ip, lr, lsl #16 ++ pack ip, ip, lr + + stmia r1!, {r3 - r5, ip} + +@@ -68,11 +75,11 @@ ENTRY(__raw_readsw) + + ldrh r3, [r0] + ldrh r4, [r0] +- orr r3, r3, r4, lsl #16 ++ pack r3, r3, r4 + + ldrh r4, [r0] + ldrh ip, [r0] +- orr r4, r4, ip, lsl #16 ++ pack r4, r4, ip + + stmia r1!, {r3, r4} + +@@ -81,7 +88,7 @@ ENTRY(__raw_readsw) + + ldrh r3, [r0] + ldrh ip, [r0] +- orr r3, r3, ip, lsl #16 ++ pack r3, r3, ip + + str r3, [r1], #4 + +--- linux-2.6.0-test6/arch/arm/lib/io-writesw-armv4.S 2003-06-14 12:18:34.000000000 -0700 ++++ 25/arch/arm/lib/io-writesw-armv4.S 2003-10-05 00:33:23.000000000 -0700 +@@ -9,7 +9,18 @@ + */ + #include + #include +-#include ++ ++ .macro outword, rd ++#ifndef __ARMEB__ ++ strh \rd, [r0] ++ mov \rd, \rd, lsr #16 ++ strh \rd, [r0] ++#else ++ mov lr, \rd, lsr #16 ++ strh lr, [r0] ++ strh \rd, [r0] ++#endif ++ .endm + + .outsw_bad_alignment: + adr r0, .outsw_bad_align_msg +@@ -40,20 +51,10 @@ ENTRY(__raw_writesw) + bmi .no_outsw_8 + + .outsw_8_lp: ldmia r1!, {r3, r4, r5, ip} +- +- strh r3, [r0] +- mov r3, r3, lsr #16 +- strh r3, [r0] +- strh r4, [r0] +- mov r4, r4, lsr #16 +- strh r4, [r0] +- strh r5, [r0] +- mov r5, r5, lsr #16 +- strh r5, [r0] +- strh ip, [r0] +- mov ip, ip, lsr #16 +- strh ip, 
[r0] +- ++ outword r3 ++ outword r4 ++ outword r5 ++ outword ip + subs r2, r2, #8 + bpl .outsw_8_lp + +@@ -64,20 +65,14 @@ ENTRY(__raw_writesw) + beq .no_outsw_4 + + ldmia r1!, {r3, ip} +- strh r3, [r0] +- mov r3, r3, lsr #16 +- strh r3, [r0] +- strh ip, [r0] +- mov ip, ip, lsr #16 +- strh ip, [r0] ++ outword r3 ++ outword ip + + .no_outsw_4: tst r2, #2 + beq .no_outsw_2 + + ldr r3, [r1], #4 +- strh r3, [r0] +- mov r3, r3, lsr #16 +- strh r3, [r0] ++ outword r3 + + .no_outsw_2: tst r2, #1 + ldrneh r3, [r1] +--- linux-2.6.0-test6/arch/arm/lib/lib1funcs.S 2003-09-27 18:57:43.000000000 -0700 ++++ 25/arch/arm/lib/lib1funcs.S 2003-10-05 00:33:23.000000000 -0700 +@@ -1,7 +1,12 @@ +-@ libgcc1 routines for ARM cpu. +-@ Division routines, written by Richard Earnshaw, (rearnsha@armltd.co.uk) ++/* ++ * linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines ++ * ++ * Author: Nicolas Pitre ++ * - contributed to gcc-3.4 on Sep 30, 2003 ++ * - adapted for the Linux kernel on Oct 2, 2003 ++ */ + +-/* Copyright (C) 1995, 1996, 1998 Free Software Foundation, Inc. ++/* Copyright 1995, 1996, 1998, 1999, 2000, 2003 Free Software Foundation, Inc. + + This file is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the +@@ -10,11 +15,12 @@ later version. + + In addition to the permissions in the GNU General Public License, the + Free Software Foundation gives you unlimited permission to link the +-compiled version of this file with other programs, and to distribute +-those programs without any restriction coming from the use of this +-file. (The General Public License restrictions do apply in other +-respects; for example, they cover modification of the file, and +-distribution when not linked into another program.) ++compiled version of this file into combinations with other programs, ++and to distribute those combinations without any restriction coming ++from the use of this file. (The General Public License restrictions ++do apply in other respects; for example, they cover modification of ++the file, and distribution when not linked into a combine ++executable.) + + This file is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -26,286 +32,283 @@ along with this program; see the file CO + the Free Software Foundation, 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + +-/* As a special exception, if you link this library with other files, +- some of which are compiled with GCC, to produce an executable, +- this library does not by itself cause the resulting executable +- to be covered by the GNU General Public License. +- This exception does not however invalidate any other reasons why +- the executable file might be covered by the GNU General Public License. 
+- */ +-/* This code is derived from gcc 2.95.3 +- * 29/07/01 Adapted for linux +- * 27/03/03 Ian Molton Clean up CONFIG_CPU +- */ + + #include + #include +-#include + +-#define RET mov +-#define RETc(x) mov##x +-#define RETCOND +- +-dividend .req r0 +-divisor .req r1 +-result .req r2 +-overdone .req r2 +-curbit .req r3 ++ ++.macro ARM_DIV_BODY dividend, divisor, result, curbit ++ ++#if __LINUX_ARM_ARCH__ >= 5 ++ ++ clz \curbit, \divisor ++ clz \result, \dividend ++ sub \result, \curbit, \result ++ mov \curbit, #1 ++ mov \divisor, \divisor, lsl \result ++ mov \curbit, \curbit, lsl \result ++ mov \result, #0 + +-ENTRY(__udivsi3) +- cmp divisor, #0 +- beq Ldiv0 +- mov curbit, #1 +- mov result, #0 +- cmp dividend, divisor +- bcc Lgot_result_udivsi3 +-1: ++#else ++ ++ @ Initially shift the divisor left 3 bits if possible, ++ @ set curbit accordingly. This allows for curbit to be located ++ @ at the left end of each 4 bit nibbles in the division loop ++ @ to save one loop in most cases. ++ tst \divisor, #0xe0000000 ++ moveq \divisor, \divisor, lsl #3 ++ moveq \curbit, #8 ++ movne \curbit, #1 ++ + @ Unless the divisor is very big, shift it up in multiples of + @ four bits, since this is the amount of unwinding in the main + @ division loop. Continue shifting until the divisor is + @ larger than the dividend. +- cmp divisor, #0x10000000 +- cmpcc divisor, dividend +- movcc divisor, divisor, lsl #4 +- movcc curbit, curbit, lsl #4 +- bcc 1b ++1: cmp \divisor, #0x10000000 ++ cmplo \divisor, \dividend ++ movlo \divisor, \divisor, lsl #4 ++ movlo \curbit, \curbit, lsl #4 ++ blo 1b + +-2: + @ For very big divisors, we must shift it a bit at a time, or + @ we will be in danger of overflowing. +- cmp divisor, #0x80000000 +- cmpcc divisor, dividend +- movcc divisor, divisor, lsl #1 +- movcc curbit, curbit, lsl #1 +- bcc 2b +- +-3: +- @ Test for possible subtractions, and note which bits +- @ are done in the result. On the final pass, this may subtract +- @ too much from the dividend, but the result will be ok, since the +- @ "bit" will have been shifted out at the bottom. +- cmp dividend, divisor +- subcs dividend, dividend, divisor +- orrcs result, result, curbit +- cmp dividend, divisor, lsr #1 +- subcs dividend, dividend, divisor, lsr #1 +- orrcs result, result, curbit, lsr #1 +- cmp dividend, divisor, lsr #2 +- subcs dividend, dividend, divisor, lsr #2 +- orrcs result, result, curbit, lsr #2 +- cmp dividend, divisor, lsr #3 +- subcs dividend, dividend, divisor, lsr #3 +- orrcs result, result, curbit, lsr #3 +- cmp dividend, #0 @ Early termination? +- movnes curbit, curbit, lsr #4 @ No, any more bits to do? +- movne divisor, divisor, lsr #4 +- bne 3b +-Lgot_result_udivsi3: +- mov r0, result +- RET pc, lr ++1: cmp \divisor, #0x80000000 ++ cmplo \divisor, \dividend ++ movlo \divisor, \divisor, lsl #1 ++ movlo \curbit, \curbit, lsl #1 ++ blo 1b + +-Ldiv0: +- str lr, [sp, #-4]! 
+- bl __div0 +- mov r0, #0 @ about as wrong as it could be +- ldmia sp!, {pc}RETCOND ++ mov \result, #0 + +-/* __umodsi3 ----------------------- */ ++#endif ++ ++ @ Division loop ++1: cmp \dividend, \divisor ++ subhs \dividend, \dividend, \divisor ++ orrhs \result, \result, \curbit ++ cmp \dividend, \divisor, lsr #1 ++ subhs \dividend, \dividend, \divisor, lsr #1 ++ orrhs \result, \result, \curbit, lsr #1 ++ cmp \dividend, \divisor, lsr #2 ++ subhs \dividend, \dividend, \divisor, lsr #2 ++ orrhs \result, \result, \curbit, lsr #2 ++ cmp \dividend, \divisor, lsr #3 ++ subhs \dividend, \dividend, \divisor, lsr #3 ++ orrhs \result, \result, \curbit, lsr #3 ++ cmp \dividend, #0 @ Early termination? ++ movnes \curbit, \curbit, lsr #4 @ No, any more bits to do? ++ movne \divisor, \divisor, lsr #4 ++ bne 1b ++ ++.endm ++ ++ ++.macro ARM_DIV2_ORDER divisor, order ++ ++#if __LINUX_ARM_ARCH__ >= 5 ++ ++ clz \order, \divisor ++ rsb \order, \order, #31 ++ ++#else ++ ++ cmp \divisor, #(1 << 16) ++ movhs \divisor, \divisor, lsr #16 ++ movhs \order, #16 ++ movlo \order, #0 ++ ++ cmp \divisor, #(1 << 8) ++ movhs \divisor, \divisor, lsr #8 ++ addhs \order, \order, #8 ++ ++ cmp \divisor, #(1 << 4) ++ movhs \divisor, \divisor, lsr #4 ++ addhs \order, \order, #4 ++ ++ cmp \divisor, #(1 << 2) ++ addhi \order, \order, #3 ++ addls \order, \order, \divisor, lsr #1 ++ ++#endif ++ ++.endm ++ ++ ++.macro ARM_MOD_BODY dividend, divisor, order, spare ++ ++#if __LINUX_ARM_ARCH__ >= 5 ++ ++ clz \order, \divisor ++ clz \spare, \dividend ++ sub \order, \order, \spare ++ mov \divisor, \divisor, lsl \order ++ ++#else ++ ++ mov \order, #0 + +-ENTRY(__umodsi3) +- cmp divisor, #0 +- beq Ldiv0 +- mov curbit, #1 +- cmp dividend, divisor +- RETc(cc) pc, lr +-1: + @ Unless the divisor is very big, shift it up in multiples of + @ four bits, since this is the amount of unwinding in the main + @ division loop. Continue shifting until the divisor is + @ larger than the dividend. +- cmp divisor, #0x10000000 +- cmpcc divisor, dividend +- movcc divisor, divisor, lsl #4 +- movcc curbit, curbit, lsl #4 +- bcc 1b ++1: cmp \divisor, #0x10000000 ++ cmplo \divisor, \dividend ++ movlo \divisor, \divisor, lsl #4 ++ addlo \order, \order, #4 ++ blo 1b + +-2: + @ For very big divisors, we must shift it a bit at a time, or + @ we will be in danger of overflowing. +- cmp divisor, #0x80000000 +- cmpcc divisor, dividend +- movcc divisor, divisor, lsl #1 +- movcc curbit, curbit, lsl #1 +- bcc 2b +- +-3: +- @ Test for possible subtractions. On the final pass, this may +- @ subtract too much from the dividend, so keep track of which +- @ subtractions are done, we can fix them up afterwards... +- mov overdone, #0 +- cmp dividend, divisor +- subcs dividend, dividend, divisor +- cmp dividend, divisor, lsr #1 +- subcs dividend, dividend, divisor, lsr #1 +- orrcs overdone, overdone, curbit, ror #1 +- cmp dividend, divisor, lsr #2 +- subcs dividend, dividend, divisor, lsr #2 +- orrcs overdone, overdone, curbit, ror #2 +- cmp dividend, divisor, lsr #3 +- subcs dividend, dividend, divisor, lsr #3 +- orrcs overdone, overdone, curbit, ror #3 +- mov ip, curbit +- cmp dividend, #0 @ Early termination? +- movnes curbit, curbit, lsr #4 @ No, any more bits to do? +- movne divisor, divisor, lsr #4 +- bne 3b +- +- @ Any subtractions that we should not have done will be recorded in +- @ the top three bits of "overdone". Exactly which were not needed +- @ are governed by the position of the bit, stored in ip. 
+- @ If we terminated early, because dividend became zero, +- @ then none of the below will match, since the bit in ip will not be +- @ in the bottom nibble. +- ands overdone, overdone, #0xe0000000 +- RETc(eq) pc, lr @ No fixups needed +- tst overdone, ip, ror #3 +- addne dividend, dividend, divisor, lsr #3 +- tst overdone, ip, ror #2 +- addne dividend, dividend, divisor, lsr #2 +- tst overdone, ip, ror #1 +- addne dividend, dividend, divisor, lsr #1 +- RET pc, lr ++1: cmp \divisor, #0x80000000 ++ cmplo \divisor, \dividend ++ movlo \divisor, \divisor, lsl #1 ++ addlo \order, \order, #1 ++ blo 1b ++ ++#endif ++ ++ @ Perform all needed substractions to keep only the reminder. ++ @ Do comparisons in batch of 4 first. ++ subs \order, \order, #3 @ yes, 3 is intended here ++ blt 2f ++ ++1: cmp \dividend, \divisor ++ subhs \dividend, \dividend, \divisor ++ cmp \dividend, \divisor, lsr #1 ++ subhs \dividend, \dividend, \divisor, lsr #1 ++ cmp \dividend, \divisor, lsr #2 ++ subhs \dividend, \dividend, \divisor, lsr #2 ++ cmp \dividend, \divisor, lsr #3 ++ subhs \dividend, \dividend, \divisor, lsr #3 ++ cmp \dividend, #1 ++ mov \divisor, \divisor, lsr #4 ++ subges \order, \order, #4 ++ bge 1b ++ ++ tst \order, #3 ++ teqne \dividend, #0 ++ beq 5f ++ ++ @ Either 1, 2 or 3 comparison/substractions are left. ++2: cmn \order, #2 ++ blt 4f ++ beq 3f ++ cmp \dividend, \divisor ++ subhs \dividend, \dividend, \divisor ++ mov \divisor, \divisor, lsr #1 ++3: cmp \dividend, \divisor ++ subhs \dividend, \dividend, \divisor ++ mov \divisor, \divisor, lsr #1 ++4: cmp \dividend, \divisor ++ subhs \dividend, \dividend, \divisor ++5: ++.endm ++ ++ ++ENTRY(__udivsi3) ++ ++ subs r2, r1, #1 ++ moveq pc, lr ++ bcc Ldiv0 ++ cmp r0, r1 ++ bls 11f ++ tst r1, r2 ++ beq 12f ++ ++ ARM_DIV_BODY r0, r1, r2, r3 ++ ++ mov r0, r2 ++ mov pc, lr ++ ++11: moveq r0, #1 ++ movne r0, #0 ++ mov pc, lr ++ ++12: ARM_DIV2_ORDER r1, r2 ++ ++ mov r0, r0, lsr r2 ++ mov pc, lr ++ ++ ++ENTRY(__umodsi3) ++ ++ subs r2, r1, #1 @ compare divisor with 1 ++ bcc Ldiv0 ++ cmpne r0, r1 @ compare dividend with divisor ++ moveq r0, #0 ++ tsthi r1, r2 @ see if divisor is power of 2 ++ andeq r0, r0, r2 ++ movls pc, lr ++ ++ ARM_MOD_BODY r0, r1, r2, r3 ++ ++ mov pc, lr ++ + + ENTRY(__divsi3) +- eor ip, dividend, divisor @ Save the sign of the result. +- mov curbit, #1 +- mov result, #0 +- cmp divisor, #0 +- rsbmi divisor, divisor, #0 @ Loops below use unsigned. ++ ++ cmp r1, #0 ++ eor ip, r0, r1 @ save the sign of the result. + beq Ldiv0 +- cmp dividend, #0 +- rsbmi dividend, dividend, #0 +- cmp dividend, divisor +- bcc Lgot_result_divsi3 ++ rsbmi r1, r1, #0 @ loops below use unsigned. ++ subs r2, r1, #1 @ division by 1 or -1 ? ++ beq 10f ++ movs r3, r0 ++ rsbmi r3, r0, #0 @ positive dividend value ++ cmp r3, r1 ++ bls 11f ++ tst r1, r2 @ divisor is power of 2 ? ++ beq 12f + +-1: +- @ Unless the divisor is very big, shift it up in multiples of +- @ four bits, since this is the amount of unwinding in the main +- @ division loop. Continue shifting until the divisor is +- @ larger than the dividend. +- cmp divisor, #0x10000000 +- cmpcc divisor, dividend +- movcc divisor, divisor, lsl #4 +- movcc curbit, curbit, lsl #4 +- bcc 1b ++ ARM_DIV_BODY r3, r1, r0, r2 + +-2: +- @ For very big divisors, we must shift it a bit at a time, or +- @ we will be in danger of overflowing. 
+- cmp divisor, #0x80000000 +- cmpcc divisor, dividend +- movcc divisor, divisor, lsl #1 +- movcc curbit, curbit, lsl #1 +- bcc 2b +- +-3: +- @ Test for possible subtractions, and note which bits +- @ are done in the result. On the final pass, this may subtract +- @ too much from the dividend, but the result will be ok, since the +- @ "bit" will have been shifted out at the bottom. +- cmp dividend, divisor +- subcs dividend, dividend, divisor +- orrcs result, result, curbit +- cmp dividend, divisor, lsr #1 +- subcs dividend, dividend, divisor, lsr #1 +- orrcs result, result, curbit, lsr #1 +- cmp dividend, divisor, lsr #2 +- subcs dividend, dividend, divisor, lsr #2 +- orrcs result, result, curbit, lsr #2 +- cmp dividend, divisor, lsr #3 +- subcs dividend, dividend, divisor, lsr #3 +- orrcs result, result, curbit, lsr #3 +- cmp dividend, #0 @ Early termination? +- movnes curbit, curbit, lsr #4 @ No, any more bits to do? +- movne divisor, divisor, lsr #4 +- bne 3b +-Lgot_result_divsi3: +- mov r0, result + cmp ip, #0 + rsbmi r0, r0, #0 +- RET pc, lr ++ mov pc, lr ++ ++10: teq ip, r0 @ same sign ? ++ rsbmi r0, r0, #0 ++ mov pc, lr ++ ++11: movlo r0, #0 ++ moveq r0, ip, asr #31 ++ orreq r0, r0, #1 ++ mov pc, lr ++ ++12: ARM_DIV2_ORDER r1, r2 ++ ++ cmp ip, #0 ++ mov r0, r3, lsr r2 ++ rsbmi r0, r0, #0 ++ mov pc, lr ++ + + ENTRY(__modsi3) +- mov curbit, #1 +- cmp divisor, #0 +- rsbmi divisor, divisor, #0 @ Loops below use unsigned. ++ ++ cmp r1, #0 + beq Ldiv0 +- @ Need to save the sign of the dividend, unfortunately, we need +- @ ip later on; this is faster than pushing lr and using that. +- str dividend, [sp, #-4]! +- cmp dividend, #0 +- rsbmi dividend, dividend, #0 +- cmp dividend, divisor +- bcc Lgot_result_modsi3 ++ rsbmi r1, r1, #0 @ loops below use unsigned. ++ movs ip, r0 @ preserve sign of dividend ++ rsbmi r0, r0, #0 @ if negative make positive ++ subs r2, r1, #1 @ compare divisor with 1 ++ cmpne r0, r1 @ compare dividend with divisor ++ moveq r0, #0 ++ tsthi r1, r2 @ see if divisor is power of 2 ++ andeq r0, r0, r2 ++ bls 10f ++ ++ ARM_MOD_BODY r0, r1, r2, r3 ++ ++10: cmp ip, #0 ++ rsbmi r0, r0, #0 ++ mov pc, lr ++ ++ ++Ldiv0: ++ ++ str lr, [sp, #-4]! ++ bl __div0 ++ mov r0, #0 @ About as wrong as it could be. ++ ldr pc, [sp], #4 + +-1: +- @ Unless the divisor is very big, shift it up in multiples of +- @ four bits, since this is the amount of unwinding in the main +- @ division loop. Continue shifting until the divisor is +- @ larger than the dividend. +- cmp divisor, #0x10000000 +- cmpcc divisor, dividend +- movcc divisor, divisor, lsl #4 +- movcc curbit, curbit, lsl #4 +- bcc 1b + +-2: +- @ For very big divisors, we must shift it a bit at a time, or +- @ we will be in danger of overflowing. +- cmp divisor, #0x80000000 +- cmpcc divisor, dividend +- movcc divisor, divisor, lsl #1 +- movcc curbit, curbit, lsl #1 +- bcc 2b +- +-3: +- @ Test for possible subtractions. On the final pass, this may +- @ subtract too much from the dividend, so keep track of which +- @ subtractions are done, we can fix them up afterwards... 
+- mov overdone, #0 +- cmp dividend, divisor +- subcs dividend, dividend, divisor +- cmp dividend, divisor, lsr #1 +- subcs dividend, dividend, divisor, lsr #1 +- orrcs overdone, overdone, curbit, ror #1 +- cmp dividend, divisor, lsr #2 +- subcs dividend, dividend, divisor, lsr #2 +- orrcs overdone, overdone, curbit, ror #2 +- cmp dividend, divisor, lsr #3 +- subcs dividend, dividend, divisor, lsr #3 +- orrcs overdone, overdone, curbit, ror #3 +- mov ip, curbit +- cmp dividend, #0 @ Early termination? +- movnes curbit, curbit, lsr #4 @ No, any more bits to do? +- movne divisor, divisor, lsr #4 +- bne 3b +- +- @ Any subtractions that we should not have done will be recorded in +- @ the top three bits of "overdone". Exactly which were not needed +- @ are governed by the position of the bit, stored in ip. +- @ If we terminated early, because dividend became zero, +- @ then none of the below will match, since the bit in ip will not be +- @ in the bottom nibble. +- ands overdone, overdone, #0xe0000000 +- beq Lgot_result_modsi3 +- tst overdone, ip, ror #3 +- addne dividend, dividend, divisor, lsr #3 +- tst overdone, ip, ror #2 +- addne dividend, dividend, divisor, lsr #2 +- tst overdone, ip, ror #1 +- addne dividend, dividend, divisor, lsr #1 +-Lgot_result_modsi3: +- ldr ip, [sp], #4 +- cmp ip, #0 +- rsbmi dividend, dividend, #0 +- RET pc, lr +--- linux-2.6.0-test6/arch/arm/mach-integrator/core.c 2003-09-27 18:57:43.000000000 -0700 ++++ 25/arch/arm/mach-integrator/core.c 2003-10-05 00:33:23.000000000 -0700 +@@ -1,134 +1,59 @@ + /* +- * linux/arch/arm/mach-integrator/arch.c ++ * linux/arch/arm/mach-integrator/core.c + * +- * Copyright (C) 2000 Deep Blue Solutions Ltd ++ * Copyright (C) 2000-2003 Deep Blue Solutions Ltd + * + * This program is free software; you can redistribute it and/or modify +- * it under the terms of the GNU General Public License as published by +- * the Free Software Foundation; either version 2 of the License, or +- * (at your option) any later version. +- * +- * This program is distributed in the hope that it will be useful, +- * but WITHOUT ANY WARRANTY; without even the implied warranty of +- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +- * GNU General Public License for more details. +- * +- * You should have received a copy of the GNU General Public License +- * along with this program; if not, write to the Free Software +- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ++ * it under the terms of the GNU General Public License version 2, as ++ * published by the Free Software Foundation. + */ + #include + #include + #include +-#include + #include +-#include +-#include + + #include +-#include + #include +-#include +-#include + #include +-#include +- +-#include +- +-#include +-#include +-#include +- +-/* +- * All IO addresses are mapped onto VA 0xFFFx.xxxx, where x.xxxx +- * is the (PA >> 12). +- * +- * Setup a VA for the Integrator interrupt controller (for header #0, +- * just for now). 
+- */ +-#define VA_IC_BASE IO_ADDRESS(INTEGRATOR_IC_BASE) +-#define VA_SC_BASE IO_ADDRESS(INTEGRATOR_SC_BASE) +-#define VA_CMIC_BASE IO_ADDRESS(INTEGRATOR_HDR_BASE) + INTEGRATOR_HDR_IC_OFFSET +- +-/* +- * Logical Physical +- * e8000000 40000000 PCI memory PHYS_PCI_MEM_BASE (max 512M) +- * ec000000 61000000 PCI config space PHYS_PCI_CONFIG_BASE (max 16M) +- * ed000000 62000000 PCI V3 regs PHYS_PCI_V3_BASE (max 64k) +- * ee000000 60000000 PCI IO PHYS_PCI_IO_BASE (max 16M) +- * ef000000 Cache flush +- * f1000000 10000000 Core module registers +- * f1100000 11000000 System controller registers +- * f1200000 12000000 EBI registers +- * f1300000 13000000 Counter/Timer +- * f1400000 14000000 Interrupt controller +- * f1500000 15000000 RTC +- * f1600000 16000000 UART 0 +- * f1700000 17000000 UART 1 +- * f1a00000 1a000000 Debug LEDs +- * f1b00000 1b000000 GPIO +- */ + +-static struct map_desc integrator_io_desc[] __initdata = { +- { IO_ADDRESS(INTEGRATOR_HDR_BASE), INTEGRATOR_HDR_BASE, SZ_4K, MT_DEVICE }, +- { IO_ADDRESS(INTEGRATOR_SC_BASE), INTEGRATOR_SC_BASE, SZ_4K, MT_DEVICE }, +- { IO_ADDRESS(INTEGRATOR_EBI_BASE), INTEGRATOR_EBI_BASE, SZ_4K, MT_DEVICE }, +- { IO_ADDRESS(INTEGRATOR_CT_BASE), INTEGRATOR_CT_BASE, SZ_4K, MT_DEVICE }, +- { IO_ADDRESS(INTEGRATOR_IC_BASE), INTEGRATOR_IC_BASE, SZ_4K, MT_DEVICE }, +- { IO_ADDRESS(INTEGRATOR_RTC_BASE), INTEGRATOR_RTC_BASE, SZ_4K, MT_DEVICE }, +- { IO_ADDRESS(INTEGRATOR_UART0_BASE), INTEGRATOR_UART0_BASE, SZ_4K, MT_DEVICE }, +- { IO_ADDRESS(INTEGRATOR_UART1_BASE), INTEGRATOR_UART1_BASE, SZ_4K, MT_DEVICE }, +- { IO_ADDRESS(INTEGRATOR_DBG_BASE), INTEGRATOR_DBG_BASE, SZ_4K, MT_DEVICE }, +- { IO_ADDRESS(INTEGRATOR_GPIO_BASE), INTEGRATOR_GPIO_BASE, SZ_4K, MT_DEVICE }, +- { PCI_MEMORY_VADDR, PHYS_PCI_MEM_BASE, SZ_16M, MT_DEVICE }, +- { PCI_CONFIG_VADDR, PHYS_PCI_CONFIG_BASE, SZ_16M, MT_DEVICE }, +- { PCI_V3_VADDR, PHYS_PCI_V3_BASE, SZ_64K, MT_DEVICE }, +- { PCI_IO_VADDR, PHYS_PCI_IO_BASE, SZ_64K, MT_DEVICE } ++static struct amba_device rtc_device = { ++ .dev = { ++ .bus_id = "mb:15", ++ }, ++ .res = { ++ .start = INTEGRATOR_RTC_BASE, ++ .end = INTEGRATOR_RTC_BASE + SZ_4K - 1, ++ .flags = IORESOURCE_MEM, ++ }, ++ .irq = IRQ_RTCINT, ++ .periphid = 0x00041030, + }; + +-static void __init integrator_map_io(void) +-{ +- iotable_init(integrator_io_desc, ARRAY_SIZE(integrator_io_desc)); +-} +- +-#define ALLPCI ( (1 << IRQ_PCIINT0) | (1 << IRQ_PCIINT1) | (1 << IRQ_PCIINT2) | (1 << IRQ_PCIINT3) ) +- +-static void sc_mask_irq(unsigned int irq) +-{ +- writel(1 << irq, VA_IC_BASE + IRQ_ENABLE_CLEAR); +-} +- +-static void sc_unmask_irq(unsigned int irq) +-{ +- writel(1 << irq, VA_IC_BASE + IRQ_ENABLE_SET); +-} +- +-static struct irqchip sc_chip = { +- .ack = sc_mask_irq, +- .mask = sc_mask_irq, +- .unmask = sc_unmask_irq, ++static struct amba_device uart0_device = { ++ .dev = { ++ .bus_id = "mb:16", ++ }, ++ .res = { ++ .start = INTEGRATOR_UART0_BASE, ++ .end = INTEGRATOR_UART0_BASE + SZ_4K - 1, ++ .flags = IORESOURCE_MEM, ++ }, ++ .irq = IRQ_UARTINT0, ++ .periphid = 0x0041010, + }; + +-static void __init integrator_init_irq(void) +-{ +- unsigned int i; +- +- /* Disable all interrupts initially. 
*/ +- /* Do the core module ones */ +- writel(-1, VA_CMIC_BASE + IRQ_ENABLE_CLEAR); +- +- /* do the header card stuff next */ +- writel(-1, VA_IC_BASE + IRQ_ENABLE_CLEAR); +- writel(-1, VA_IC_BASE + FIQ_ENABLE_CLEAR); +- +- for (i = 0; i < NR_IRQS; i++) { +- if (((1 << i) && INTEGRATOR_SC_VALID_INT) != 0) { +- set_irq_chip(i, &sc_chip); +- set_irq_handler(i, do_level_IRQ); +- set_irq_flags(i, IRQF_VALID | IRQF_PROBE); +- } +- } +-} ++static struct amba_device uart1_device = { ++ .dev = { ++ .bus_id = "mb:17", ++ }, ++ .res = { ++ .start = INTEGRATOR_UART1_BASE, ++ .end = INTEGRATOR_UART1_BASE + SZ_4K - 1, ++ .flags = IORESOURCE_MEM, ++ }, ++ .irq = IRQ_UARTINT1, ++ .periphid = 0x0041010, ++}; + + static struct amba_device kmi0_device = { + .dev = { +@@ -136,7 +61,7 @@ static struct amba_device kmi0_device = + }, + .res = { + .start = KMI0_BASE, +- .end = KMI0_BASE + KMI_SIZE - 1, ++ .end = KMI0_BASE + SZ_4K - 1, + .flags = IORESOURCE_MEM, + }, + .irq = IRQ_KMIINT0, +@@ -149,7 +74,7 @@ static struct amba_device kmi1_device = + }, + .res = { + .start = KMI1_BASE, +- .end = KMI1_BASE + KMI_SIZE - 1, ++ .end = KMI1_BASE + SZ_4K - 1, + .flags = IORESOURCE_MEM, + }, + .irq = IRQ_KMIINT1, +@@ -157,52 +82,23 @@ static struct amba_device kmi1_device = + }; + + static struct amba_device *amba_devs[] __initdata = { ++ &rtc_device, ++ &uart0_device, ++ &uart1_device, + &kmi0_device, + &kmi1_device, + }; + +-static int __init register_devices(void) ++static int __init integrator_init(void) + { +- unsigned long sc_dec; + int i; + + for (i = 0; i < ARRAY_SIZE(amba_devs); i++) { + struct amba_device *d = amba_devs[i]; +- + amba_device_register(d, &iomem_resource); + } + +- sc_dec = readl(VA_SC_BASE + INTEGRATOR_SC_DEC_OFFSET); +- for (i = 0; i < 4; i++) { +- struct lm_device *lmdev; +- +- if ((sc_dec & (16 << i)) == 0) +- continue; +- +- lmdev = kmalloc(sizeof(struct lm_device), GFP_KERNEL); +- if (!lmdev) +- continue; +- +- memset(lmdev, 0, sizeof(struct lm_device)); +- +- lmdev->resource.start = 0xc0000000 + 0x10000000 * i; +- lmdev->resource.end = lmdev->resource.start + 0x0fffffff; +- lmdev->resource.flags = IORESOURCE_MEM; +- lmdev->irq = IRQ_EXPINT0 + i; +- lmdev->id = i; +- +- lm_device_register(lmdev); +- } +- + return 0; + } + +-arch_initcall(register_devices); +- +-MACHINE_START(INTEGRATOR, "ARM-Integrator") +- MAINTAINER("ARM Ltd/Deep Blue Solutions Ltd") +- BOOT_MEM(0x00000000, 0x16000000, 0xf1600000) +- BOOT_PARAMS(0x00000100) +- MAPIO(integrator_map_io) +- INITIRQ(integrator_init_irq) +-MACHINE_END ++arch_initcall(integrator_init); +--- /dev/null 2002-08-30 16:31:37.000000000 -0700 ++++ 25/arch/arm/mach-integrator/integrator_ap.c 2003-10-05 00:33:23.000000000 -0700 +@@ -0,0 +1,294 @@ ++/* ++ * linux/arch/arm/mach-integrator/integrator_ap.c ++ * ++ * Copyright (C) 2000-2003 Deep Blue Solutions Ltd ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++ ++#include ++#include ++#include ++#include ++ ++ ++/* ++ * All IO addresses are mapped onto VA 0xFFFx.xxxx, where x.xxxx ++ * is the (PA >> 12). ++ * ++ * Setup a VA for the Integrator interrupt controller (for header #0, ++ * just for now). ++ */ ++#define VA_IC_BASE IO_ADDRESS(INTEGRATOR_IC_BASE) ++#define VA_SC_BASE IO_ADDRESS(INTEGRATOR_SC_BASE) ++#define VA_EBI_BASE IO_ADDRESS(INTEGRATOR_EBI_BASE) ++#define VA_CMIC_BASE IO_ADDRESS(INTEGRATOR_HDR_BASE) + INTEGRATOR_HDR_IC_OFFSET ++ ++/* ++ * Logical Physical ++ * e8000000 40000000 PCI memory PHYS_PCI_MEM_BASE (max 512M) ++ * ec000000 61000000 PCI config space PHYS_PCI_CONFIG_BASE (max 16M) ++ * ed000000 62000000 PCI V3 regs PHYS_PCI_V3_BASE (max 64k) ++ * ee000000 60000000 PCI IO PHYS_PCI_IO_BASE (max 16M) ++ * ef000000 Cache flush ++ * f1000000 10000000 Core module registers ++ * f1100000 11000000 System controller registers ++ * f1200000 12000000 EBI registers ++ * f1300000 13000000 Counter/Timer ++ * f1400000 14000000 Interrupt controller ++ * f1500000 15000000 RTC ++ * f1600000 16000000 UART 0 ++ * f1700000 17000000 UART 1 ++ * f1a00000 1a000000 Debug LEDs ++ * f1b00000 1b000000 GPIO ++ */ ++ ++static struct map_desc ap_io_desc[] __initdata = { ++ { IO_ADDRESS(INTEGRATOR_HDR_BASE), INTEGRATOR_HDR_BASE, SZ_4K, MT_DEVICE }, ++ { IO_ADDRESS(INTEGRATOR_SC_BASE), INTEGRATOR_SC_BASE, SZ_4K, MT_DEVICE }, ++ { IO_ADDRESS(INTEGRATOR_EBI_BASE), INTEGRATOR_EBI_BASE, SZ_4K, MT_DEVICE }, ++ { IO_ADDRESS(INTEGRATOR_CT_BASE), INTEGRATOR_CT_BASE, SZ_4K, MT_DEVICE }, ++ { IO_ADDRESS(INTEGRATOR_IC_BASE), INTEGRATOR_IC_BASE, SZ_4K, MT_DEVICE }, ++ { IO_ADDRESS(INTEGRATOR_RTC_BASE), INTEGRATOR_RTC_BASE, SZ_4K, MT_DEVICE }, ++ { IO_ADDRESS(INTEGRATOR_UART0_BASE), INTEGRATOR_UART0_BASE, SZ_4K, MT_DEVICE }, ++ { IO_ADDRESS(INTEGRATOR_UART1_BASE), INTEGRATOR_UART1_BASE, SZ_4K, MT_DEVICE }, ++ { IO_ADDRESS(INTEGRATOR_DBG_BASE), INTEGRATOR_DBG_BASE, SZ_4K, MT_DEVICE }, ++ { IO_ADDRESS(INTEGRATOR_GPIO_BASE), INTEGRATOR_GPIO_BASE, SZ_4K, MT_DEVICE }, ++ { PCI_MEMORY_VADDR, PHYS_PCI_MEM_BASE, SZ_16M, MT_DEVICE }, ++ { PCI_CONFIG_VADDR, PHYS_PCI_CONFIG_BASE, SZ_16M, MT_DEVICE }, ++ { PCI_V3_VADDR, PHYS_PCI_V3_BASE, SZ_64K, MT_DEVICE }, ++ { PCI_IO_VADDR, PHYS_PCI_IO_BASE, SZ_64K, MT_DEVICE } ++}; ++ ++static void __init ap_map_io(void) ++{ ++ iotable_init(ap_io_desc, ARRAY_SIZE(ap_io_desc)); ++} ++ ++#define INTEGRATOR_SC_VALID_INT 0x003fffff ++ ++static void sc_mask_irq(unsigned int irq) ++{ ++ writel(1 << irq, VA_IC_BASE + IRQ_ENABLE_CLEAR); ++} ++ ++static void sc_unmask_irq(unsigned int irq) ++{ ++ writel(1 << irq, VA_IC_BASE + IRQ_ENABLE_SET); ++} ++ ++static struct irqchip sc_chip = { ++ .ack = sc_mask_irq, ++ .mask = sc_mask_irq, ++ .unmask = sc_unmask_irq, ++}; ++ ++static void __init ap_init_irq(void) ++{ ++ unsigned int i; ++ ++ /* Disable all interrupts initially. 
*/ ++ /* Do the core module ones */ ++ writel(-1, VA_CMIC_BASE + IRQ_ENABLE_CLEAR); ++ ++ /* do the header card stuff next */ ++ writel(-1, VA_IC_BASE + IRQ_ENABLE_CLEAR); ++ writel(-1, VA_IC_BASE + FIQ_ENABLE_CLEAR); ++ ++ for (i = 0; i < NR_IRQS; i++) { ++ if (((1 << i) && INTEGRATOR_SC_VALID_INT) != 0) { ++ set_irq_chip(i, &sc_chip); ++ set_irq_handler(i, do_level_IRQ); ++ set_irq_flags(i, IRQF_VALID | IRQF_PROBE); ++ } ++ } ++} ++ ++#ifdef CONFIG_PM ++static unsigned long ic_irq_enable; ++ ++static int irq_suspend(struct sys_device *dev, u32 state) ++{ ++ ic_irq_enable = readl(VA_IC_BASE + IRQ_ENABLE); ++ return 0; ++} ++ ++static int irq_resume(struct sys_device *dev) ++{ ++ /* disable all irq sources */ ++ writel(-1, VA_CMIC_BASE + IRQ_ENABLE_CLEAR); ++ writel(-1, VA_IC_BASE + IRQ_ENABLE_CLEAR); ++ writel(-1, VA_IC_BASE + FIQ_ENABLE_CLEAR); ++ ++ writel(ic_irq_enable, VA_IC_BASE + IRQ_ENABLE_SET); ++ return 0; ++} ++#else ++#define irq_suspend NULL ++#define irq_resume NULL ++#endif ++ ++static struct sysdev_class irq_class = { ++ set_kset_name("irq"), ++ .suspend = irq_suspend, ++ .resume = irq_resume, ++}; ++ ++static struct sys_device irq_device = { ++ .id = 0, ++ .cls = &irq_class, ++}; ++ ++static int __init irq_init_sysfs(void) ++{ ++ int ret = sysdev_class_register(&irq_class); ++ if (ret == 0) ++ ret = sys_device_register(&irq_device); ++ return ret; ++} ++ ++device_initcall(irq_init_sysfs); ++ ++/* ++ * Flash handling. ++ */ ++#define SC_CTRLC (VA_SC_BASE + INTEGRATOR_SC_CTRLC_OFFSET) ++#define SC_CTRLS (VA_SC_BASE + INTEGRATOR_SC_CTRLS_OFFSET) ++#define EBI_CSR1 (VA_EBI_BASE + INTEGRATOR_EBI_CSR1_OFFSET) ++#define EBI_LOCK (VA_EBI_BASE + INTEGRATOR_EBI_LOCK_OFFSET) ++ ++static int ap_flash_init(void) ++{ ++ u32 tmp; ++ ++ writel(INTEGRATOR_SC_CTRL_nFLVPPEN | INTEGRATOR_SC_CTRL_nFLWP, SC_CTRLC); ++ ++ tmp = readl(EBI_CSR1) | INTEGRATOR_EBI_WRITE_ENABLE; ++ writel(tmp, EBI_CSR1); ++ ++ if (!(readl(EBI_CSR1) & INTEGRATOR_EBI_WRITE_ENABLE)) { ++ writel(0xa05f, EBI_LOCK); ++ writel(tmp, EBI_CSR1); ++ writel(0, EBI_LOCK); ++ } ++ return 0; ++} ++ ++static void ap_flash_exit(void) ++{ ++ u32 tmp; ++ ++ writel(INTEGRATOR_SC_CTRL_nFLVPPEN | INTEGRATOR_SC_CTRL_nFLWP, SC_CTRLC); ++ ++ tmp = readl(EBI_CSR1) & ~INTEGRATOR_EBI_WRITE_ENABLE; ++ writel(tmp, EBI_CSR1); ++ ++ if (readl(EBI_CSR1) & INTEGRATOR_EBI_WRITE_ENABLE) { ++ writel(0xa05f, EBI_LOCK); ++ writel(tmp, EBI_CSR1); ++ writel(0, EBI_LOCK); ++ } ++} ++ ++static void ap_flash_set_vpp(int on) ++{ ++ unsigned long reg = on ? 
SC_CTRLS : SC_CTRLC; ++ ++ writel(INTEGRATOR_SC_CTRL_nFLVPPEN, reg); ++} ++ ++static struct flash_platform_data ap_flash_data = { ++ .map_name = "cfi_probe", ++ .width = 4, ++ .init = ap_flash_init, ++ .exit = ap_flash_exit, ++ .set_vpp = ap_flash_set_vpp, ++}; ++ ++static struct resource cfi_flash_resource = { ++ .start = INTEGRATOR_FLASH_BASE, ++ .end = INTEGRATOR_FLASH_BASE + INTEGRATOR_FLASH_SIZE - 1, ++ .flags = IORESOURCE_MEM, ++}; ++ ++static struct platform_device cfi_flash_device = { ++ .name = "armflash", ++ .id = 0, ++ .dev = { ++ .platform_data = &ap_flash_data, ++ }, ++ .num_resources = 1, ++ .resource = &cfi_flash_resource, ++}; ++ ++static int __init ap_init(void) ++{ ++ unsigned long sc_dec; ++ int i; ++ ++ platform_add_device(&cfi_flash_device); ++ ++ sc_dec = readl(VA_SC_BASE + INTEGRATOR_SC_DEC_OFFSET); ++ for (i = 0; i < 4; i++) { ++ struct lm_device *lmdev; ++ ++ if ((sc_dec & (16 << i)) == 0) ++ continue; ++ ++ lmdev = kmalloc(sizeof(struct lm_device), GFP_KERNEL); ++ if (!lmdev) ++ continue; ++ ++ memset(lmdev, 0, sizeof(struct lm_device)); ++ ++ lmdev->resource.start = 0xc0000000 + 0x10000000 * i; ++ lmdev->resource.end = lmdev->resource.start + 0x0fffffff; ++ lmdev->resource.flags = IORESOURCE_MEM; ++ lmdev->irq = IRQ_AP_EXPINT0 + i; ++ lmdev->id = i; ++ ++ lm_device_register(lmdev); ++ } ++ ++ return 0; ++} ++ ++arch_initcall(ap_init); ++ ++MACHINE_START(INTEGRATOR, "ARM-Integrator") ++ MAINTAINER("ARM Ltd/Deep Blue Solutions Ltd") ++ BOOT_MEM(0x00000000, 0x16000000, 0xf1600000) ++ BOOT_PARAMS(0x00000100) ++ MAPIO(ap_map_io) ++ INITIRQ(ap_init_irq) ++MACHINE_END +--- linux-2.6.0-test6/arch/arm/mach-integrator/Kconfig 2003-09-27 18:57:43.000000000 -0700 ++++ 25/arch/arm/mach-integrator/Kconfig 2003-10-05 00:33:23.000000000 -0700 +@@ -1,8 +1,15 @@ + menu "Integrator Options" + depends on ARCH_INTEGRATOR + ++config ARCH_INTEGRATOR_AP ++ bool "Support Integrator/AP and Integrator/PP2 platforms" ++ help ++ Include support for the ARM(R) Integrator/AP and ++ Integrator/PP2 platforms. ++ + config INTEGRATOR_IMPD1 + tristate "Include support for Integrator/IM-PD1" ++ depends on ARCH_INTEGRATOR_AP + help + The IM-PD1 is an add-on logic module for the Integrator which + allows ARM(R) Ltd PrimeCells to be developed and evaluated. +--- linux-2.6.0-test6/arch/arm/mach-integrator/Makefile 2003-09-27 18:57:43.000000000 -0700 ++++ 25/arch/arm/mach-integrator/Makefile 2003-10-05 00:33:23.000000000 -0700 +@@ -4,9 +4,10 @@ + + # Object file lists. + +-obj-y := core.o lm.o time.o ++obj-y := core.o lm.o time.o ++obj-$(CONFIG_ARCH_INTEGRATOR_AP) += integrator_ap.o + +-obj-$(CONFIG_LEDS) += leds.o +-obj-$(CONFIG_PCI) += pci_v3.o pci.o ++obj-$(CONFIG_LEDS) += leds.o ++obj-$(CONFIG_PCI) += pci_v3.o pci.o + obj-$(CONFIG_CPU_FREQ_INTEGRATOR) += cpu.o + obj-$(CONFIG_INTEGRATOR_IMPD1) += impd1.o +--- linux-2.6.0-test6/arch/arm/mach-integrator/pci.c 2003-06-14 12:18:51.000000000 -0700 ++++ 25/arch/arm/mach-integrator/pci.c 2003-10-05 00:33:23.000000000 -0700 +@@ -96,7 +96,7 @@ static u8 __init integrator_swizzle(stru + } + + static int irq_tab[4] __initdata = { +- IRQ_PCIINT0, IRQ_PCIINT1, IRQ_PCIINT2, IRQ_PCIINT3 ++ IRQ_AP_PCIINT0, IRQ_AP_PCIINT1, IRQ_AP_PCIINT2, IRQ_AP_PCIINT3 + }; + + /* +--- linux-2.6.0-test6/arch/arm/mach-integrator/pci_v3.c 2003-06-14 12:18:28.000000000 -0700 ++++ 25/arch/arm/mach-integrator/pci_v3.c 2003-10-05 00:33:23.000000000 -0700 +@@ -575,7 +575,7 @@ void __init pci_v3_preinit(void) + /* + * Grab the PCI error interrupt. 
+ */ +- ret = request_irq(IRQ_V3INT, v3_irq, 0, "V3", NULL); ++ ret = request_irq(IRQ_AP_V3INT, v3_irq, 0, "V3", NULL); + if (ret) + printk(KERN_ERR "PCI: unable to grab PCI error " + "interrupt: %d\n", ret); +@@ -596,7 +596,7 @@ void __init pci_v3_postinit(void) + v3_writeb(V3_LB_IMASK, 0x68); + + #if 0 +- ret = request_irq(IRQ_LBUSTIMEOUT, lb_timeout, 0, "bus timeout", NULL); ++ ret = request_irq(IRQ_AP_LBUSTIMEOUT, lb_timeout, 0, "bus timeout", NULL); + if (ret) + printk(KERN_ERR "PCI: unable to grab local bus timeout " + "interrupt: %d\n", ret); +--- linux-2.6.0-test6/arch/arm/mach-pxa/irq.c 2003-06-14 12:18:33.000000000 -0700 ++++ 25/arch/arm/mach-pxa/irq.c 2003-10-05 00:33:23.000000000 -0700 +@@ -58,7 +58,19 @@ static int pxa_gpio_irq_type(unsigned in + { + int gpio, idx; + +- gpio = irq - ((irq >= IRQ_GPIO(2)) ? IRQ_GPIO(2) + 2 : IRQ_GPIO(0)); ++ gpio = IRQ_TO_GPIO(irq); ++ idx = gpio >> 5; ++ ++ if (type == IRQT_PROBE) { ++ /* Don't mess with enabled GPIOs using preconfigured edges or ++ GPIOs set to alternate function during probe */ ++ if ((GPIO_IRQ_rising_edge[idx] | GPIO_IRQ_falling_edge[idx]) & ++ GPIO_bit(gpio)) ++ return 0; ++ if (GAFR(gpio) & (0x3 << (((gpio) & 0xf)*2))) ++ return 0; ++ type = __IRQT_RISEDGE | __IRQT_FALEDGE; ++ } + + printk(KERN_DEBUG "IRQ%d (GPIO%d): ", irq, gpio); + +@@ -78,10 +90,8 @@ static int pxa_gpio_irq_type(unsigned in + + printk("edges\n"); + +- idx = gpio >> 5; + GRER(gpio) = GPIO_IRQ_rising_edge[idx] & GPIO_IRQ_mask[idx]; + GFER(gpio) = GPIO_IRQ_falling_edge[idx] & GPIO_IRQ_mask[idx]; +- + return 0; + } + +--- linux-2.6.0-test6/arch/arm/mach-pxa/leds.c 2003-09-27 18:57:43.000000000 -0700 ++++ 25/arch/arm/mach-pxa/leds.c 2003-10-05 00:33:23.000000000 -0700 +@@ -7,6 +7,7 @@ + * + * Copyright (c) 2001 Jeff Sutherland, Accelent Systems Inc. + */ ++#include + #include + + #include +--- linux-2.6.0-test6/arch/arm/mach-pxa/lubbock.c 2003-08-22 19:23:40.000000000 -0700 ++++ 25/arch/arm/mach-pxa/lubbock.c 2003-10-05 00:33:23.000000000 -0700 +@@ -78,7 +78,7 @@ static void __init lubbock_init_irq(void + pxa_init_irq(); + + /* setup extra lubbock irqs */ +- for (irq = LUBBOCK_IRQ(0); irq <= LUBBOCK_IRQ(5); irq++) { ++ for (irq = LUBBOCK_IRQ(0); irq <= LUBBOCK_LAST_IRQ; irq++) { + set_irq_chip(irq, &lubbock_irq_chip); + set_irq_handler(irq, do_level_IRQ); + set_irq_flags(irq, IRQF_VALID | IRQF_PROBE); +@@ -124,6 +124,7 @@ static struct map_desc lubbock_io_desc[] + { 0xf0000000, 0x08000000, 0x00100000, MT_DEVICE }, /* CPLD */ + { 0xf1000000, 0x0c000000, 0x00100000, MT_DEVICE }, /* LAN91C96 IO */ + { 0xf1100000, 0x0e000000, 0x00100000, MT_DEVICE }, /* LAN91C96 Attr */ ++ { 0xf4000000, 0x10000000, 0x00800000, MT_DEVICE }, /* SA1111 */ + }; + + static void __init lubbock_map_io(void) +--- linux-2.6.0-test6/arch/arm/mach-pxa/pm.c 2003-09-27 18:57:43.000000000 -0700 ++++ 25/arch/arm/mach-pxa/pm.c 2003-10-05 00:33:23.000000000 -0700 +@@ -11,13 +11,14 @@ + * modify it under the terms of the GNU General Public License. 
+ */ + #include ++#include ++#include + #include + #include + + #include + #include + #include +-#include + + + /* +@@ -60,13 +61,16 @@ enum { SLEEP_SAVE_START = 0, + }; + + +-int pm_do_suspend(void) ++static int pxa_pm_enter(u32 state) + { + unsigned long sleep_save[SLEEP_SAVE_SIZE]; + unsigned long checksum = 0; + unsigned long delta; + int i; + ++ if (state != PM_SUSPEND_MEM) ++ return -EINVAL; ++ + /* preserve current time */ + delta = xtime.tv_sec - RCNR; + +@@ -194,3 +198,37 @@ unsigned long sleep_phys_sp(void *sp) + { + return virt_to_phys(sp); + } ++ ++/* ++ * Called after processes are frozen, but before we shut down devices. ++ */ ++static int pxa_pm_prepare(u32 state) ++{ ++ return 0; ++} ++ ++/* ++ * Called after devices are re-setup, but before processes are thawed. ++ */ ++static int pxa_pm_finish(u32 state) ++{ ++ return 0; ++} ++ ++/* ++ * Set to PM_DISK_FIRMWARE so we can quickly veto suspend-to-disk. ++ */ ++static struct pm_ops pxa_pm_ops = { ++ .pm_disk_mode = PM_DISK_FIRMWARE, ++ .prepare = pxa_pm_prepare, ++ .enter = pxa_pm_enter, ++ .finish = pxa_pm_finish, ++}; ++ ++static int __init pxa_pm_init(void) ++{ ++ pm_set_ops(&pxa_pm_ops); ++ return 0; ++} ++ ++late_initcall(pxa_pm_init); +--- linux-2.6.0-test6/arch/arm/mach-sa1100/leds.c 2003-09-08 13:58:55.000000000 -0700 ++++ 25/arch/arm/mach-sa1100/leds.c 2003-10-05 00:33:23.000000000 -0700 +@@ -5,6 +5,7 @@ + * + * Copyright (C) 2001 Nicolas Pitre + */ ++#include + #include + + #include +--- linux-2.6.0-test6/arch/arm/mach-sa1100/pm.c 2003-09-08 13:58:55.000000000 -0700 ++++ 25/arch/arm/mach-sa1100/pm.c 2003-10-05 00:33:23.000000000 -0700 +@@ -22,6 +22,8 @@ + * 2002-05-27: Nicolas Pitre Killed sleep.h and the kmalloced save array. + * Storage is local on the stack now. + */ ++#include ++#include + #include + #include + +@@ -54,11 +56,14 @@ enum { SLEEP_SAVE_SP = 0, + }; + + +-int pm_do_suspend(void) ++static int sa11x0_pm_enter(u32 state) + { + unsigned long sleep_save[SLEEP_SAVE_SIZE]; + unsigned long delta, gpio; + ++ if (state != PM_SUSPEND_MEM) ++ return -EINVAL; ++ + /* preserve current time */ + delta = xtime.tv_sec - RCNR; + gpio = GPLR; +@@ -139,3 +144,37 @@ unsigned long sleep_phys_sp(void *sp) + { + return virt_to_phys(sp); + } ++ ++/* ++ * Called after processes are frozen, but before we shut down devices. ++ */ ++static int sa11x0_pm_prepare(u32 state) ++{ ++ return 0; ++} ++ ++/* ++ * Called after devices are re-setup, but before processes are thawed. ++ */ ++static int sa11x0_pm_finish(u32 state) ++{ ++ return 0; ++} ++ ++/* ++ * Set to PM_DISK_FIRMWARE so we can quickly veto suspend-to-disk. 
++ */ ++static struct pm_ops sa11x0_pm_ops = { ++ .pm_disk_mode = PM_DISK_FIRMWARE, ++ .prepare = sa11x0_pm_prepare, ++ .enter = sa11x0_pm_enter, ++ .finish = sa11x0_pm_finish, ++}; ++ ++static int __init sa11x0_pm_init(void) ++{ ++ pm_set_ops(&sa11x0_pm_ops); ++ return 0; ++} ++ ++late_initcall(sa11x0_pm_init); +--- linux-2.6.0-test6/arch/arm/Makefile 2003-09-27 18:57:43.000000000 -0700 ++++ 25/arch/arm/Makefile 2003-10-05 00:33:23.000000000 -0700 +@@ -182,7 +182,6 @@ define archhelp + echo '* zImage - Compressed kernel image (arch/$(ARCH)/boot/zImage)' + echo ' Image - Uncompressed kernel image (arch/$(ARCH)/boot/Image)' + echo ' bootpImage - Combined zImage and initial RAM disk' +- echo ' initrd - Create an initial image' + echo ' install - Install uncompressed kernel' + echo ' zinstall - Install compressed kernel' + echo ' Install using (your) ~/bin/installkernel or' +--- linux-2.6.0-test6/arch/arm/mm/discontig.c 2003-06-14 12:18:31.000000000 -0700 ++++ 25/arch/arm/mm/discontig.c 2003-10-05 00:34:40.000000000 -0700 +@@ -15,7 +15,7 @@ + #include + #include + +-#if NR_NODES != 4 ++#if MAX_NUMNODES != 4 + #error Fix Me Please + #endif + +@@ -23,9 +23,9 @@ + * Our node_data structure for discontiguous memory. + */ + +-static bootmem_data_t node_bootmem_data[NR_NODES]; ++static bootmem_data_t node_bootmem_data[MAX_NUMNODES]; + +-pg_data_t discontig_node_data[NR_NODES] = { ++pg_data_t discontig_node_data[MAX_NUMNODES] = { + { .bdata = &node_bootmem_data[0] }, + { .bdata = &node_bootmem_data[1] }, + { .bdata = &node_bootmem_data[2] }, +--- linux-2.6.0-test6/arch/arm/mm/init.c 2003-07-10 18:50:30.000000000 -0700 ++++ 25/arch/arm/mm/init.c 2003-10-05 00:34:40.000000000 -0700 +@@ -33,12 +33,6 @@ + #include + #include + +-#ifndef CONFIG_DISCONTIGMEM +-#define NR_NODES 1 +-#else +-#define NR_NODES 4 +-#endif +- + #ifdef CONFIG_CPU_32 + #define TABLE_OFFSET (PTRS_PER_PTE) + #else +@@ -178,7 +172,7 @@ find_memend_and_nodes(struct meminfo *mi + { + unsigned int i, bootmem_pages = 0, memend_pfn = 0; + +- for (i = 0; i < NR_NODES; i++) { ++ for (i = 0; i < MAX_NUMNODES; i++) { + np[i].start = -1U; + np[i].end = 0; + np[i].bootmap_pages = 0; +@@ -207,7 +201,7 @@ find_memend_and_nodes(struct meminfo *mi + * we have, we're in trouble. (maybe we ought to + * limit, instead of bugging?) 
+ */ +- if (numnodes > NR_NODES) ++ if (numnodes > MAX_NUMNODES) + BUG(); + } + +@@ -365,7 +359,7 @@ static inline void free_bootmem_node_ban + */ + void __init bootmem_init(struct meminfo *mi) + { +- struct node_info node_info[NR_NODES], *np = node_info; ++ struct node_info node_info[MAX_NUMNODES], *np = node_info; + unsigned int bootmap_pages, bootmap_pfn, map_pg; + int node, initrd_node; + +--- linux-2.6.0-test6/arch/arm/mm/ioremap.c 2003-06-14 12:18:24.000000000 -0700 ++++ 25/arch/arm/mm/ioremap.c 2003-10-05 00:33:23.000000000 -0700 +@@ -150,7 +150,7 @@ __ioremap(unsigned long phys_addr, size_ + if (!area) + return NULL; + addr = area->addr; +- if (remap_area_pages(VMALLOC_VMADDR(addr), phys_addr, size, flags)) { ++ if (remap_area_pages((unsigned long) addr, phys_addr, size, flags)) { + vfree(addr); + return NULL; + } +--- linux-2.6.0-test6/arch/cris/mm/ioremap.c 2003-07-10 18:50:30.000000000 -0700 ++++ 25/arch/cris/mm/ioremap.c 2003-10-05 00:33:23.000000000 -0700 +@@ -157,7 +157,7 @@ void * __ioremap(unsigned long phys_addr + if (!area) + return NULL; + addr = area->addr; +- if (remap_area_pages(VMALLOC_VMADDR(addr), phys_addr, size, flags)) { ++ if (remap_area_pages((unsigned long) addr, phys_addr, size, flags)) { + vfree(addr); + return NULL; + } +--- linux-2.6.0-test6/arch/h8300/README 2003-06-14 12:18:26.000000000 -0700 ++++ 25/arch/h8300/README 2003-10-05 00:33:23.000000000 -0700 +@@ -16,7 +16,7 @@ H8S is planning. + + 3.H8MAX + Under development +- see http://www.strawbelly-linux.com (Japanese Only) ++ see http://www.strawberry-linux.com (Japanese Only) + + * Toolchain Version + gcc-3.1 or higher and patch +--- linux-2.6.0-test6/arch/i386/boot/setup.S 2003-09-27 18:57:43.000000000 -0700 ++++ 25/arch/i386/boot/setup.S 2003-10-05 00:36:48.000000000 -0700 +@@ -162,7 +162,7 @@ cmd_line_ptr: .long 0 # (Header versio + # can be located anywhere in + # low memory 0x10000 or higher. + +-ramdisk_max: .long MAXMEM-1 # (Header version 0x0203 or later) ++ramdisk_max: .long __MAXMEM-1 # (Header version 0x0203 or later) + # The highest safe address for + # the contents of an initrd + +--- linux-2.6.0-test6/arch/i386/Kconfig 2003-09-27 18:57:43.000000000 -0700 ++++ 25/arch/i386/Kconfig 2003-10-05 00:36:48.000000000 -0700 +@@ -397,6 +397,54 @@ config X86_OOSTORE + depends on MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 + default y + ++config X86_4G ++ bool "4 GB kernel-space and 4 GB user-space virtual memory support" ++ help ++ This option is only useful for systems that have more than 1 GB ++ of RAM. ++ ++ The default kernel VM layout leaves 1 GB of virtual memory for ++ kernel-space mappings, and 3 GB of VM for user-space applications. ++ This option ups both the kernel-space VM and the user-space VM to ++ 4 GB. ++ ++ The cost of this option is additional TLB flushes done at ++ system-entry points that transition from user-mode into kernel-mode. ++ I.e. system calls and page faults, and IRQs that interrupt user-mode ++ code. There's also additional overhead to kernel operations that copy ++ memory to/from user-space. The overhead from this is hard to tell and ++ depends on the workload - it can be anything from no visible overhead ++ to 20-30% overhead. A good rule of thumb is to count with a runtime ++ overhead of 20%. ++ ++ The upside is the much increased kernel-space VM, which more than ++ quadruples the maximum amount of RAM supported. Kernels compiled with ++ this option boot on 64GB of RAM and still have more than 3.1 GB of ++ 'lowmem' left. 
Another bonus is that highmem IO bouncing decreases,
++ if used with drivers that still use bounce-buffers.
++
++ There's also a 33% increase in user-space VM size - database
++ applications might see a boost from this.
++
++ But the cost of the TLB flushes and the runtime overhead have to be
++ weighed against the bonuses offered by the larger VM spaces. The
++ dividing line depends on the actual workload - there might be 4 GB
++ systems that benefit from this option. Systems with less than 4 GB
++ of RAM will rarely see a benefit from this option - but it's not
++ out of the question; the exact circumstances have to be considered.
++
++config X86_SWITCH_PAGETABLES
++ def_bool X86_4G
++
++config X86_4G_VM_LAYOUT
++ def_bool X86_4G
++
++config X86_UACCESS_INDIRECT
++ def_bool X86_4G
++
++config X86_HIGH_ENTRY
++ def_bool X86_4G
++
+ config HPET_TIMER
+ bool "HPET Timer Support"
+ help
+@@ -793,7 +841,8 @@ config HAVE_DEC_LOCK
+ # Summit needs it only when NUMA is on
+ config BOOT_IOREMAP
+ bool
+- depends on ((X86_SUMMIT || X86_GENERICARCH) && NUMA)
++ depends on X86_PC
++# depends on (((X86_SUMMIT || X86_GENERICARCH) && NUMA)) || X86_GENERICARCH
+ default y
+
+ endmenu
+@@ -1030,6 +1079,25 @@ config PCI_DIRECT
+ depends on PCI && ((PCI_GODIRECT || PCI_GOANY) || X86_VISWS)
+ default y
+
++config PCI_USE_VECTOR
++ bool "Vector-based interrupt indexing"
++ depends on X86_LOCAL_APIC
++ default n
++ help
++ This replaces the existing IRQ-based interrupt indexing scheme
++ with a vector-based indexing scheme. The advantages of vector-based
++ over IRQ-based indexing are listed below:
++ 1) Support for MSI implementation.
++ 2) Support for future IOxAPIC hotplug
++
++ Note that this enables MSI, Message Signaled Interrupts, on all
++ MSI-capable device functions detected if users also install the
++ MSI patch. A Message Signaled Interrupt enables an MSI-capable
++ hardware device to send an inbound Memory Write on its PCI bus
++ instead of asserting an IRQ signal on its device IRQ pin.
++
++ If you don't know what to do here, say N.
++
+ source "drivers/pci/Kconfig"
+
+ config ISA
+@@ -1231,6 +1299,15 @@ config DEBUG_PAGEALLOC
+ This results in a large slowdown, but helps to find certain types
+ of memory corruptions.
+
++config SPINLINE
++ bool "Spinlock inlining"
++ depends on DEBUG_KERNEL
++ help
++ This will change spinlocks from out of line to inline, making them
++ account cost to the callers in readprofile, rather than to the lock
++ itself (as ".text.lock.filename"). This can be helpful for finding
++ the callers of locks.
++
+ config DEBUG_HIGHMEM
+ bool "Highmem debugging"
+ depends on DEBUG_KERNEL && HIGHMEM
+@@ -1247,20 +1324,208 @@ config DEBUG_INFO
+ Say Y here only if you plan to use gdb to debug the kernel.
+ If you don't debug the kernel, you can say N.
+
++config LOCKMETER
++ bool "Kernel lock metering"
++ depends on SMP && !PREEMPT
++ help
++ Say Y to enable kernel lock metering, which adds overhead to SMP locks,
++ but allows you to see various statistics using the lockstat command.
++
+ config DEBUG_SPINLOCK_SLEEP
+ bool "Sleep-inside-spinlock checking"
+ help
+ If you say Y here, various routines which may sleep will become very
+ noisy if they are called with a spinlock held.
+
++config KGDB
++ bool "Include kgdb kernel debugger"
++ depends on DEBUG_KERNEL
++ help
++ If you say Y here, the system will be compiled with the debug
++ option (-g) and a debugging stub will be included in the
++ kernel. This stub communicates with gdb on another (host)
++ computer via a serial port.
The host computer should have
++ access to the kernel binary file (vmlinux) and a serial port
++ that is connected to the target machine. Gdb can be made to
++ configure the serial port or you can use stty and setserial to
++ do this. See the 'target' command in gdb. This option also
++ configures in the ability to request a breakpoint early in the
++ boot process. To request the breakpoint just include 'kgdb'
++ as a boot option when booting the target machine. The system
++ will then break as soon as it looks at the boot options. This
++ option also installs a breakpoint in panic and sends any
++ kernel faults to the debugger. For more information see the
++ Documentation/i386/kgdb.txt file.
++
++choice
++ depends on KGDB
++ prompt "Debug serial port BAUD"
++ default KGDB_115200BAUD
++ help
++ Gdb and the kernel stub need to agree on the baud rate to be
++ used. Some systems (x86 family at this writing) allow this to
++ be configured.
++
++config KGDB_9600BAUD
++ bool "9600"
++
++config KGDB_19200BAUD
++ bool "19200"
++
++config KGDB_38400BAUD
++ bool "38400"
++
++config KGDB_57600BAUD
++ bool "57600"
++
++config KGDB_115200BAUD
++ bool "115200"
++endchoice
++
++config KGDB_PORT
++ hex "hex I/O port address of the debug serial port"
++ depends on KGDB
++ default 3f8
++ help
++ Some systems (x86 family at this writing) allow the port
++ address to be configured. The number entered is assumed to be
++ hex, don't put 0x in front of it. The standard addresses are:
++ COM1 3f8, irq 4 and COM2 2f8, irq 3. Setserial /dev/ttySx
++ will tell you what you have. It is good to test the serial
++ connection with a live system before trying to debug.
++
++config KGDB_IRQ
++ int "IRQ of the debug serial port"
++ depends on KGDB
++ default 4
++ help
++ This is the irq for the debug port. If everything is working
++ correctly and the kernel has interrupts enabled, a control-C
++ sent to the port should cause a break into the kernel debug stub.
++
++config DEBUG_INFO
++ bool
++ depends on KGDB
++ default y
++
++config KGDB_MORE
++ bool "Add any additional compile options"
++ depends on KGDB
++ default n
++ help
++ Saying yes here turns on the ability to enter additional
++ compile options.
++
++
++config KGDB_OPTIONS
++ depends on KGDB_MORE
++ string "Additional compile arguments"
++ default "-O1"
++ help
++ This option allows you to enter additional compile options for
++ the whole kernel compile. Each platform will have a default
++ that seems right for it. For example on PPC "-ggdb -O1", and
++ for i386 "-O1". Note that by configuring KGDB "-g" is already
++ turned on. In addition, on i386 platforms
++ "-fomit-frame-pointer" is deleted from the standard compile
++ options.
++
++config NO_KGDB_CPUS
++ int "Number of CPUs"
++ depends on KGDB && SMP
++ default NR_CPUS
++ help
++ This option sets the number of cpus for kgdb ONLY. It is used
++ to prune some internal structures so they look "nice" when
++ displayed with gdb. This is to overcome possibly larger
++ numbers that may have been entered above. Enter the real
++ number to get nice clean kgdb_info displays.
++
++config KGDB_TS
++ bool "Enable kgdb time stamp macros?"
++ depends on KGDB
++ default n
++ help
++ Kgdb event macros allow you to instrument your code with calls
++ to the kgdb event recording function. The event log may be
++ examined with gdb at a break point. Turning on this
++ capability also allows you to choose how many events to
++ keep. Kgdb always keeps the latest events.
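As a rough illustration of the event log that the KGDB_TS choices below
size - the record layout and macro name here are assumptions made for the
sketch, not the stub's actual API - think of a fixed ring of timestamped
records, where the newest entry simply overwrites the oldest:

	#include <asm/timex.h>		/* cycles_t, get_cycles() */

	#define KGDB_TS_SIZE 128	/* e.g. the KGDB_TS_128 choice */

	struct kgdb_ts_rec {		/* illustrative record layout */
		cycles_t when;		/* cycle counter at the event */
		const char *what;	/* static string naming the event */
		unsigned long data;	/* one word of context */
	};

	static struct kgdb_ts_rec kgdb_ts_buf[KGDB_TS_SIZE];
	static unsigned int kgdb_ts_next;

	static inline void kgdb_ts(const char *what, unsigned long data)
	{
		struct kgdb_ts_rec *r;

		r = &kgdb_ts_buf[kgdb_ts_next++ % KGDB_TS_SIZE];
		r->when = get_cycles();	/* examined later from gdb */
		r->what = what;
		r->data = data;
	}

Because only the latest KGDB_TS_SIZE records survive, this matches the
"always keeps the latest events" behaviour described above.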
++ ++choice ++ depends on KGDB_TS ++ prompt "Max number of time stamps to save?" ++ default KGDB_TS_128 ++ ++config KGDB_TS_64 ++ bool "64" ++ ++config KGDB_TS_128 ++ bool "128" ++ ++config KGDB_TS_256 ++ bool "256" ++ ++config KGDB_TS_512 ++ bool "512" ++ ++config KGDB_TS_1024 ++ bool "1024" ++ ++endchoice ++ ++config STACK_OVERFLOW_TEST ++ bool "Turn on kernel stack overflow testing?" ++ depends on KGDB ++ default n ++ help ++ This option enables code in the front line interrupt handlers ++ to check for kernel stack overflow on interrupts and system ++ calls. This is part of the kgdb code on x86 systems. ++ ++config KGDB_CONSOLE ++ bool "Enable serial console thru kgdb port" ++ depends on KGDB ++ default n ++ help ++ This option enables the command line "console=kgdb" option. ++ When the system is booted with this option in the command line ++ all kernel printk output is sent to gdb (as well as to other ++ consoles). For this to work gdb must be connected. For this ++ reason, this command line option will generate a breakpoint if ++ gdb has not yet connected. After the gdb continue command is ++ given all pent up console output will be printed by gdb on the ++ host machine. Neither this option, nor KGDB require the ++ serial driver to be configured. ++ ++config KGDB_SYSRQ ++ bool "Turn on SysRq 'G' command to do a break?" ++ depends on KGDB ++ default y ++ help ++ This option includes an option in the SysRq code that allows ++ you to enter SysRq G which generates a breakpoint to the KGDB ++ stub. This will work if the keyboard is alive and can ++ interrupt the system. Because of constraints on when the ++ serial port interrupt can be enabled, this code may allow you ++ to interrupt the system before the serial port control C is ++ available. Just say yes here. ++ + config FRAME_POINTER + bool "Compile the kernel with frame pointers" ++ default KGDB + help + If you say Y here the resulting kernel image will be slightly larger + and slower, but it will give very useful debugging information. + If you don't debug the kernel, you can say N, but we may not be able + to solve problems without frame pointers. + ++config MAGIC_SYSRQ ++ bool ++ depends on KGDB_SYSRQ ++ default y ++ + config X86_EXTRA_IRQS + bool + depends on X86_LOCAL_APIC || X86_VOYAGER +@@ -1303,3 +1568,8 @@ config X86_TRAMPOLINE + bool + depends on SMP || X86_VISWS + default y ++ ++config PC ++ bool ++ depends on X86 && !EMBEDDED ++ default y +--- linux-2.6.0-test6/arch/i386/kernel/acpi/boot.c 2003-09-08 13:58:55.000000000 -0700 ++++ 25/arch/i386/kernel/acpi/boot.c 2003-10-05 00:36:22.000000000 -0700 +@@ -26,6 +26,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -183,8 +184,7 @@ acpi_parse_lapic_nmi ( + + #endif /*CONFIG_X86_LOCAL_APIC*/ + +-#ifdef CONFIG_X86_IO_APIC +- ++#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_ACPI_INTERPRETER) + + static int __init + acpi_parse_ioapic ( +@@ -297,6 +297,10 @@ acpi_find_rsdp (void) + { + unsigned long rsdp_phys = 0; + ++ if (efi.acpi20) ++ return __pa(efi.acpi20); ++ else if (efi.acpi) ++ return __pa(efi.acpi); + /* + * Scan memory looking for the RSDP signature. First search EBDA (low + * memory) paragraphs and then search upper memory (E0000-FFFFF). 
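For readers unfamiliar with that legacy path: the RSDP is located by
checking for its 8-byte "RSD PTR " signature on 16-byte boundaries in
those ranges. A minimal sketch of such a scan - the helper name is
illustrative, not the function's actual body:

	#define RSDP_SIG "RSD PTR "	/* 8 bytes, trailing space included */

	static unsigned long scan_for_rsdp(unsigned long start, unsigned long end)
	{
		unsigned long addr;

		/* The ACPI spec guarantees 16-byte alignment of the RSDP. */
		for (addr = start; addr < end; addr += 16)
			if (memcmp(phys_to_virt(addr), RSDP_SIG, 8) == 0)
				return addr;	/* physical address found */
		return 0;
	}

The EFI branch added above sidesteps this scan entirely: when the
firmware publishes an ACPI 2.0 (or 1.0) table, its address is taken
straight from the EFI system table.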
+@@ -368,7 +372,6 @@ acpi_boot_init (void) + + result = acpi_table_parse(ACPI_APIC, acpi_parse_madt); + if (!result) { +- printk(KERN_WARNING PREFIX "MADT not present\n"); + return 0; + } + else if (result < 0) { +@@ -416,7 +419,7 @@ acpi_boot_init (void) + + #endif /*CONFIG_X86_LOCAL_APIC*/ + +-#ifdef CONFIG_X86_IO_APIC ++#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_ACPI_INTERPRETER) + + /* + * I/O APIC +@@ -472,7 +475,8 @@ acpi_boot_init (void) + acpi_irq_model = ACPI_IRQ_MODEL_IOAPIC; + + acpi_ioapic = 1; +-#endif /*CONFIG_X86_IO_APIC*/ ++ ++#endif /* CONFIG_X86_IO_APIC && CONFIG_ACPI_INTERPRETER */ + + #ifdef CONFIG_X86_LOCAL_APIC + if (acpi_lapic && acpi_ioapic) { +@@ -480,6 +484,7 @@ acpi_boot_init (void) + clustered_apic_check(); + } + #endif ++ + #ifdef CONFIG_HPET_TIMER + acpi_table_parse(ACPI_HPET, acpi_parse_hpet); + #endif +--- linux-2.6.0-test6/arch/i386/kernel/asm-offsets.c 2003-06-14 12:18:07.000000000 -0700 ++++ 25/arch/i386/kernel/asm-offsets.c 2003-10-05 00:36:48.000000000 -0700 +@@ -4,9 +4,11 @@ + * to extract and format the required data. + */ + ++#include + #include + #include + #include "sigframe.h" ++#include + + #define DEFINE(sym, val) \ + asm volatile("\n->" #sym " %0 " #val : : "i" (val)) +@@ -28,4 +30,17 @@ void foo(void) + + DEFINE(RT_SIGFRAME_sigcontext, + offsetof (struct rt_sigframe, uc.uc_mcontext)); ++ DEFINE(TI_task, offsetof (struct thread_info, task)); ++ DEFINE(TI_exec_domain, offsetof (struct thread_info, exec_domain)); ++ DEFINE(TI_flags, offsetof (struct thread_info, flags)); ++ DEFINE(TI_preempt_count, offsetof (struct thread_info, preempt_count)); ++ DEFINE(TI_addr_limit, offsetof (struct thread_info, addr_limit)); ++ DEFINE(TI_real_stack, offsetof (struct thread_info, real_stack)); ++ DEFINE(TI_virtual_stack, offsetof (struct thread_info, virtual_stack)); ++ DEFINE(TI_user_pgd, offsetof (struct thread_info, user_pgd)); ++ ++ DEFINE(FIX_ENTRY_TRAMPOLINE_0_addr, __fix_to_virt(FIX_ENTRY_TRAMPOLINE_0)); ++ DEFINE(FIX_VSYSCALL_addr, __fix_to_virt(FIX_VSYSCALL)); ++ DEFINE(PAGE_SIZE_asm, PAGE_SIZE); ++ DEFINE(task_thread_db7, offsetof (struct task_struct, thread.debugreg[7])); + } +--- linux-2.6.0-test6/arch/i386/kernel/cpu/common.c 2003-09-27 18:57:43.000000000 -0700 ++++ 25/arch/i386/kernel/cpu/common.c 2003-10-05 00:36:48.000000000 -0700 +@@ -510,16 +510,20 @@ void __init cpu_init (void) + BUG(); + enter_lazy_tlb(&init_mm, current); + +- load_esp0(t, thread->esp0); +- set_tss_desc(cpu,t); ++ t->esp0 = thread->esp0; ++ set_tss_desc(cpu, t); + cpu_gdt_table[cpu][GDT_ENTRY_TSS].b &= 0xfffffdff; + load_TR_desc(); +- load_LDT(&init_mm.context); ++ if (cpu) ++ load_LDT(&init_mm.context); + + /* Set up doublefault TSS pointer in the GDT */ + __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss); + cpu_gdt_table[cpu][GDT_ENTRY_DOUBLEFAULT_TSS].b &= 0xfffffdff; + ++ if (cpu) ++ trap_init_virtual_GDT(); ++ + /* Clear %fs and %gs. 
*/ + asm volatile ("xorl %eax, %eax; movl %eax, %fs; movl %eax, %gs"); + +--- linux-2.6.0-test6/arch/i386/kernel/cpu/cpufreq/acpi.c 2003-09-27 18:57:43.000000000 -0700 ++++ 25/arch/i386/kernel/cpu/cpufreq/acpi.c 2003-10-05 00:33:23.000000000 -0700 +@@ -231,7 +231,7 @@ acpi_processor_set_performance ( + int state) + { + u16 port = 0; +- u8 value = 0; ++ u16 value = 0; + int i = 0; + struct cpufreq_freqs cpufreq_freqs; + +@@ -282,9 +282,9 @@ acpi_processor_set_performance ( + value = (u16) perf->states[state].control; + + ACPI_DEBUG_PRINT((ACPI_DB_INFO, +- "Writing 0x%02x to port 0x%04x\n", value, port)); ++ "Writing 0x%04x to port 0x%04x\n", value, port)); + +- outb(value, port); ++ outw(value, port); + + /* + * Then we read the 'status_register' and compare the value with the +@@ -296,12 +296,12 @@ acpi_processor_set_performance ( + port = perf->status_register; + + ACPI_DEBUG_PRINT((ACPI_DB_INFO, +- "Looking for 0x%02x from port 0x%04x\n", +- (u8) perf->states[state].status, port)); ++ "Looking for 0x%04x from port 0x%04x\n", ++ (u16) perf->states[state].status, port)); + + for (i=0; i<100; i++) { +- value = inb(port); +- if (value == (u8) perf->states[state].status) ++ value = inw(port); ++ if (value == (u16) perf->states[state].status) + break; + udelay(10); + } +@@ -309,7 +309,7 @@ acpi_processor_set_performance ( + /* notify cpufreq */ + cpufreq_notify_transition(&cpufreq_freqs, CPUFREQ_POSTCHANGE); + +- if (value != perf->states[state].status) { ++ if (value != (u16) perf->states[state].status) { + unsigned int tmp = cpufreq_freqs.new; + cpufreq_freqs.new = cpufreq_freqs.old; + cpufreq_freqs.old = tmp; +--- linux-2.6.0-test6/arch/i386/kernel/cpu/cpufreq/Kconfig 2003-09-27 18:57:43.000000000 -0700 ++++ 25/arch/i386/kernel/cpu/cpufreq/Kconfig 2003-10-05 00:33:23.000000000 -0700 +@@ -88,6 +88,16 @@ config X86_POWERNOW_K7 + + If in doubt, say N. + ++config X86_POWERNOW_K8 ++ tristate "AMD Opteron/Athlon64 PowerNow!" ++ depends on CPU_FREQ_TABLE ++ help ++ This adds the CPUFreq driver for mobile AMD Opteron/Athlon64 processors. ++ ++ For details, take a look at linux/Documentation/cpu-freq. ++ ++ If in doubt, say N. ++ + config X86_GX_SUSPMOD + tristate "Cyrix MediaGX/NatSemi Geode Suspend Modulation" + depends on CPU_FREQ +--- linux-2.6.0-test6/arch/i386/kernel/cpu/cpufreq/longhaul.c 2003-09-08 13:58:55.000000000 -0700 ++++ 25/arch/i386/kernel/cpu/cpufreq/longhaul.c 2003-10-05 00:33:23.000000000 -0700 +@@ -70,21 +70,6 @@ static unsigned int calc_speed (int mult + } + + +-static unsigned int longhaul_get_cpu_fsb (void) +-{ +- unsigned long lo, hi; +- unsigned int eblcr_fsb_table[] = { 66, 133, 100, -1 }; +- unsigned int invalue=0; +- +- if (fsb == 0) { +- rdmsr (MSR_IA32_EBL_CR_POWERON, lo, hi); +- invalue = (lo & (1<<18|1<<19)) >>18; +- fsb = eblcr_fsb_table[invalue]; +- } +- return fsb; +-} +- +- + static int longhaul_get_cpu_mult (void) + { + unsigned long invalue=0,lo, hi; +@@ -168,7 +153,7 @@ static void longhaul_setstate (unsigned + break; + + /* +- * Longhaul v3. (Ezra-T [C5M], Nehemiag [C5N]) ++ * Longhaul v3. (Ezra-T [C5M], Nehemiah [C5N]) + * This can also do voltage scaling, but see above. + * Ezra-T was alleged to do FSB scaling too, but it never worked in practice. + */ +@@ -193,6 +178,39 @@ static void longhaul_setstate (unsigned + cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + } + ++/* ++ * Centaur decided to make life a little more tricky. ++ * Only longhaul v1 is allowed to read EBLCR BSEL[0:1]. ++ * Samuel2 and above have to try and guess what the FSB is. 
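++ * (Worked example with illustrative numbers: a part that booted at a
++ * maximum multiplier of 10.0x and measured cpu_khz of ~1333000 rounds
++ * to 1328 once masked to a 16 MHz boundary; of the candidate FSBs
++ * 66/100/133 tried below, only 133 reproduces that figure, so the
++ * FSB is taken to be 133 MHz.)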
++ * We do this by assuming we booted at maximum multiplier, and interpolate ++ * between that value multiplied by possible FSBs and cpu_mhz which ++ * was calculated at boot time. Really ugly, but no other way to do this. ++ */ ++static int _guess (int guess, int maxmult) ++{ ++ int target; ++ ++ target = ((maxmult/10)*guess); ++ if (maxmult%10 != 0) ++ target += (guess/2); ++ target &= ~0xf; ++ return target; ++} ++ ++static int guess_fsb(int maxmult) ++{ ++ int speed = (cpu_khz/1000) & ~0xf; ++ int i; ++ int speeds[3] = { 66, 100, 133 }; ++ ++ for (i=0; i<3; i++) { ++ if (_guess(speeds[i],maxmult) == speed) ++ return speeds[i]; ++ } ++ return 0; ++} ++ ++ + + static int __init longhaul_get_ranges (void) + { +@@ -203,8 +221,8 @@ static int __init longhaul_get_ranges (v + -1,110,120,-1,135,115,125,105,130,150,160,140,-1,155,-1,145 }; + unsigned int j, k = 0; + union msr_longhaul longhaul; +- +- fsb = longhaul_get_cpu_fsb(); ++ unsigned long lo, hi; ++ unsigned int eblcr_fsb_table[] = { 66, 133, 100, -1 }; + + switch (longhaul_version) { + case 1: +@@ -212,6 +230,9 @@ static int __init longhaul_get_ranges (v + Assume min=3.0x & max = whatever we booted at. */ + minmult = 30; + maxmult = longhaul_get_cpu_mult(); ++ rdmsr (MSR_IA32_EBL_CR_POWERON, lo, hi); ++ invalue = (lo & (1<<18|1<<19)) >>18; ++ fsb = eblcr_fsb_table[invalue]; + break; + + case 2 ... 3: +@@ -222,14 +243,13 @@ static int __init longhaul_get_ranges (v + invalue += 16; + maxmult=multipliers[invalue]; + +-#if 0 + invalue = longhaul.bits.MinMHzBR; +- if (longhaul.bits.MinMHzBR4); +- invalue += 16; +- minmult = multipliers[invalue]; +-#else +- minmult = 30; /* as per spec */ +-#endif ++ if (longhaul.bits.MinMHzBR4 == 1) ++ minmult = 30; ++ else ++ minmult = multipliers[invalue]; ++ ++ fsb = guess_fsb(maxmult); + break; + } + +--- linux-2.6.0-test6/arch/i386/kernel/cpu/cpufreq/Makefile 2003-09-27 18:57:43.000000000 -0700 ++++ 25/arch/i386/kernel/cpu/cpufreq/Makefile 2003-10-05 00:33:23.000000000 -0700 +@@ -1,5 +1,6 @@ + obj-$(CONFIG_X86_POWERNOW_K6) += powernow-k6.o + obj-$(CONFIG_X86_POWERNOW_K7) += powernow-k7.o ++obj-$(CONFIG_X86_POWERNOW_K8) += powernow-k8.o + obj-$(CONFIG_X86_LONGHAUL) += longhaul.o + obj-$(CONFIG_X86_P4_CLOCKMOD) += p4-clockmod.o + obj-$(CONFIG_ELAN_CPUFREQ) += elanfreq.o +--- /dev/null 2002-08-30 16:31:37.000000000 -0700 ++++ 25/arch/i386/kernel/cpu/cpufreq/powernow-k8.c 2003-10-05 00:33:23.000000000 -0700 +@@ -0,0 +1,1020 @@ ++/* ++ * (c) 2003 Advanced Micro Devices, Inc. ++ * Your use of this code is subject to the terms and conditions of the ++ * GNU general public license version 2. See "../../../COPYING" or ++ * http://www.gnu.org/licenses/gpl.html ++ * ++ * Support : paul.devriendt@amd.com ++ * ++ * Based on the powernow-k7.c module written by Dave Jones. ++ * (C) 2003 Dave Jones on behalf of SuSE Labs ++ * Licensed under the terms of the GNU GPL License version 2. ++ * Based upon datasheets & sample CPUs kindly provided by AMD. 
++ * ++ * Processor information obtained from Chapter 9 (Power and Thermal Management) ++ * of the "BIOS and Kernel Developer's Guide for the AMD Athlon 64 and AMD ++ * Opteron Processors", revision 3.03, available for download from www.amd.com ++ * ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++ ++#define PFX "powernow-k8: " ++#define BFX PFX "BIOS error: " ++#define VERSION "version 1.00.08 - September 26, 2003" ++#include "powernow-k8.h" ++ ++#ifdef CONFIG_PREEMPT ++#warning this driver has not been tested on a preempt system ++#endif ++ ++static u32 vstable; /* voltage stabalization time, from PSB, units 20 us */ ++static u32 plllock; /* pll lock time, from PSB, units 1 us */ ++static u32 numps; /* number of p-states, from PSB */ ++static u32 rvo; /* ramp voltage offset, from PSB */ ++static u32 irt; /* isochronous relief time, from PSB */ ++static u32 vidmvs; /* usable value calculated from mvs, from PSB */ ++struct pst_s *ppst; /* array of p states, valid for this part */ ++static u32 currvid; /* keep track of the current fid / vid */ ++static u32 currfid; ++ ++/* ++The PSB table supplied by BIOS allows for the definition of the number of ++p-states that can be used when running on a/c, and the number of p-states ++that can be used when running on battery. This allows laptop manufacturers ++to force the system to save power when running from battery. The relationship ++is : ++ 1 <= number_of_battery_p_states <= maximum_number_of_p_states ++ ++This driver does NOT have the support in it to detect transitions from ++a/c power to battery power, and thus trigger the transition to a lower ++p-state if required. This is because I need ACPI and the 2.6 kernel to do ++this, and this is a 2.4 kernel driver. Check back for a new improved driver ++for the 2.6 kernel soon. ++ ++This code therefore assumes it is on battery at all times, and thus ++restricts performance to number_of_battery_p_states. For desktops, ++ number_of_battery_p_states == maximum_number_of_pstates, ++so this is not actually a restriction. ++*/ ++ ++static u32 batps; /* limit on the number of p states when on battery */ ++ /* - set by BIOS in the PSB/PST */ ++ ++static struct cpufreq_driver cpufreq_amd64_driver = { ++ .verify = drv_verify, ++ .target = drv_target, ++ .init = drv_cpu_init, ++ .name = "cpufreq-amd64", ++ .owner = THIS_MODULE, ++}; ++ ++#define SEARCH_UP 1 ++#define SEARCH_DOWN 0 ++ ++/* Return a frequency in MHz, given an input fid */ ++u32 ++find_freq_from_fid(u32 fid) ++{ ++ return 800 + (fid * 100); ++} ++ ++/* Return a fid matching an input frequency in MHz */ ++u32 ++find_fid_from_freq(u32 freq) ++{ ++ return (freq - 800) / 100; ++} ++ ++/* Return the vco fid for an input fid */ ++static u32 ++convert_fid_to_vco_fid(u32 fid) ++{ ++ if (fid < HI_FID_TABLE_BOTTOM) { ++ return 8 + (2 * fid); ++ } else { ++ return fid; ++ } ++} ++ ++/* Sort the fid/vid frequency table into ascending order by fid. The spec */ ++/* implies that it will be sorted by BIOS, but, it only implies it, and I */ ++/* prefer not to trust when I can check. */ ++/* Yes, it is a simple bubble sort, but the PST is really small, so the */ ++/* choice of algorithm is pretty irrelevant. 
*/ ++static inline void ++sort_pst(struct pst_s *ppst, u32 numpstates) ++{ ++ u32 i; ++ u8 tempfid; ++ u8 tempvid; ++ int swaps = 1; ++ ++ while (swaps) { ++ swaps = 0; ++ for (i = 0; i < (numpstates - 1); i++) { ++ if (ppst[i].fid > ppst[i + 1].fid) { ++ swaps = 1; ++ tempfid = ppst[i].fid; ++ tempvid = ppst[i].vid; ++ ppst[i].fid = ppst[i + 1].fid; ++ ppst[i].vid = ppst[i + 1].vid; ++ ppst[i + 1].fid = tempfid; ++ ppst[i + 1].vid = tempvid; ++ } ++ } ++ } ++ ++ return; ++} ++ ++/* Return 1 if the pending bit is set. Unless we are actually just told the */ ++/* processor to transition a state, seeing this bit set is really bad news. */ ++static inline int ++pending_bit_stuck(void) ++{ ++ u32 lo; ++ u32 hi; ++ ++ rdmsr(MSR_FIDVID_STATUS, lo, hi); ++ return lo & MSR_S_LO_CHANGE_PENDING ? 1 : 0; ++} ++ ++/* Update the global current fid / vid values from the status msr. Returns 1 */ ++/* on error. */ ++static int ++query_current_values_with_pending_wait(void) ++{ ++ u32 lo; ++ u32 hi; ++ u32 i = 0; ++ ++ lo = MSR_S_LO_CHANGE_PENDING; ++ while (lo & MSR_S_LO_CHANGE_PENDING) { ++ if (i++ > 0x1000000) { ++ printk(KERN_ERR PFX "detected change pending stuck\n"); ++ return 1; ++ } ++ rdmsr(MSR_FIDVID_STATUS, lo, hi); ++ } ++ ++ currvid = hi & MSR_S_HI_CURRENT_VID; ++ currfid = lo & MSR_S_LO_CURRENT_FID; ++ ++ return 0; ++} ++ ++/* the isochronous relief time */ ++static inline void ++count_off_irt(void) ++{ ++ udelay((1 << irt) * 10); ++ return; ++} ++ ++/* the voltage stabalization time */ ++static inline void ++count_off_vst(void) ++{ ++ udelay(vstable * VST_UNITS_20US); ++ return; ++} ++ ++/* write the new fid value along with the other control fields to the msr */ ++static int ++write_new_fid(u32 fid) ++{ ++ u32 lo; ++ u32 savevid = currvid; ++ ++ if ((fid & INVALID_FID_MASK) || (currvid & INVALID_VID_MASK)) { ++ printk(KERN_ERR PFX "internal error - overflow on fid write\n"); ++ return 1; ++ } ++ ++ lo = fid | (currvid << MSR_C_LO_VID_SHIFT) | MSR_C_LO_INIT_FID_VID; ++ ++ dprintk(KERN_DEBUG PFX "writing fid %x, lo %x, hi %x\n", ++ fid, lo, plllock * PLL_LOCK_CONVERSION); ++ ++ wrmsr(MSR_FIDVID_CTL, lo, plllock * PLL_LOCK_CONVERSION); ++ ++ if (query_current_values_with_pending_wait()) ++ return 1; ++ ++ count_off_irt(); ++ ++ if (savevid != currvid) { ++ printk(KERN_ERR PFX ++ "vid changed on fid transition, save %x, currvid %x\n", ++ savevid, currvid); ++ return 1; ++ } ++ ++ if (fid != currfid) { ++ printk(KERN_ERR PFX ++ "fid transition failed, fid %x, currfid %x\n", ++ fid, currfid); ++ return 1; ++ } ++ ++ return 0; ++} ++ ++/* Write a new vid to the hardware */ ++static int ++write_new_vid(u32 vid) ++{ ++ u32 lo; ++ u32 savefid = currfid; ++ ++ if ((currfid & INVALID_FID_MASK) || (vid & INVALID_VID_MASK)) { ++ printk(KERN_ERR PFX "internal error - overflow on vid write\n"); ++ return 1; ++ } ++ ++ lo = currfid | (vid << MSR_C_LO_VID_SHIFT) | MSR_C_LO_INIT_FID_VID; ++ ++ dprintk(KERN_DEBUG PFX "writing vid %x, lo %x, hi %x\n", ++ vid, lo, STOP_GRANT_5NS); ++ ++ wrmsr(MSR_FIDVID_CTL, lo, STOP_GRANT_5NS); ++ ++ if (query_current_values_with_pending_wait()) { ++ return 1; ++ } ++ ++ if (savefid != currfid) { ++ printk(KERN_ERR PFX ++ "fid changed on vid transition, save %x currfid %x\n", ++ savefid, currfid); ++ return 1; ++ } ++ ++ if (vid != currvid) { ++ printk(KERN_ERR PFX ++ "vid transition failed, vid %x, currvid %x\n", ++ vid, currvid); ++ return 1; ++ } ++ ++ return 0; ++} ++ ++/* Reduce the vid by the max of step or reqvid. 
*/ ++/* Decreasing vid codes represent increasing voltages : */ ++/* vid of 0 is 1.550V, vid of 0x1e is 0.800V, vid of 0x1f is off. */ ++static int ++decrease_vid_code_by_step(u32 reqvid, u32 step) ++{ ++ if ((currvid - reqvid) > step) ++ reqvid = currvid - step; ++ ++ if (write_new_vid(reqvid)) ++ return 1; ++ ++ count_off_vst(); ++ ++ return 0; ++} ++ ++/* Change the fid and vid, by the 3 phases. */ ++static inline int ++transition_fid_vid(u32 reqfid, u32 reqvid) ++{ ++ if (core_voltage_pre_transition(reqvid)) ++ return 1; ++ ++ if (core_frequency_transition(reqfid)) ++ return 1; ++ ++ if (core_voltage_post_transition(reqvid)) ++ return 1; ++ ++ if (query_current_values_with_pending_wait()) ++ return 1; ++ ++ if ((reqfid != currfid) || (reqvid != currvid)) { ++ printk(KERN_ERR PFX "failed: req 0x%x 0x%x, curr 0x%x 0x%x\n", ++ reqfid, reqvid, currfid, currvid); ++ return 1; ++ } ++ ++ dprintk(KERN_INFO PFX ++ "transitioned: new fid 0x%x, vid 0x%x\n", currfid, currvid); ++ ++ return 0; ++} ++ ++/* Phase 1 - core voltage transition ... setup appropriate voltage for the */ ++/* fid transition. */ ++static inline int ++core_voltage_pre_transition(u32 reqvid) ++{ ++ u32 rvosteps = rvo; ++ u32 savefid = currfid; ++ ++ dprintk(KERN_DEBUG PFX ++ "ph1: start, currfid 0x%x, currvid 0x%x, reqvid 0x%x, rvo %x\n", ++ currfid, currvid, reqvid, rvo); ++ ++ while (currvid > reqvid) { ++ dprintk(KERN_DEBUG PFX "ph1: curr 0x%x, requesting vid 0x%x\n", ++ currvid, reqvid); ++ if (decrease_vid_code_by_step(reqvid, vidmvs)) ++ return 1; ++ } ++ ++ while (rvosteps > 0) { ++ if (currvid == 0) { ++ rvosteps = 0; ++ } else { ++ dprintk(KERN_DEBUG PFX ++ "ph1: changing vid for rvo, requesting 0x%x\n", ++ currvid - 1); ++ if (decrease_vid_code_by_step(currvid - 1, 1)) ++ return 1; ++ rvosteps--; ++ } ++ } ++ ++ if (query_current_values_with_pending_wait()) ++ return 1; ++ ++ if (savefid != currfid) { ++ printk(KERN_ERR PFX "ph1 err, currfid changed 0x%x\n", currfid); ++ return 1; ++ } ++ ++ dprintk(KERN_DEBUG PFX "ph1 complete, currfid 0x%x, currvid 0x%x\n", ++ currfid, currvid); ++ ++ return 0; ++} ++ ++/* Phase 2 - core frequency transition */ ++static inline int ++core_frequency_transition(u32 reqfid) ++{ ++ u32 vcoreqfid; ++ u32 vcocurrfid; ++ u32 vcofiddiff; ++ u32 savevid = currvid; ++ ++ if ((reqfid < HI_FID_TABLE_BOTTOM) && (currfid < HI_FID_TABLE_BOTTOM)) { ++ printk(KERN_ERR PFX "ph2 illegal lo-lo transition 0x%x 0x%x\n", ++ reqfid, currfid); ++ return 1; ++ } ++ ++ if (currfid == reqfid) { ++ printk(KERN_ERR PFX "ph2 null fid transition 0x%x\n", currfid); ++ return 0; ++ } ++ ++ dprintk(KERN_DEBUG PFX ++ "ph2 starting, currfid 0x%x, currvid 0x%x, reqfid 0x%x\n", ++ currfid, currvid, reqfid); ++ ++ vcoreqfid = convert_fid_to_vco_fid(reqfid); ++ vcocurrfid = convert_fid_to_vco_fid(currfid); ++ vcofiddiff = vcocurrfid > vcoreqfid ? vcocurrfid - vcoreqfid ++ : vcoreqfid - vcocurrfid; ++ ++ while (vcofiddiff > 2) { ++ if (reqfid > currfid) { ++ if (currfid > LO_FID_TABLE_TOP) { ++ if (write_new_fid(currfid + 2)) { ++ return 1; ++ } ++ } else { ++ if (write_new_fid ++ (2 + convert_fid_to_vco_fid(currfid))) { ++ return 1; ++ } ++ } ++ } else { ++ if (write_new_fid(currfid - 2)) ++ return 1; ++ } ++ ++ vcocurrfid = convert_fid_to_vco_fid(currfid); ++ vcofiddiff = vcocurrfid > vcoreqfid ? 
vcocurrfid - vcoreqfid
++ : vcoreqfid - vcocurrfid;
++ }
++
++ if (write_new_fid(reqfid))
++ return 1;
++
++ if (query_current_values_with_pending_wait())
++ return 1;
++
++ if (currfid != reqfid) {
++ printk(KERN_ERR PFX
++ "ph2 mismatch, failed fid transition, curr %x, req %x\n",
++ currfid, reqfid);
++ return 1;
++ }
++
++ if (savevid != currvid) {
++ printk(KERN_ERR PFX
++ "ph2 vid changed, save %x, curr %x\n", savevid,
++ currvid);
++ return 1;
++ }
++
++ dprintk(KERN_DEBUG PFX "ph2 complete, currfid 0x%x, currvid 0x%x\n",
++ currfid, currvid);
++
++ return 0;
++}
++
++/* Phase 3 - core voltage transition flow ... jump to the final vid. */
++static inline int
++core_voltage_post_transition(u32 reqvid)
++{
++ u32 savefid = currfid;
++ u32 savereqvid = reqvid;
++
++ dprintk(KERN_DEBUG PFX "ph3 starting, currfid 0x%x, currvid 0x%x\n",
++ currfid, currvid);
++
++ if (reqvid != currvid) {
++ if (write_new_vid(reqvid))
++ return 1;
++
++ if (savefid != currfid) {
++ printk(KERN_ERR PFX
++ "ph3: bad fid change, save %x, curr %x\n",
++ savefid, currfid);
++ return 1;
++ }
++
++ if (currvid != reqvid) {
++ printk(KERN_ERR PFX
++ "ph3: failed vid transition, req %x, curr %x\n",
++ reqvid, currvid);
++ return 1;
++ }
++ }
++
++ if (query_current_values_with_pending_wait())
++ return 1;
++
++ if (savereqvid != currvid) {
++ dprintk(KERN_ERR PFX "ph3 failed, currvid 0x%x\n", currvid);
++ return 1;
++ }
++
++ if (savefid != currfid) {
++ dprintk(KERN_ERR PFX "ph3 failed, currfid changed 0x%x\n",
++ currfid);
++ return 1;
++ }
++
++ dprintk(KERN_DEBUG PFX "ph3 complete, currfid 0x%x, currvid 0x%x\n",
++ currfid, currvid);
++
++ return 0;
++}
++
++static inline int
++check_supported_cpu(void)
++{
++ struct cpuinfo_x86 *c = cpu_data;
++ u32 eax, ebx, ecx, edx;
++
++ if (num_online_cpus() != 1) {
++ printk(KERN_INFO PFX "multiprocessor systems not supported\n");
++ return 0;
++ }
++
++ if (c->x86_vendor != X86_VENDOR_AMD) {
++ printk(KERN_INFO PFX "Not an AMD processor\n");
++ return 0;
++ }
++
++ eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE);
++ if ((eax & CPUID_XFAM_MOD) == ATHLON64_XFAM_MOD) {
++ dprintk(KERN_DEBUG PFX "AMD Athlon 64 Processor found\n");
++ if ((eax & CPUID_F1_STEP) < ATHLON64_REV_C0) {
++ printk(KERN_INFO PFX "Revision C0 or better "
++ "AMD Athlon 64 processor required\n");
++ return 0;
++ }
++ } else if ((eax & CPUID_XFAM_MOD) == OPTERON_XFAM_MOD) {
++ dprintk(KERN_DEBUG PFX "AMD Opteron Processor found\n");
++ } else {
++ printk(KERN_INFO PFX
++ "AMD Athlon 64 or AMD Opteron processor required\n");
++ return 0;
++ }
++
++ eax = cpuid_eax(CPUID_GET_MAX_CAPABILITIES);
++ if (eax < CPUID_FREQ_VOLT_CAPABILITIES) {
++ printk(KERN_INFO PFX
++ "No frequency change capabilities detected\n");
++ return 0;
++ }
++
++ cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx);
++ if ((edx & P_STATE_TRANSITION_CAPABLE) != P_STATE_TRANSITION_CAPABLE) {
++ printk(KERN_INFO PFX "Power state transitions not supported\n");
++ return 0;
++ }
++
++ printk(KERN_INFO PFX "Found AMD Athlon 64 / Opteron processor "
++ "supporting p-state transitions\n");
++
++ return 1;
++}
++
++/* Find and validate the PSB/PST table in BIOS. */
++static inline int
++find_psb_table(void)
++{
++ struct psb_s *psb;
++ struct pst_s *pst;
++ unsigned i, j;
++ u32 lastfid;
++ u32 mvs;
++ u8 maxvid;
++
++ for (i = 0xc0000; i < 0xffff0; i += 0x10) {
++ /* Scan BIOS looking for the signature. */
++ /* It cannot be at ffff0 - it is too big.
*/ ++ ++ psb = phys_to_virt(i); ++ if (memcmp(psb, PSB_ID_STRING, PSB_ID_STRING_LEN) != 0) ++ continue; ++ ++ dprintk(KERN_DEBUG PFX "found PSB header at 0x%p\n", psb); ++ ++ dprintk(KERN_DEBUG PFX "table vers: 0x%x\n", psb->tableversion); ++ if (psb->tableversion != PSB_VERSION_1_4) { ++ printk(KERN_INFO BFX "PSB table is not v1.4\n"); ++ return -ENODEV; ++ } ++ ++ dprintk(KERN_DEBUG PFX "flags: 0x%x\n", psb->flags1); ++ if (psb->flags1) { ++ printk(KERN_ERR BFX "unknown flags\n"); ++ return -ENODEV; ++ } ++ ++ vstable = psb->voltagestabilizationtime; ++ printk(KERN_INFO PFX "voltage stable time: %d (units 20us)\n", ++ vstable); ++ ++ dprintk(KERN_DEBUG PFX "flags2: 0x%x\n", psb->flags2); ++ rvo = psb->flags2 & 3; ++ irt = ((psb->flags2) >> 2) & 3; ++ mvs = ((psb->flags2) >> 4) & 3; ++ vidmvs = 1 << mvs; ++ batps = ((psb->flags2) >> 6) & 3; ++ printk(KERN_INFO PFX "p states on battery: %d ", batps); ++ switch (batps) { ++ case 0: ++ printk("- all available\n"); ++ break; ++ case 1: ++ printk("- only the minimum\n"); ++ break; ++ case 2: ++ printk("- only the 2 lowest\n"); ++ break; ++ case 3: ++ printk("- only the 3 lowest\n"); ++ break; ++ } ++ printk(KERN_INFO PFX "ramp voltage offset: %d\n", rvo); ++ printk(KERN_INFO PFX "isochronous relief time: %d\n", irt); ++ printk(KERN_INFO PFX "maximum voltage step: %d\n", mvs); ++ ++ dprintk(KERN_DEBUG PFX "numpst: 0x%x\n", psb->numpst); ++ if (psb->numpst != 1) { ++ printk(KERN_ERR BFX "numpst must be 1\n"); ++ return -ENODEV; ++ } ++ ++ dprintk(KERN_DEBUG PFX "cpuid: 0x%x\n", psb->cpuid); ++ ++ plllock = psb->plllocktime; ++ printk(KERN_INFO PFX "pll lock time: 0x%x\n", plllock); ++ ++ maxvid = psb->maxvid; ++ printk(KERN_INFO PFX "maxfid: 0x%x\n", psb->maxfid); ++ printk(KERN_INFO PFX "maxvid: 0x%x\n", maxvid); ++ ++ numps = psb->numpstates; ++ printk(KERN_INFO PFX "numpstates: 0x%x\n", numps); ++ if (numps < 2) { ++ printk(KERN_ERR BFX "no p states to transition\n"); ++ return -ENODEV; ++ } ++ ++ if (batps == 0) { ++ batps = numps; ++ } else if (batps > numps) { ++ printk(KERN_ERR BFX "batterypstates > numpstates\n"); ++ batps = numps; ++ } else { ++ printk(KERN_ERR PFX ++ "Restricting operation to %d p-states\n", batps); ++ printk(KERN_ERR PFX ++ "Check for an updated driver to access all " ++ "%d p-states\n", numps); ++ } ++ ++ if ((numps <= 1) || (batps <= 1)) { ++ printk(KERN_ERR PFX "only 1 p-state to transition\n"); ++ return -ENODEV; ++ } ++ ++ ppst = kmalloc(sizeof (struct pst_s) * numps, GFP_KERNEL); ++ if (!ppst) { ++ printk(KERN_ERR PFX "ppst memory alloc failure\n"); ++ return -ENOMEM; ++ } ++ ++ pst = (struct pst_s *) (psb + 1); ++ for (j = 0; j < numps; j++) { ++ ppst[j].fid = pst[j].fid; ++ ppst[j].vid = pst[j].vid; ++ printk(KERN_INFO PFX ++ " %d : fid 0x%x, vid 0x%x\n", j, ++ ppst[j].fid, ppst[j].vid); ++ } ++ sort_pst(ppst, numps); ++ ++ lastfid = ppst[0].fid; ++ if (lastfid > LO_FID_TABLE_TOP) ++ printk(KERN_INFO BFX "first fid not in lo freq tbl\n"); ++ ++ if ((lastfid > MAX_FID) || (lastfid & 1) || (ppst[0].vid > LEAST_VID)) { ++ printk(KERN_ERR BFX "first fid/vid bad (0x%x - 0x%x)\n", ++ lastfid, ppst[0].vid); ++ kfree(ppst); ++ return -ENODEV; ++ } ++ ++ for (j = 1; j < numps; j++) { ++ if ((lastfid >= ppst[j].fid) ++ || (ppst[j].fid & 1) ++ || (ppst[j].fid < HI_FID_TABLE_BOTTOM) ++ || (ppst[j].fid > MAX_FID) ++ || (ppst[j].vid > LEAST_VID)) { ++ printk(KERN_ERR BFX ++ "invalid fid/vid in pst(%x %x)\n", ++ ppst[j].fid, ppst[j].vid); ++ kfree(ppst); ++ return -ENODEV; ++ } ++ lastfid = ppst[j].fid; ++ } ++ ++ for (j = 
0; j < numps; j++) {
++ if (ppst[j].vid < rvo) { /* vid+rvo >= 0 */
++ printk(KERN_ERR BFX
++ "0 vid exceeded with pstate %d\n", j);
++ return -ENODEV;
++ }
++ if (ppst[j].vid < maxvid+rvo) { /* vid+rvo >= maxvid */
++ printk(KERN_ERR BFX
++ "maxvid exceeded with pstate %d\n", j);
++ return -ENODEV;
++ }
++ }
++
++ if (query_current_values_with_pending_wait()) {
++ kfree(ppst);
++ return -EIO;
++ }
++
++ printk(KERN_INFO PFX "currfid 0x%x, currvid 0x%x\n",
++ currfid, currvid);
++
++ for (j = 0; j < numps; j++)
++ if ((ppst[j].fid==currfid) && (ppst[j].vid==currvid))
++ return (0);
++
++ printk(KERN_ERR BFX "currfid/vid do not match PST, ignoring\n");
++ return 0;
++ }
++
++ printk(KERN_ERR BFX "no PSB\n");
++ return -ENODEV;
++}
++
++/* Converts a frequency (that might not necessarily be a multiple of 200) */
++/* to a fid. */
++u32
++find_closest_fid(u32 freq, int searchup)
++{
++ if (searchup == SEARCH_UP)
++ freq += MIN_FREQ_RESOLUTION - 1;
++
++ freq = (freq / MIN_FREQ_RESOLUTION) * MIN_FREQ_RESOLUTION;
++
++ if (freq < MIN_FREQ)
++ freq = MIN_FREQ;
++ else if (freq > MAX_FREQ)
++ freq = MAX_FREQ;
++
++ return find_fid_from_freq(freq);
++}
++
++static int
++find_match(u32 * ptargfreq, u32 * pmin, u32 * pmax, int searchup, u32 * pfid,
++ u32 * pvid)
++{
++ u32 availpstates = batps;
++ u32 targfid = find_closest_fid(*ptargfreq, searchup);
++ u32 minfid = find_closest_fid(*pmin, SEARCH_DOWN);
++ u32 maxfid = find_closest_fid(*pmax, SEARCH_UP);
++ u32 minidx = 0;
++ u32 maxidx = availpstates - 1;
++ u32 targidx = 0xffffffff;
++ int i;
++
++ dprintk(KERN_DEBUG PFX "find match: freq %d MHz, min %d, max %d\n",
++ *ptargfreq, *pmin, *pmax);
++
++ /* Restrict values to the frequency choices in the PST */
++ if (minfid < ppst[0].fid)
++ minfid = ppst[0].fid;
++ if (maxfid > ppst[maxidx].fid)
++ maxfid = ppst[maxidx].fid;
++
++ /* Find appropriate PST index for the minimum fid */
++ for (i = 0; i < (int) availpstates; i++) {
++ if (minfid >= ppst[i].fid)
++ minidx = i;
++ }
++
++ /* Find appropriate PST index for the maximum fid */
++ for (i = availpstates - 1; i >= 0; i--) {
++ if (maxfid <= ppst[i].fid)
++ maxidx = i;
++ }
++
++ if (minidx > maxidx)
++ maxidx = minidx;
++
++ /* Frequency ids are now constrained by limits matching PST entries */
++ minfid = ppst[minidx].fid;
++ maxfid = ppst[maxidx].fid;
++
++ /* Limit the target frequency to these limits */
++ if (targfid < minfid)
++ targfid = minfid;
++ else if (targfid > maxfid)
++ targfid = maxfid;
++
++ /* Find the best target index into the PST, constrained by the range */
++ if (searchup == SEARCH_UP) {
++ for (i = maxidx; i >= (int) minidx; i--) {
++ if (targfid <= ppst[i].fid)
++ targidx = i;
++ }
++ } else {
++ for (i = minidx; i <= (int) maxidx; i++) {
++ if (targfid >= ppst[i].fid)
++ targidx = i;
++ }
++ }
++
++ if (targidx == 0xffffffff) {
++ printk(KERN_ERR PFX "could not find target\n");
++ return 1;
++ }
++
++ *pmin = find_freq_from_fid(minfid);
++ *pmax = find_freq_from_fid(maxfid);
++ *ptargfreq = find_freq_from_fid(ppst[targidx].fid);
++
++ if (pfid)
++ *pfid = ppst[targidx].fid;
++ if (pvid)
++ *pvid = ppst[targidx].vid;
++
++ return 0;
++}
++
++/* Take a frequency, and issue the fid/vid transition command */
++static inline int
++transition_frequency(u32 * preq, u32 * pmin, u32 * pmax, u32 searchup)
++{
++ u32 fid;
++ u32 vid;
++ int res;
++ struct cpufreq_freqs freqs;
++
++ if (find_match(preq, pmin, pmax, searchup, &fid, &vid))
++ return 1;
++
++ dprintk(KERN_DEBUG PFX "table matched fid 0x%x, giving vid 0x%x\n",
++ fid,
vid); ++ ++ if (query_current_values_with_pending_wait()) ++ return 1; ++ ++ if ((currvid == vid) && (currfid == fid)) { ++ dprintk(KERN_DEBUG PFX ++ "target matches current values (fid 0x%x, vid 0x%x)\n", ++ fid, vid); ++ return 0; ++ } ++ ++ if ((fid < HI_FID_TABLE_BOTTOM) && (currfid < HI_FID_TABLE_BOTTOM)) { ++ printk(KERN_ERR PFX ++ "ignoring illegal change in lo freq table-%x to %x\n", ++ currfid, fid); ++ return 1; ++ } ++ ++ dprintk(KERN_DEBUG PFX "changing to fid 0x%x, vid 0x%x\n", fid, vid); ++ ++ freqs.cpu = 0; /* only true because SMP not supported */ ++ ++ freqs.old = find_freq_from_fid(currfid); ++ freqs.new = find_freq_from_fid(fid); ++ cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); ++ ++ res = transition_fid_vid(fid, vid); ++ ++ freqs.new = find_freq_from_fid(currfid); ++ cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); ++ ++ return res; ++} ++ ++/* Driver entry point to switch to the target frequency */ ++static int ++drv_target(struct cpufreq_policy *pol, unsigned targfreq, unsigned relation) ++{ ++ u32 checkfid = currfid; ++ u32 checkvid = currvid; ++ u32 reqfreq = targfreq / 1000; ++ u32 minfreq = pol->min / 1000; ++ u32 maxfreq = pol->max / 1000; ++ ++ if (ppst == 0) { ++ printk(KERN_ERR PFX "targ: ppst 0\n"); ++ return -ENODEV; ++ } ++ ++ if (pending_bit_stuck()) { ++ printk(KERN_ERR PFX "drv targ fail: change pending bit set\n"); ++ return -EIO; ++ } ++ ++ dprintk(KERN_DEBUG PFX "targ: %d kHz, min %d, max %d, relation %d\n", ++ targfreq, pol->min, pol->max, relation); ++ ++ if (query_current_values_with_pending_wait()) ++ return -EIO; ++ ++ dprintk(KERN_DEBUG PFX "targ: curr fid 0x%x, vid 0x%x\n", ++ currfid, currvid); ++ ++ if ((checkvid != currvid) || (checkfid != currfid)) { ++ printk(KERN_ERR PFX ++ "error - out of sync, fid 0x%x 0x%x, vid 0x%x 0x%x\n", ++ checkfid, currfid, checkvid, currvid); ++ } ++ ++ if (transition_frequency(&reqfreq, &minfreq, &maxfreq, ++ relation == ++ CPUFREQ_RELATION_H ? SEARCH_UP : SEARCH_DOWN)) ++ { ++ printk(KERN_ERR PFX "transition frequency failed\n"); ++ return 1; ++ } ++ ++ pol->cur = 1000 * find_freq_from_fid(currfid); ++ ++ return 0; ++} ++ ++/* Driver entry point to verify the policy and range of frequencies */ ++static int ++drv_verify(struct cpufreq_policy *pol) ++{ ++ u32 min = pol->min / 1000; ++ u32 max = pol->max / 1000; ++ u32 targ = min; ++ int res; ++ ++ if (ppst == 0) { ++ printk(KERN_ERR PFX "verify - ppst 0\n"); ++ return -ENODEV; ++ } ++ ++ if (pending_bit_stuck()) { ++ printk(KERN_ERR PFX "failing verify, change pending bit set\n"); ++ return -EIO; ++ } ++ ++ dprintk(KERN_DEBUG PFX ++ "ver: cpu%d, min %d, max %d, cur %d, pol %d\n", pol->cpu, ++ pol->min, pol->max, pol->cur, pol->policy); ++ ++ if (pol->cpu != 0) { ++ printk(KERN_ERR PFX "verify - cpu not 0\n"); ++ return -ENODEV; ++ } ++ ++ res = find_match(&targ, &min, &max, ++ pol->policy == CPUFREQ_POLICY_POWERSAVE ? ++ SEARCH_DOWN : SEARCH_UP, 0, 0); ++ if (!res) { ++ pol->min = min * 1000; ++ pol->max = max * 1000; ++ } ++ return res; ++} ++ ++/* per CPU init entry point to the driver */ ++static int __init ++drv_cpu_init(struct cpufreq_policy *pol) ++{ ++ if (pol->cpu != 0) { ++ printk(KERN_ERR PFX "init not cpu 0\n"); ++ return -ENODEV; ++ } ++ ++ pol->policy = CPUFREQ_POLICY_PERFORMANCE; /* boot as fast as we can */ ++ ++ /* Take a crude guess here. 
*/ ++ pol->cpuinfo.transition_latency = ((rvo + 8) * vstable * VST_UNITS_20US) ++ + (3 * (1 << irt) * 10); ++ ++ if (query_current_values_with_pending_wait()) ++ return -EIO; ++ ++ pol->cur = 1000 * find_freq_from_fid(currfid); ++ dprintk(KERN_DEBUG PFX "policy current frequency %d kHz\n", pol->cur); ++ ++ /* min/max the cpu is capable of */ ++ pol->cpuinfo.min_freq = 1000 * find_freq_from_fid(ppst[0].fid); ++ pol->cpuinfo.max_freq = 1000 * find_freq_from_fid(ppst[numps-1].fid); ++ pol->min = 1000 * find_freq_from_fid(ppst[0].fid); ++ pol->max = 1000 * find_freq_from_fid(ppst[batps - 1].fid); ++ ++ printk(KERN_INFO PFX "cpu_init done, current fid 0x%x, vid 0x%x\n", ++ currfid, currvid); ++ ++ return 0; ++} ++ ++/* driver entry point for init */ ++static int __init ++drv_init(void) ++{ ++ int rc; ++ ++ printk(KERN_INFO PFX VERSION "\n"); ++ ++ if (check_supported_cpu() == 0) ++ return -ENODEV; ++ ++ rc = find_psb_table(); ++ if (rc) ++ return rc; ++ ++ if (pending_bit_stuck()) { ++ printk(KERN_ERR PFX "drv_init fail, change pending bit set\n"); ++ kfree(ppst); ++ return -EIO; ++ } ++ ++ return cpufreq_register_driver(&cpufreq_amd64_driver); ++} ++ ++/* driver entry point for term */ ++static void __exit ++drv_exit(void) ++{ ++ dprintk(KERN_INFO PFX "drv_exit\n"); ++ ++ cpufreq_unregister_driver(&cpufreq_amd64_driver); ++ kfree(ppst); ++} ++ ++MODULE_AUTHOR("Paul Devriendt "); ++MODULE_DESCRIPTION("AMD Athlon 64 and Opteron processor frequency driver."); ++MODULE_LICENSE("GPL"); ++ ++module_init(drv_init); ++module_exit(drv_exit); +--- /dev/null 2002-08-30 16:31:37.000000000 -0700 ++++ 25/arch/i386/kernel/cpu/cpufreq/powernow-k8.h 2003-10-05 00:33:23.000000000 -0700 +@@ -0,0 +1,126 @@ ++/* ++ * (c) 2003 Advanced Micro Devices, Inc. ++ * Your use of this code is subject to the terms and conditions of the ++ * GNU general public license version 2. See "../../../COPYING" or ++ * http://www.gnu.org/licenses/gpl.html ++ */ ++ ++/* processor's cpuid instruction support */ ++#define CPUID_PROCESSOR_SIGNATURE 1 /* function 1 */ ++#define CPUID_F1_FAM 0x00000f00 /* family mask */ ++#define CPUID_F1_XFAM 0x0ff00000 /* extended family mask */ ++#define CPUID_F1_MOD 0x000000f0 /* model mask */ ++#define CPUID_F1_STEP 0x0000000f /* stepping level mask */ ++#define CPUID_XFAM_MOD 0x0ff00ff0 /* xtended fam, fam + model */ ++#define ATHLON64_XFAM_MOD 0x00000f40 /* xtended fam, fam + model */ ++#define OPTERON_XFAM_MOD 0x00000f50 /* xtended fam, fam + model */ ++#define ATHLON64_REV_C0 8 ++#define CPUID_GET_MAX_CAPABILITIES 0x80000000 ++#define CPUID_FREQ_VOLT_CAPABILITIES 0x80000007 ++#define P_STATE_TRANSITION_CAPABLE 6 ++ ++/* Model Specific Registers for p-state transitions. MSRs are 64-bit. For */ ++/* writes (wrmsr - opcode 0f 30), the register number is placed in ecx, and */ ++/* the value to write is placed in edx:eax. For reads (rdmsr - opcode 0f 32), */ ++/* the register number is placed in ecx, and the data is returned in edx:eax. 
*/
++
++#define MSR_FIDVID_CTL 0xc0010041
++#define MSR_FIDVID_STATUS 0xc0010042
++
++/* Field definitions within the FID VID Low Control MSR : */
++#define MSR_C_LO_INIT_FID_VID 0x00010000
++#define MSR_C_LO_NEW_VID 0x00001f00
++#define MSR_C_LO_NEW_FID 0x0000002f
++#define MSR_C_LO_VID_SHIFT 8
++
++/* Field definitions within the FID VID High Control MSR : */
++#define MSR_C_HI_STP_GNT_TO 0x000fffff
++
++/* Field definitions within the FID VID Low Status MSR : */
++#define MSR_S_LO_CHANGE_PENDING 0x80000000 /* cleared when completed */
++#define MSR_S_LO_MAX_RAMP_VID 0x1f000000
++#define MSR_S_LO_MAX_FID 0x003f0000
++#define MSR_S_LO_START_FID 0x00003f00
++#define MSR_S_LO_CURRENT_FID 0x0000003f
++
++/* Field definitions within the FID VID High Status MSR : */
++#define MSR_S_HI_MAX_WORKING_VID 0x001f0000
++#define MSR_S_HI_START_VID 0x00001f00
++#define MSR_S_HI_CURRENT_VID 0x0000001f
++
++/* fids (frequency identifiers) are arranged in 2 tables - lo and hi */
++#define LO_FID_TABLE_TOP 6
++#define HI_FID_TABLE_BOTTOM 8
++
++#define LO_VCOFREQ_TABLE_TOP 1400 /* corresponding vco frequency values */
++#define HI_VCOFREQ_TABLE_BOTTOM 1600
++
++#define MIN_FREQ_RESOLUTION 200 /* fids jump by 2 matching freq jumps by 200 */
++
++#define MAX_FID 0x2a /* Spec only gives FID values as far as 5 GHz */
++
++#define LEAST_VID 0x1e /* Lowest (numerically highest) useful vid value */
++
++#define MIN_FREQ 800 /* Min and max freqs, per spec */
++#define MAX_FREQ 5000
++
++#define INVALID_FID_MASK 0xffffffc1 /* not a valid fid if these bits are set */
++
++#define INVALID_VID_MASK 0xffffffe0 /* not a valid vid if these bits are set */
++
++#define STOP_GRANT_5NS 1 /* min poss memory access latency for voltage change */
++
++#define PLL_LOCK_CONVERSION (1000/5) /* ms to ns, then divide by clock period */
++
++#define MAXIMUM_VID_STEPS 1 /* Current cpus only allow a single step of 25mV */
++
++#define VST_UNITS_20US 20 /* Voltage Stabilization Time is in units of 20us */
++
++/*
++Version 1.4 of the PSB table. This table is constructed by the BIOS and
++tells the OS's power management driver which VIDs and FIDs are
++supported by this particular processor. This information is obtained from
++the data sheets for each processor model by the system vendor and
++incorporated into the BIOS.
++If the data in the PSB / PST is wrong, then this driver will program the
++wrong values into hardware, which is very likely to lead to a crash.
++*/
++
++#define PSB_ID_STRING "AMDK7PNOW!"
++#define PSB_ID_STRING_LEN 10
++
++#define PSB_VERSION_1_4 0x14
++
++struct psb_s {
++ u8 signature[10];
++ u8 tableversion;
++ u8 flags1;
++ u16 voltagestabilizationtime;
++ u8 flags2;
++ u8 numpst;
++ u32 cpuid;
++ u8 plllocktime;
++ u8 maxfid;
++ u8 maxvid;
++ u8 numpstates;
++};
++
++/* Pairs of fid/vid values are appended to the version 1.4 PSB table. */
++struct pst_s {
++ u8 fid;
++ u8 vid;
++};
++
++#ifdef DEBUG
++#define dprintk(msg...) printk(msg)
++#else
++#define dprintk(msg...)
do { } while(0) ++#endif ++ ++static inline int core_voltage_pre_transition(u32 reqvid); ++static inline int core_voltage_post_transition(u32 reqvid); ++static inline int core_frequency_transition(u32 reqfid); ++static int drv_verify(struct cpufreq_policy *pol); ++static int drv_target(struct cpufreq_policy *pol, unsigned targfreq, ++ unsigned relation); ++static int __init drv_cpu_init(struct cpufreq_policy *pol); +--- linux-2.6.0-test6/arch/i386/kernel/cpu/intel.c 2003-09-27 18:57:43.000000000 -0700 ++++ 25/arch/i386/kernel/cpu/intel.c 2003-10-05 00:36:48.000000000 -0700 +@@ -8,11 +8,10 @@ + #include + #include + #include ++#include + + #include "cpu.h" + +-extern int trap_init_f00f_bug(void); +- + #ifdef CONFIG_X86_INTEL_USERCOPY + /* + * Alignment at which movsl is preferred for bulk memory copies. +@@ -157,7 +156,7 @@ static void __init init_intel(struct cpu + + c->f00f_bug = 1; + if ( !f00f_workaround_enabled ) { +- trap_init_f00f_bug(); ++ trap_init_virtual_IDT(); + printk(KERN_NOTICE "Intel Pentium with F0 0F bug - workaround enabled.\n"); + f00f_workaround_enabled = 1; + } +@@ -238,12 +237,9 @@ static void __init init_intel(struct cpu + } + + /* SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until model 3 mask 3 */ +- if ( c->x86 == 6) { +- unsigned model_mask = (c->x86_model << 8) + c->x86_mask; +- if (model_mask < 0x0303) +- clear_bit(X86_FEATURE_SEP, c->x86_capability); +- } +- ++ if ((c->x86<<8 | c->x86_model<<4 | c->x86_mask) < 0x633) ++ clear_bit(X86_FEATURE_SEP, c->x86_capability); ++ + /* Names for the Pentium II/Celeron processors + detectable only by also checking the cache size. + Dixon is NOT a Celeron. */ +--- linux-2.6.0-test6/arch/i386/kernel/cpu/mcheck/k7.c 2003-08-08 22:55:10.000000000 -0700 ++++ 25/arch/i386/kernel/cpu/mcheck/k7.c 2003-10-05 00:33:23.000000000 -0700 +@@ -17,7 +17,7 @@ + #include "mce.h" + + /* Machine Check Handler For AMD Athlon/Duron */ +-static void k7_machine_check(struct pt_regs * regs, long error_code) ++static asmlinkage void k7_machine_check(struct pt_regs * regs, long error_code) + { + int recover=1; + u32 alow, ahigh, high, low; +@@ -31,7 +31,7 @@ static void k7_machine_check(struct pt_r + printk (KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n", + smp_processor_id(), mcgsth, mcgstl); + +- for (i=0; i"), +- MATCH(DMI_BIOS_VERSION, "ASUS A7V ACPI BIOS Revision 1011"), NO_MATCH }}, +- + { force_acpi_ht, "ABIT i440BX-W83977", { + MATCH(DMI_BOARD_VENDOR, "ABIT "), + MATCH(DMI_BOARD_NAME, "i440BX-W83977 (BP6)"), +@@ -978,7 +973,10 @@ static __initdata struct dmi_blacklist d + { disable_acpi_pci, "ASUS A7V", { + MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC"), + MATCH(DMI_BOARD_NAME, ""), +- MATCH(DMI_BIOS_VERSION, "ASUS A7V ACPI BIOS Revision 1007"), NO_MATCH }}, ++ /* newer BIOS, Revision 1011, does work */ ++ MATCH(DMI_BIOS_VERSION, "ASUS A7V ACPI BIOS Revision 1007"), ++ NO_MATCH }}, ++ + #endif + + { NULL, } +--- linux-2.6.0-test6/arch/i386/kernel/doublefault.c 2003-08-22 19:23:40.000000000 -0700 ++++ 25/arch/i386/kernel/doublefault.c 2003-10-05 00:36:48.000000000 -0700 +@@ -7,12 +7,13 @@ + #include + #include + #include ++#include + + #define DOUBLEFAULT_STACKSIZE (1024) + static unsigned long doublefault_stack[DOUBLEFAULT_STACKSIZE]; + #define STACK_START (unsigned long)(doublefault_stack+DOUBLEFAULT_STACKSIZE) + +-#define ptr_ok(x) ((x) > 0xc0000000 && (x) < 0xc1000000) ++#define ptr_ok(x) (((x) > __PAGE_OFFSET && (x) < (__PAGE_OFFSET + 0x01000000)) || ((x) >= FIXADDR_START)) + + static void doublefault_fn(void) + 
{ +@@ -38,8 +39,8 @@ static void doublefault_fn(void) + + printk("eax = %08lx, ebx = %08lx, ecx = %08lx, edx = %08lx\n", + t->eax, t->ebx, t->ecx, t->edx); +- printk("esi = %08lx, edi = %08lx\n", +- t->esi, t->edi); ++ printk("esi = %08lx, edi = %08lx, ebp = %08lx\n", ++ t->esi, t->edi, t->ebp); + } + } + +--- /dev/null 2002-08-30 16:31:37.000000000 -0700 ++++ 25/arch/i386/kernel/efi.c 2003-10-05 00:36:25.000000000 -0700 +@@ -0,0 +1,611 @@ ++/* ++ * Extensible Firmware Interface ++ * ++ * Based on Extensible Firmware Interface Specification version 1.0 ++ * ++ * Copyright (C) 1999 VA Linux Systems ++ * Copyright (C) 1999 Walt Drummond ++ * Copyright (C) 1999-2002 Hewlett-Packard Co. ++ * David Mosberger-Tang ++ * Stephane Eranian ++ * ++ * All EFI Runtime Services are not implemented yet as EFI only ++ * supports physical mode addressing on SoftSDV. This is to be fixed ++ * in a future version. --drummond 1999-07-20 ++ * ++ * Implemented EFI runtime services and virtual mode calls. --davidm ++ * ++ * Goutham Rao: ++ * Skip non-WB memory and ignore empty memory ranges. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#define EFI_DEBUG 0 ++#define PFX "EFI: " ++ ++extern efi_status_t asmlinkage efi_call_phys(void *, ...); ++ ++struct efi efi; ++struct efi efi_phys __initdata; ++struct efi_memory_map memmap __initdata; ++ ++/* ++ * We require an early boot_ioremap mapping mechanism initially ++ */ ++extern void * boot_ioremap(unsigned long, unsigned long); ++ ++/* ++ * efi_dir is allocated here, but the directory isn't created ++ * here, as proc_mkdir() doesn't work this early in the bootup ++ * process. Therefore, each module, like efivars, must test for ++ * if (!efi_dir) efi_dir = proc_mkdir("efi", NULL); ++ * prior to creating their own entries under /proc/efi. ++ */ ++#ifdef CONFIG_PROC_FS ++struct proc_dir_entry *efi_dir; ++#endif ++ ++ ++/* ++ * To make EFI call EFI runtime service in physical addressing mode we need ++ * prelog/epilog before/after the invocation to disable interrupt, to ++ * claim EFI runtime service handler exclusively and to duplicate a memory in ++ * low memory space say 0 - 3G. ++ */ ++ ++static unsigned long efi_rt_eflags; ++static spinlock_t efi_rt_lock = SPIN_LOCK_UNLOCKED; ++static pgd_t efi_bak_pg_dir_pointer[2]; ++ ++static void efi_call_phys_prelog(void) ++{ ++ unsigned long cr4; ++ unsigned long temp; ++ ++ spin_lock(&efi_rt_lock); ++ local_irq_save(efi_rt_eflags); ++ ++ /* ++ * If I don't have PSE, I should just duplicate two entries in page ++ * directory. If I have PSE, I just need to duplicate one entry in ++ * page directory. ++ */ ++ __asm__ __volatile__("movl %%cr4, %0":"=r"(cr4)); ++ ++ if (cr4 & X86_CR4_PSE) { ++ efi_bak_pg_dir_pointer[0].pgd = ++ swapper_pg_dir[pgd_index(0)].pgd; ++ swapper_pg_dir[0].pgd = ++ swapper_pg_dir[pgd_index(PAGE_OFFSET)].pgd; ++ } else { ++ efi_bak_pg_dir_pointer[0].pgd = ++ swapper_pg_dir[pgd_index(0)].pgd; ++ efi_bak_pg_dir_pointer[1].pgd = ++ swapper_pg_dir[pgd_index(0x400000)].pgd; ++ swapper_pg_dir[pgd_index(0)].pgd = ++ swapper_pg_dir[pgd_index(PAGE_OFFSET)].pgd; ++ temp = PAGE_OFFSET + 0x400000; ++ swapper_pg_dir[pgd_index(0x400000)].pgd = ++ swapper_pg_dir[pgd_index(temp)].pgd; ++ } ++ ++ /* ++ * After the lock is released, the original page table is restored. 
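The page-directory shuffle in efi_call_phys_prelog() above works because one i386 PGD slot covers 4MB of virtual space: copying the kernel's PAGE_OFFSET slot (and, without PSE, the one after it) down to slot 0 makes the same physical memory reachable at low addresses while paging stays enabled. A standalone illustration, not part of the patch, assuming the stock two-level layout (PGDIR_SHIFT == 22, PAGE_OFFSET == 0xC0000000):

    #include <stdio.h>

    #define PGDIR_SHIFT	22			/* one PGD slot == 4MB */
    #define PAGE_OFFSET	0xC0000000UL		/* assumed stock value */
    #define pgd_index(addr)	((addr) >> PGDIR_SHIFT)

    int main(void)
    {
    	/* the slot that maps the first 4MB of kernel virtual space... */
    	printf("pgd_index(PAGE_OFFSET) = %lu\n", pgd_index(PAGE_OFFSET));	/* 768 */
    	/* ...copied into slot 0, so physical 0-4MB also answers at virtual 0-4MB */
    	printf("pgd_index(0)           = %lu\n", pgd_index(0UL));		/* 0 */
    	return 0;
    }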
++ */ ++ local_flush_tlb(); ++ ++ cpu_gdt_descr[0].address = __pa(cpu_gdt_descr[0].address); ++ __asm__ __volatile__("lgdt %0":"=m" ++ (*(struct Xgt_desc_struct *) __pa(&cpu_gdt_descr[0]))); ++} ++ ++static void efi_call_phys_epilog(void) ++{ ++ unsigned long cr4; ++ ++ cpu_gdt_descr[0].address = ++ (unsigned long) __va(cpu_gdt_descr[0].address); ++ __asm__ __volatile__("lgdt %0":"=m"(cpu_gdt_descr)); ++ __asm__ __volatile__("movl %%cr4, %0":"=r"(cr4)); ++ ++ if (cr4 & X86_CR4_PSE) { ++ swapper_pg_dir[pgd_index(0)].pgd = ++ efi_bak_pg_dir_pointer[0].pgd; ++ } else { ++ swapper_pg_dir[pgd_index(0)].pgd = ++ efi_bak_pg_dir_pointer[0].pgd; ++ swapper_pg_dir[pgd_index(0x400000)].pgd = ++ efi_bak_pg_dir_pointer[1].pgd; ++ } ++ ++ /* ++ * After the lock is released, the original page table is restored. ++ */ ++ local_flush_tlb(); ++ ++ local_irq_restore(efi_rt_eflags); ++ spin_unlock(&efi_rt_lock); ++} ++ ++static efi_status_t ++phys_efi_set_virtual_address_map(unsigned long memory_map_size, ++ unsigned long descriptor_size, ++ u32 descriptor_version, ++ efi_memory_desc_t *virtual_map) ++{ ++ efi_status_t status; ++ ++ efi_call_phys_prelog(); ++ status = efi_call_phys(efi_phys.set_virtual_address_map, ++ memory_map_size, descriptor_size, ++ descriptor_version, virtual_map); ++ efi_call_phys_epilog(); ++ return status; ++} ++ ++efi_status_t ++phys_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc) ++{ ++ efi_status_t status; ++ ++ efi_call_phys_prelog(); ++ status = efi_call_phys(efi_phys.get_time, tm, tc); ++ efi_call_phys_epilog(); ++ return status; ++} ++ ++void efi_gettimeofday(struct timespec *tv) ++{ ++ efi_time_t tm; ++ ++ memset(tv, 0, sizeof(*tv)); ++ if ((*efi.get_time) (&tm, 0) != EFI_SUCCESS) ++ return; ++ ++ tv->tv_sec = mktime(tm.year, tm.month, tm.day, tm.hour, tm.minute, ++ tm.second); ++ tv->tv_nsec = tm.nanosecond; ++} ++ ++int is_available_memory(efi_memory_desc_t * md) ++{ ++ if (!(md->attribute & EFI_MEMORY_WB)) ++ return 0; ++ ++ switch (md->type) { ++ case EFI_LOADER_CODE: ++ case EFI_LOADER_DATA: ++ case EFI_BOOT_SERVICES_CODE: ++ case EFI_BOOT_SERVICES_DATA: ++ case EFI_CONVENTIONAL_MEMORY: ++ return 1; ++ } ++ return 0; ++} ++ ++/* ++ * We need to map the EFI memory map again after paging_init(). ++ */ ++void __init efi_map_memmap(void) ++{ ++ memmap.map = NULL; ++ ++ memmap.map = (efi_memory_desc_t *) ++ bt_ioremap((unsigned long) memmap.phys_map, ++ (memmap.nr_map * sizeof(efi_memory_desc_t))); ++ ++ if (memmap.map == NULL) ++ printk(KERN_ERR PFX "Could not remap the EFI memmap!\n"); ++} ++ ++void __init print_efi_memmap(void) ++{ ++ efi_memory_desc_t *md; ++ int i; ++ ++ for (i = 0; i < memmap.nr_map; i++) { ++ md = &memmap.map[i]; ++ printk(KERN_INFO "mem%02u: type=%u, attr=0x%llx, " ++ "range=[0x%016llx-0x%016llx) (%lluMB)\n", ++ i, md->type, md->attribute, md->phys_addr, ++ md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT), ++ (md->num_pages >> (20 - EFI_PAGE_SHIFT))); ++ } ++} ++ ++/* ++ * Walks the EFI memory map and calls CALLBACK once for each EFI ++ * memory descriptor that has memory that is available for kernel use. 
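The comment above describes the walk; per the loop body that follows, the callback receives page-aligned physical (start, end) ranges and can stop the walk early by returning a negative value. A hypothetical callback, not from the patch (the name count_free is illustrative):

    /* Tally how much firmware-reported memory is available. */
    static int __init count_free(unsigned long start, unsigned long end, void *arg)
    {
    	unsigned long *total = arg;

    	*total += end - start;	/* the walker only hands out whole pages */
    	return 0;		/* keep walking; a negative value stops it */
    }

    /* caller: */
    unsigned long total = 0;
    efi_memmap_walk(count_free, &total);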
++ */ ++void efi_memmap_walk(efi_freemem_callback_t callback, void *arg) ++{ ++ int prev_valid = 0; ++ struct range { ++ unsigned long start; ++ unsigned long end; ++ } prev, curr; ++ efi_memory_desc_t *md; ++ unsigned long start, end; ++ int i; ++ ++ for (i = 0; i < memmap.nr_map; i++) { ++ md = &memmap.map[i]; ++ ++ if ((md->num_pages == 0) || (!is_available_memory(md))) ++ continue; ++ ++ curr.start = md->phys_addr; ++ curr.end = curr.start + (md->num_pages << EFI_PAGE_SHIFT); ++ ++ if (!prev_valid) { ++ prev = curr; ++ prev_valid = 1; ++ } else { ++ if (curr.start < prev.start) ++ printk(KERN_INFO PFX "Unordered memory map\n"); ++ if (prev.end == curr.start) ++ prev.end = curr.end; ++ else { ++ start = ++ (unsigned long) (PAGE_ALIGN(prev.start)); ++ end = (unsigned long) (prev.end & PAGE_MASK); ++ if ((end > start) ++ && (*callback) (start, end, arg) < 0) ++ return; ++ prev = curr; ++ } ++ } ++ } ++ if (prev_valid) { ++ start = (unsigned long) PAGE_ALIGN(prev.start); ++ end = (unsigned long) (prev.end & PAGE_MASK); ++ if (end > start) ++ (*callback) (start, end, arg); ++ } ++} ++ ++void __init efi_init(void) ++{ ++ efi_config_table_t *config_tables; ++ efi_runtime_services_t *runtime; ++ efi_char16_t *c16; ++ char vendor[100] = "unknown"; ++ unsigned long num_config_tables; ++ int i = 0; ++ ++ memset(&efi, 0, sizeof(efi) ); ++ memset(&efi_phys, 0, sizeof(efi_phys)); ++ ++ efi_phys.systab = EFI_SYSTAB; ++ memmap.phys_map = EFI_MEMMAP; ++ memmap.nr_map = EFI_MEMMAP_SIZE/EFI_MEMDESC_SIZE; ++ memmap.desc_version = EFI_MEMDESC_VERSION; ++ ++ efi.systab = (efi_system_table_t *) ++ boot_ioremap((unsigned long) efi_phys.systab, ++ sizeof(efi_system_table_t)); ++ /* ++ * Verify the EFI Table ++ */ ++ if (efi.systab == NULL) ++ printk(KERN_ERR PFX "Woah! Couldn't map the EFI system table.\n"); ++ if (efi.systab->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE) ++ printk(KERN_ERR PFX "Woah! EFI system table signature incorrect\n"); ++ if ((efi.systab->hdr.revision ^ EFI_SYSTEM_TABLE_REVISION) >> 16 != 0) ++ printk(KERN_ERR PFX ++ "Warning: EFI system table major version mismatch: " ++ "got %d.%02d, expected %d.%02d\n", ++ efi.systab->hdr.revision >> 16, ++ efi.systab->hdr.revision & 0xffff, ++ EFI_SYSTEM_TABLE_REVISION >> 16, ++ EFI_SYSTEM_TABLE_REVISION & 0xffff); ++ /* ++ * Grab some details from the system table ++ */ ++ num_config_tables = efi.systab->nr_tables; ++ config_tables = (efi_config_table_t *)efi.systab->tables; ++ runtime = efi.systab->runtime; ++ ++ /* ++ * Show what we know for posterity ++ */ ++ c16 = (efi_char16_t *) boot_ioremap(efi.systab->fw_vendor, 2); ++ if (c16) { ++ for (i = 0; i < sizeof(vendor) && *c16; ++i) ++ vendor[i] = *c16++; ++ vendor[i] = '\0'; ++ } else ++ printk(KERN_ERR PFX "Could not map the firmware vendor!\n"); ++ ++ printk(KERN_INFO PFX "EFI v%u.%.02u by %s \n", ++ efi.systab->hdr.revision >> 16, ++ efi.systab->hdr.revision & 0xffff, vendor); ++ ++ /* ++ * Let's see what config tables the firmware passed to us. 
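The walker above reports only whole pages: the start of a merged range is rounded up and its end rounded down, and ranges without a complete page are dropped by the "end > start" test. A worked example of that rounding, assuming 4KB pages:

    #define PAGE_SIZE	4096UL
    #define PAGE_MASK	(~(PAGE_SIZE - 1))
    #define PAGE_ALIGN(a)	(((a) + PAGE_SIZE - 1) & PAGE_MASK)

    unsigned long start = PAGE_ALIGN(0x1200UL);	/* rounds up to 0x2000 */
    unsigned long end   = 0x3400UL & PAGE_MASK;	/* rounds down to 0x3000 */
    /* end > start, so exactly one page is reported; a range like
       [0x1200, 0x2f00) would round to start == end and be skipped */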
++ */ ++ config_tables = (efi_config_table_t *) ++ boot_ioremap((unsigned long) config_tables, ++ num_config_tables * sizeof(efi_config_table_t)); ++ ++ if (config_tables == NULL) ++ printk(KERN_ERR PFX "Could not map EFI Configuration Table!\n"); ++ ++ for (i = 0; i < num_config_tables; i++) { ++ if (efi_guidcmp(config_tables[i].guid, MPS_TABLE_GUID) == 0) { ++ efi.mps = (void *)config_tables[i].table; ++ printk(KERN_INFO " MPS=0x%lx ", config_tables[i].table); ++ } else ++ if (efi_guidcmp(config_tables[i].guid, ACPI_20_TABLE_GUID) == 0) { ++ efi.acpi20 = __va(config_tables[i].table); ++ printk(KERN_INFO " ACPI 2.0=0x%lx ", config_tables[i].table); ++ } else ++ if (efi_guidcmp(config_tables[i].guid, ACPI_TABLE_GUID) == 0) { ++ efi.acpi = __va(config_tables[i].table); ++ printk(KERN_INFO " ACPI=0x%lx ", config_tables[i].table); ++ } else ++ if (efi_guidcmp(config_tables[i].guid, SMBIOS_TABLE_GUID) == 0) { ++ efi.smbios = (void *) config_tables[i].table; ++ printk(KERN_INFO " SMBIOS=0x%lx ", config_tables[i].table); ++ } else ++ if (efi_guidcmp(config_tables[i].guid, HCDP_TABLE_GUID) == 0) { ++ efi.hcdp = (void *)config_tables[i].table; ++ printk(KERN_INFO " HCDP=0x%lx ", config_tables[i].table); ++ } else ++ if (efi_guidcmp(config_tables[i].guid, UGA_IO_PROTOCOL_GUID) == 0) { ++ efi.uga = (void *)config_tables[i].table; ++ printk(KERN_INFO " UGA=0x%lx ", config_tables[i].table); ++ } ++ } ++ printk("\n"); ++ ++ /* ++ * Check out the runtime services table. We need to map ++ * the runtime services table so that we can grab the physical ++ * address of several of the EFI runtime functions, needed to ++ * set the firmware into virtual mode. ++ */ ++ ++ runtime = (efi_runtime_services_t *) boot_ioremap((unsigned long) ++ runtime, ++ sizeof(efi_runtime_services_t)); ++ if (runtime != NULL) { ++ /* ++ * We will only need *early* access to the following ++ * two EFI runtime services before set_virtual_address_map ++ * is invoked. ++ */ ++ efi_phys.get_time = (efi_get_time_t *) runtime->get_time; ++ efi_phys.set_virtual_address_map = ++ (efi_set_virtual_address_map_t *) ++ runtime->set_virtual_address_map; ++ } else ++ printk(KERN_ERR PFX "Could not map the runtime service table!\n"); ++ ++ /* Map the EFI memory map for use until paging_init() */ ++ ++ memmap.map = (efi_memory_desc_t *) ++ boot_ioremap((unsigned long) EFI_MEMMAP, EFI_MEMMAP_SIZE); ++ ++ if (memmap.map == NULL) ++ printk(KERN_ERR PFX "Could not map the EFI memory map!\n"); ++ ++ if (EFI_MEMDESC_SIZE != sizeof(efi_memory_desc_t)) { ++ printk(KERN_WARNING PFX "Warning! Kernel-defined memdesc doesn't " ++ "match the one from EFI!\n"); ++ } ++#if EFI_DEBUG ++ print_efi_memmap(); ++#endif ++} ++ ++/* ++ * This function will switch the EFI runtime services to virtual mode. ++ * Essentially, look through the EFI memmap and map every region that ++ * has the runtime attribute bit set in its memory descriptor and update ++ * that memory descriptor with the virtual address obtained from ioremap(). ++ * This enables the runtime services to be called without having to ++ * thunk back into physical mode for every invocation. 
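Once the switch described above has happened, callers go through the efi function table directly, just as efi_gettimeofday() earlier in this file does. A sketch of such a call, illustrative rather than from the patch:

    efi_time_t tm;

    /* mirrors the (*efi.get_time)(&tm, 0) call in efi_gettimeofday() */
    if ((*efi.get_time)(&tm, NULL) == EFI_SUCCESS)
    	printk(KERN_INFO "EFI RTC: %d-%02d-%02d %02d:%02d:%02d\n",
    	       tm.year, tm.month, tm.day, tm.hour, tm.minute, tm.second);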
++ */ ++ ++void __init efi_enter_virtual_mode(void) ++{ ++ efi_memory_desc_t *md; ++ efi_status_t status; ++ int i; ++ ++ efi.systab = NULL; ++ ++ for (i = 0; i < memmap.nr_map; i++) { ++ md = &memmap.map[i]; ++ ++ if (md->attribute & EFI_MEMORY_RUNTIME) { ++ md->virt_addr = ++ (unsigned long)ioremap(md->phys_addr, ++ md->num_pages << EFI_PAGE_SHIFT); ++ if (!(unsigned long)md->virt_addr) { ++ printk(KERN_ERR PFX "ioremap of 0x%lX failed\n", ++ (unsigned long)md->phys_addr); ++ } ++ ++ if (((unsigned long)md->phys_addr <= ++ (unsigned long)efi_phys.systab) && ++ ((unsigned long)efi_phys.systab < ++ md->phys_addr + ++ ((unsigned long)md->num_pages << ++ EFI_PAGE_SHIFT))) { ++ unsigned long addr; ++ ++ addr = md->virt_addr - md->phys_addr + ++ (unsigned long)efi_phys.systab; ++ efi.systab = (efi_system_table_t *)addr; ++ } ++ } ++ } ++ ++ if (!efi.systab) ++ BUG(); ++ ++ status = phys_efi_set_virtual_address_map( ++ sizeof(efi_memory_desc_t) * memmap.nr_map, ++ sizeof(efi_memory_desc_t), ++ memmap.desc_version, ++ memmap.phys_map); ++ ++ if (status != EFI_SUCCESS) { ++ printk (KERN_ALERT "You are screwed! " ++ "Unable to switch EFI into virtual mode " ++ "(status=%lx)\n", status); ++ panic("EFI call to SetVirtualAddressMap() failed!"); ++ } ++ ++ /* ++ * Now that EFI is in virtual mode, update the function ++ * pointers in the runtime service table to the new virtual addresses. ++ */ ++ ++ efi.get_time = (efi_get_time_t *) efi.systab->runtime->get_time; ++ efi.set_time = (efi_set_time_t *) efi.systab->runtime->set_time; ++ efi.get_wakeup_time = (efi_get_wakeup_time_t *) ++ efi.systab->runtime->get_wakeup_time; ++ efi.set_wakeup_time = (efi_set_wakeup_time_t *) ++ efi.systab->runtime->set_wakeup_time; ++ efi.get_variable = (efi_get_variable_t *) ++ efi.systab->runtime->get_variable; ++ efi.get_next_variable = (efi_get_next_variable_t *) ++ efi.systab->runtime->get_next_variable; ++ efi.set_variable = (efi_set_variable_t *) ++ efi.systab->runtime->set_variable; ++ efi.get_next_high_mono_count = (efi_get_next_high_mono_count_t *) ++ efi.systab->runtime->get_next_high_mono_count; ++ efi.reset_system = (efi_reset_system_t *) ++ efi.systab->runtime->reset_system; ++} ++ ++void __init ++efi_initialize_iomem_resources(struct resource *code_resource, ++ struct resource *data_resource) ++{ ++ struct resource *res; ++ efi_memory_desc_t *md; ++ int i; ++ ++ for (i = 0; i < memmap.nr_map; i++) { ++ md = &memmap.map[i]; ++ ++ if ((md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT)) > ++ 0x100000000ULL) ++ continue; ++ res = alloc_bootmem_low(sizeof(struct resource)); ++ switch (md->type) { ++ case EFI_RESERVED_TYPE: ++ res->name = "Reserved Memory"; ++ break; ++ case EFI_LOADER_CODE: ++ res->name = "Loader Code"; ++ break; ++ case EFI_LOADER_DATA: ++ res->name = "Loader Data"; ++ break; ++ case EFI_BOOT_SERVICES_DATA: ++ res->name = "BootServices Data"; ++ break; ++ case EFI_BOOT_SERVICES_CODE: ++ res->name = "BootServices Code"; ++ break; ++ case EFI_RUNTIME_SERVICES_CODE: ++ res->name = "Runtime Service Code"; ++ break; ++ case EFI_RUNTIME_SERVICES_DATA: ++ res->name = "Runtime Service Data"; ++ break; ++ case EFI_CONVENTIONAL_MEMORY: ++ res->name = "Conventional Memory"; ++ break; ++ case EFI_UNUSABLE_MEMORY: ++ res->name = "Unusable Memory"; ++ break; ++ case EFI_ACPI_RECLAIM_MEMORY: ++ res->name = "ACPI Reclaim"; ++ break; ++ case EFI_ACPI_MEMORY_NVS: ++ res->name = "ACPI NVS"; ++ break; ++ case EFI_MEMORY_MAPPED_IO: ++ res->name = "Memory Mapped IO"; ++ break; ++ case EFI_MEMORY_MAPPED_IO_PORT_SPACE: 
++ res->name = "Memory Mapped IO Port Space"; ++ break; ++ default: ++ res->name = "Reserved"; ++ break; ++ } ++ res->start = md->phys_addr; ++ res->end = res->start + ((md->num_pages << EFI_PAGE_SHIFT) - 1); ++ res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; ++ if (request_resource(&iomem_resource, res) < 0) ++ printk(KERN_ERR PFX "Failed to allocate res %s : 0x%lx-0x%lx\n", ++ res->name, res->start, res->end); ++ /* ++ * We don't know which region contains kernel data so we try ++ * it repeatedly and let the resource manager test it. ++ */ ++ if (md->type == EFI_CONVENTIONAL_MEMORY) { ++ request_resource(res, code_resource); ++ request_resource(res, data_resource); ++ } ++ } ++} ++ ++/* ++ * Convenience functions to obtain memory types and attributes ++ */ ++ ++u32 efi_mem_type(unsigned long phys_addr) ++{ ++ efi_memory_desc_t *md; ++ int i; ++ ++ for (i = 0; i < memmap.nr_map; i++) { ++ md = &memmap.map[i]; ++ if ((md->phys_addr <= phys_addr) && (phys_addr < ++ (md->phys_addr + (md-> num_pages << EFI_PAGE_SHIFT)) )) ++ return md->type; ++ } ++ return 0; ++} ++ ++u64 efi_mem_attributes(unsigned long phys_addr) ++{ ++ efi_memory_desc_t *md; ++ int i; ++ ++ for (i = 0; i < memmap.nr_map; i++) { ++ md = &memmap.map[i]; ++ if ((md->phys_addr <= phys_addr) && (phys_addr < ++ (md->phys_addr + (md-> num_pages << EFI_PAGE_SHIFT)) )) ++ return md->attribute; ++ } ++ return 0; ++} +--- /dev/null 2002-08-30 16:31:37.000000000 -0700 ++++ 25/arch/i386/kernel/efi_stub.S 2003-10-05 00:36:22.000000000 -0700 +@@ -0,0 +1,125 @@ ++/* ++ * EFI call stub for IA32. ++ * ++ * This stub allows us to make EFI calls in physical mode with interrupts ++ * turned off. ++ */ ++ ++#include ++#include ++#include ++#include ++ ++/* ++ * efi_call_phys(void *, ...) is a function with variable parameters. ++ * All the callers of this function assure that all the parameters are 4-bytes. ++ */ ++ ++/* ++ * In gcc calling convention, EBX, ESP, EBP, ESI and EDI are all callee save. ++ * So we'd better save all of them at the beginning of this function and restore ++ * at the end no matter how many we use, because we can not assure EFI runtime ++ * service functions will comply with gcc calling convention, too. ++ */ ++ ++.text ++.section .text, "a" ++ENTRY(efi_call_phys) ++ /* ++ * 0. The function can only be called in Linux kernel. So CS has been ++ * set to 0x0010, DS and SS have been set to 0x0018. In EFI, I found ++ * the values of these registers are the same. And, the corresponding ++ * GDT entries are identical. So I will do nothing about segment reg ++ * and GDT, but change GDT base register in prelog and epilog. ++ */ ++ ++ /* ++ * 1. Now I am running with EIP = + PAGE_OFFSET. ++ * But to make it smoothly switch from virtual mode to flat mode. ++ * The mapping of lower virtual memory has been created in prelog and ++ * epilog. ++ */ ++ movl $1f, %edx ++ subl $__PAGE_OFFSET, %edx ++ jmp *%edx ++1: ++ ++ /* ++ * 2. Now on the top of stack is the return ++ * address in the caller of efi_call_phys(), then parameter 1, ++ * parameter 2, ..., param n. To make things easy, we save the return ++ * address of efi_call_phys in a global variable. ++ */ ++ popl %edx ++ movl %edx, saved_return_addr ++ /* get the function pointer into ECX*/ ++ popl %ecx ++ movl %ecx, efi_rt_function_ptr ++ movl $2f, %edx ++ subl $__PAGE_OFFSET, %edx ++ pushl %edx ++ ++ /* ++ * 3. Clear PG bit in %CR0. ++ */ ++ movl %cr0, %edx ++ andl $0x7fffffff, %edx ++ movl %edx, %cr0 ++ jmp 1f ++1: ++ ++ /* ++ * 4. Adjust stack pointer. 
++ */ ++ subl $__PAGE_OFFSET, %esp ++ ++ /* ++ * 5. Call the physical function. ++ */ ++ jmp *%ecx ++ ++2: ++ /* ++ * 6. After EFI runtime service returns, control will return to ++ * following instruction. We'd better readjust stack pointer first. ++ */ ++ addl $__PAGE_OFFSET, %esp ++ ++ /* ++ * 7. Restore PG bit ++ */ ++ movl %cr0, %edx ++ orl $0x80000000, %edx ++ movl %edx, %cr0 ++ jmp 1f ++1: ++ /* ++ * 8. Now restore the virtual mode from flat mode by ++ * adding EIP with PAGE_OFFSET. ++ */ ++ movl $1f, %edx ++ jmp *%edx ++1: ++ ++ /* ++ * 9. Balance the stack. And because EAX contain the return value, ++ * we'd better not clobber it. ++ */ ++ leal efi_rt_function_ptr, %edx ++ movl (%edx), %ecx ++ pushl %ecx ++ ++ /* ++ * 10. Push the saved return address onto the stack and return. ++ */ ++ leal saved_return_addr, %edx ++ movl (%edx), %ecx ++ pushl %ecx ++ ret ++.previous ++ ++.data ++saved_return_addr: ++ .long 0 ++efi_rt_function_ptr: ++ .long 0 +--- linux-2.6.0-test6/arch/i386/kernel/entry.S 2003-08-22 19:23:40.000000000 -0700 ++++ 25/arch/i386/kernel/entry.S 2003-10-05 00:36:48.000000000 -0700 +@@ -43,11 +43,25 @@ + #include + #include + #include ++#include + #include + #include ++#include + #include + #include + #include "irq_vectors.h" ++ /* We do not recover from a stack overflow, but at least ++ * we know it happened and should be able to track it down. ++ */ ++#ifdef CONFIG_STACK_OVERFLOW_TEST ++#define STACK_OVERFLOW_TEST \ ++ testl $7680,%esp; \ ++ jnz 10f; \ ++ call stack_overflow; \ ++10: ++#else ++#define STACK_OVERFLOW_TEST ++#endif + + EBX = 0x00 + ECX = 0x04 +@@ -85,7 +99,102 @@ TSS_ESP0_OFFSET = (4 - 0x200) + #define resume_kernel restore_all + #endif + +-#define SAVE_ALL \ ++#ifdef CONFIG_X86_HIGH_ENTRY ++ ++#ifdef CONFIG_X86_SWITCH_PAGETABLES ++ ++#if defined(CONFIG_PREEMPT) && defined(CONFIG_SMP) ++/* ++ * If task is preempted in __SWITCH_KERNELSPACE, and moved to another cpu, ++ * __switch_to repoints %esp to the appropriate virtual stack; but %ebp is ++ * left stale, so we must check whether to repeat the real stack calculation. ++ */ ++#define repeat_if_esp_changed \ ++ xorl %esp, %ebp; \ ++ testl $0xffffe000, %ebp; \ ++ jnz 0b ++#else ++#define repeat_if_esp_changed ++#endif ++ ++/* clobbers ebx, edx and ebp */ ++ ++#define __SWITCH_KERNELSPACE \ ++ cmpl $0xff000000, %esp; \ ++ jb 1f; \ ++ \ ++ /* \ ++ * switch pagetables and load the real stack, \ ++ * keep the stack offset: \ ++ */ \ ++ \ ++ movl $swapper_pg_dir-__PAGE_OFFSET, %edx; \ ++ \ ++ /* GET_THREAD_INFO(%ebp) intermixed */ \ ++0: \ ++ movl %esp, %ebp; \ ++ movl %esp, %ebx; \ ++ andl $0xffffe000, %ebp; \ ++ andl $0x00001fff, %ebx; \ ++ orl TI_real_stack(%ebp), %ebx; \ ++ repeat_if_esp_changed; \ ++ \ ++ movl %edx, %cr3; \ ++ movl %ebx, %esp; \ ++1: ++ ++#endif ++ ++ ++#define __SWITCH_USERSPACE \ ++ /* interrupted any of the user return paths? */ \ ++ \ ++ movl EIP(%esp), %eax; \ ++ \ ++ cmpl $int80_ret_start_marker, %eax; \ ++ jb 33f; /* nope - continue with sysexit check */\ ++ cmpl $int80_ret_end_marker, %eax; \ ++ jb 22f; /* yes - switch to virtual stack */ \ ++33: \ ++ cmpl $sysexit_ret_start_marker, %eax; \ ++ jb 44f; /* nope - continue with user check */ \ ++ cmpl $sysexit_ret_end_marker, %eax; \ ++ jb 22f; /* yes - switch to virtual stack */ \ ++ /* return to userspace? */ \ ++44: \ ++ movl EFLAGS(%esp),%ecx; \ ++ movb CS(%esp),%cl; \ ++ testl $(VM_MASK | 3),%ecx; \ ++ jz 2f; \ ++22: \ ++ /* \ ++ * switch to the virtual stack, then switch to \ ++ * the userspace pagetables. 
\ ++ */ \ ++ \ ++ GET_THREAD_INFO(%ebp); \ ++ movl TI_virtual_stack(%ebp), %edx; \ ++ movl TI_user_pgd(%ebp), %ecx; \ ++ \ ++ movl %esp, %ebx; \ ++ andl $0x1fff, %ebx; \ ++ orl %ebx, %edx; \ ++int80_ret_start_marker: \ ++ movl %edx, %esp; \ ++ movl %ecx, %cr3; \ ++ \ ++ __RESTORE_ALL; \ ++int80_ret_end_marker: \ ++2: ++ ++#else /* !CONFIG_X86_HIGH_ENTRY */ ++ ++#define __SWITCH_KERNELSPACE ++#define __SWITCH_USERSPACE ++ ++#endif ++ ++#define __SAVE_ALL \ + cld; \ + pushl %es; \ + pushl %ds; \ +@@ -100,7 +209,7 @@ TSS_ESP0_OFFSET = (4 - 0x200) + movl %edx, %ds; \ + movl %edx, %es; + +-#define RESTORE_INT_REGS \ ++#define __RESTORE_INT_REGS \ + popl %ebx; \ + popl %ecx; \ + popl %edx; \ +@@ -109,29 +218,28 @@ TSS_ESP0_OFFSET = (4 - 0x200) + popl %ebp; \ + popl %eax + +-#define RESTORE_REGS \ +- RESTORE_INT_REGS; \ +-1: popl %ds; \ +-2: popl %es; \ ++#define __RESTORE_REGS \ ++ __RESTORE_INT_REGS; \ ++111: popl %ds; \ ++222: popl %es; \ + .section .fixup,"ax"; \ +-3: movl $0,(%esp); \ +- jmp 1b; \ +-4: movl $0,(%esp); \ +- jmp 2b; \ ++444: movl $0,(%esp); \ ++ jmp 111b; \ ++555: movl $0,(%esp); \ ++ jmp 222b; \ + .previous; \ + .section __ex_table,"a";\ + .align 4; \ +- .long 1b,3b; \ +- .long 2b,4b; \ ++ .long 111b,444b;\ ++ .long 222b,555b;\ + .previous + +- +-#define RESTORE_ALL \ +- RESTORE_REGS \ ++#define __RESTORE_ALL \ ++ __RESTORE_REGS \ + addl $4, %esp; \ +-1: iret; \ ++333: iret; \ + .section .fixup,"ax"; \ +-2: sti; \ ++666: sti; \ + movl $(__USER_DS), %edx; \ + movl %edx, %ds; \ + movl %edx, %es; \ +@@ -140,10 +248,19 @@ TSS_ESP0_OFFSET = (4 - 0x200) + .previous; \ + .section __ex_table,"a";\ + .align 4; \ +- .long 1b,2b; \ ++ .long 333b,666b;\ + .previous + ++#define SAVE_ALL \ ++ __SAVE_ALL; \ ++ __SWITCH_KERNELSPACE; \ ++ STACK_OVERFLOW_TEST; ++ ++#define RESTORE_ALL \ ++ __SWITCH_USERSPACE; \ ++ __RESTORE_ALL; + ++.section .entry.text,"ax" + + ENTRY(lcall7) + pushfl # We get a different stack layout with call +@@ -161,7 +278,7 @@ do_lcall: + movl %edx,EIP(%ebp) # Now we move them to their "normal" places + movl %ecx,CS(%ebp) # + andl $-8192, %ebp # GET_THREAD_INFO +- movl TI_EXEC_DOMAIN(%ebp), %edx # Get the execution domain ++ movl TI_exec_domain(%ebp), %edx # Get the execution domain + call *4(%edx) # Call the lcall7 handler for the domain + addl $4, %esp + popl %eax +@@ -206,7 +323,7 @@ ENTRY(resume_userspace) + cli # make sure we don't miss an interrupt + # setting need_resched or sigpending + # between sampling and the iret +- movl TI_FLAGS(%ebp), %ecx ++ movl TI_flags(%ebp), %ecx + andl $_TIF_WORK_MASK, %ecx # is there any work to be done on + # int/exception return? + jne work_pending +@@ -214,18 +331,18 @@ ENTRY(resume_userspace) + + #ifdef CONFIG_PREEMPT + ENTRY(resume_kernel) +- cmpl $0,TI_PRE_COUNT(%ebp) # non-zero preempt_count ? ++ cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ? + jnz restore_all + need_resched: +- movl TI_FLAGS(%ebp), %ecx # need_resched set ? ++ movl TI_flags(%ebp), %ecx # need_resched set ? + testb $_TIF_NEED_RESCHED, %cl + jz restore_all + testl $IF_MASK,EFLAGS(%esp) # interrupts off (exception path) ? + jz restore_all +- movl $PREEMPT_ACTIVE,TI_PRE_COUNT(%ebp) ++ movl $PREEMPT_ACTIVE,TI_preempt_count(%ebp) + sti + call schedule +- movl $0,TI_PRE_COUNT(%ebp) ++ movl $0,TI_preempt_count(%ebp) + cli + jmp need_resched + #endif +@@ -244,37 +361,50 @@ sysenter_past_esp: + pushl $(__USER_CS) + pushl $SYSENTER_RETURN + +-/* +- * Load the potential sixth argument from user stack. +- * Careful about security. 
+- */ +- cmpl $__PAGE_OFFSET-3,%ebp +- jae syscall_fault +-1: movl (%ebp),%ebp +-.section __ex_table,"a" +- .align 4 +- .long 1b,syscall_fault +-.previous +- + pushl %eax + SAVE_ALL + GET_THREAD_INFO(%ebp) + cmpl $(nr_syscalls), %eax + jae syscall_badsys + +- testb $_TIF_SYSCALL_TRACE,TI_FLAGS(%ebp) ++ testb $_TIF_SYSCALL_TRACE,TI_flags(%ebp) + jnz syscall_trace_entry + call *sys_call_table(,%eax,4) + movl %eax,EAX(%esp) + cli +- movl TI_FLAGS(%ebp), %ecx ++ movl TI_flags(%ebp), %ecx + testw $_TIF_ALLWORK_MASK, %cx + jne syscall_exit_work ++ ++#ifdef CONFIG_X86_SWITCH_PAGETABLES ++ ++ GET_THREAD_INFO(%ebp) ++ movl TI_virtual_stack(%ebp), %edx ++ movl TI_user_pgd(%ebp), %ecx ++ movl %esp, %ebx ++ andl $0x1fff, %ebx ++ orl %ebx, %edx ++sysexit_ret_start_marker: ++ movl %edx, %esp ++ movl %ecx, %cr3 ++#endif ++ /* ++ * only ebx is not restored by the userspace sysenter vsyscall ++ * code, it assumes it to be callee-saved. ++ */ ++ movl EBX(%esp), %ebx ++ + /* if something modifies registers it must also disable sysexit */ ++ + movl EIP(%esp), %edx + movl OLDESP(%esp), %ecx ++ + sti + sysexit ++#ifdef CONFIG_X86_SWITCH_PAGETABLES ++sysexit_ret_end_marker: ++ nop ++#endif + + + # system call handler stub +@@ -285,7 +415,7 @@ ENTRY(system_call) + cmpl $(nr_syscalls), %eax + jae syscall_badsys + # system call tracing in operation +- testb $_TIF_SYSCALL_TRACE,TI_FLAGS(%ebp) ++ testb $_TIF_SYSCALL_TRACE,TI_flags(%ebp) + jnz syscall_trace_entry + syscall_call: + call *sys_call_table(,%eax,4) +@@ -294,10 +424,23 @@ syscall_exit: + cli # make sure we don't miss an interrupt + # setting need_resched or sigpending + # between sampling and the iret +- movl TI_FLAGS(%ebp), %ecx ++ movl TI_flags(%ebp), %ecx + testw $_TIF_ALLWORK_MASK, %cx # current->work + jne syscall_exit_work + restore_all: ++#ifdef CONFIG_TRAP_BAD_SYSCALL_EXITS ++ movl EFLAGS(%esp), %eax # mix EFLAGS and CS ++ movb CS(%esp), %al ++ testl $(VM_MASK | 3), %eax ++ jz resume_kernelX # returning to kernel or vm86-space ++ ++ cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ? ++ jz resume_kernelX ++ ++ int $3 ++ ++resume_kernelX: ++#endif + RESTORE_ALL + + # perform work that needs to be done immediately before resumption +@@ -310,7 +453,7 @@ work_resched: + cli # make sure we don't miss an interrupt + # setting need_resched or sigpending + # between sampling and the iret +- movl TI_FLAGS(%ebp), %ecx ++ movl TI_flags(%ebp), %ecx + andl $_TIF_WORK_MASK, %ecx # is there any work to be done other + # than syscall tracing? 
+ jz restore_all +@@ -325,6 +468,22 @@ work_notifysig: # deal with pending s + # vm86-space + xorl %edx, %edx + call do_notify_resume ++ ++#if CONFIG_X86_HIGH_ENTRY ++ /* ++ * Reload db7 if necessary: ++ */ ++ movl TI_flags(%ebp), %ecx ++ testb $_TIF_DB7, %cl ++ jnz work_db7 ++ ++ jmp restore_all ++ ++work_db7: ++ movl TI_task(%ebp), %edx; ++ movl task_thread_db7(%edx), %edx; ++ movl %edx, %db7; ++#endif + jmp restore_all + + ALIGN +@@ -380,7 +539,7 @@ syscall_badsys: + */ + .data + ENTRY(interrupt) +-.text ++.previous + + vector=0 + ENTRY(irq_entries_start) +@@ -390,7 +549,7 @@ ENTRY(irq_entries_start) + jmp common_interrupt + .data + .long 1b +-.text ++.previous + vector=vector+1 + .endr + +@@ -431,12 +590,17 @@ error_code: + movl ES(%esp), %edi # get the function address + movl %eax, ORIG_EAX(%esp) + movl %ecx, ES(%esp) +- movl %esp, %edx + pushl %esi # push the error code +- pushl %edx # push the pt_regs pointer + movl $(__USER_DS), %edx + movl %edx, %ds + movl %edx, %es ++ ++/* clobbers edx, ebx and ebp */ ++ __SWITCH_KERNELSPACE ++ ++ leal 4(%esp), %edx # prepare pt_regs ++ pushl %edx # push pt_regs ++ + call *%edi + addl $8, %esp + jmp ret_from_exception +@@ -527,7 +691,7 @@ nmi_stack_correct: + pushl %edx + call do_nmi + addl $8, %esp +- RESTORE_ALL ++ jmp restore_all + + nmi_stack_fixup: + FIX_STACK(12,nmi_stack_correct, 1) +@@ -595,7 +759,7 @@ ENTRY(page_fault) + #ifdef CONFIG_X86_MCE + ENTRY(machine_check) + pushl $0 +- pushl $do_machine_check ++ pushl machine_check_vector + jmp error_code + #endif + +@@ -604,6 +768,8 @@ ENTRY(spurious_interrupt_bug) + pushl $do_spurious_interrupt_bug + jmp error_code + ++.previous ++ + .data + ENTRY(sys_call_table) + .long sys_restart_syscall /* 0 - old "setup()" system call, used for restarting */ +@@ -879,5 +1045,60 @@ ENTRY(sys_call_table) + .long sys_tgkill /* 270 */ + .long sys_utimes + .long sys_fadvise64_64 ++ .long sys_ni_syscall /* sys_vserver */ + + nr_syscalls=(.-sys_call_table)/4 ++ ++ ++# Here we do call frames. We cheat a bit as we only really need ++# correct frames at locations we can actually look at from a ++# debugger. Since the break instruction trap actually goes thru ++# some of this code, we don't really need info on those areas, but ++# only after the fact. I.e. if we can not step or break in a ++# location or end up with a return address pointing at the ++# location, we don't need a correct call frame for it. 
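The stack switches in the entry macros above all use the same arithmetic: keep the low 13 bits of %esp, which are the position inside the 8KB thread stack, and splice them onto the 8KB-aligned base of the other mapping. The same computation in C, for illustration only:

    /* C rendering of the "andl $0x1fff, %ebx; orl %ebx, %edx" pairs above */
    unsigned long switch_stack(unsigned long esp, unsigned long other_base)
    {
    	unsigned long offset = esp & 0x1fff;	/* position within the stack */

    	return other_base | offset;		/* same position, other view */
    }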
++ ++#if 0 ++ ++#include ++/* ++ * The register numbers as known by gdb ++ */ ++#define _EAX 0 ++#define _ECX 1 ++#define _EDX 2 ++#define _EBX 3 ++#define _ESP 4 ++#define _EBP 5 ++#define _ESI 6 ++#define _EDI 7 ++#define _PC 8 ++#define _EIP 8 ++#define _PS 9 ++#define _EFLAGS 9 ++#define _CS 10 ++#define _SS 11 ++#define _DS 12 ++#define _ES 13 ++#define _FS 14 ++#define _GS 15 ++ ++ CFI_preamble(c1,_PC,1,1) ++ CFA_define_reference(_ESP,OLDESP) ++ CFA_define_offset(_EIP,EIP) ++ CFA_define_offset(_EBX,EBX) ++ CFA_define_offset(_ECX,ECX) ++ CFA_define_offset(_EDX,EDX) ++ CFA_define_offset(_ESI,ESI) ++ CFA_define_offset(_EDI,EDI) ++ CFA_define_offset(_EBP,EBP) ++ CFA_define_offset(_EAX,EAX) ++ CFA_define_offset(_EFLAGS,EFLAGS) ++ CFA_define_offset(_CS,CS) ++ CFA_define_offset(_DS,DS) ++ CFA_define_offset(_ES,ES) ++ CFI_postamble(c1) ++ ++ FDE_preamble(c1,f1,ret_from_intr,(divide_error - ret_from_intr)) ++ FDE_postamble(f1) ++#endif +--- /dev/null 2002-08-30 16:31:37.000000000 -0700 ++++ 25/arch/i386/kernel/entry_trampoline.c 2003-10-05 00:36:48.000000000 -0700 +@@ -0,0 +1,75 @@ ++/* ++ * linux/arch/i386/kernel/entry_trampoline.c ++ * ++ * (C) Copyright 2003 Ingo Molnar ++ * ++ * This file contains the needed support code for 4GB userspace ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++extern char __entry_tramp_start, __entry_tramp_end, __start___entry_text; ++ ++void __init init_entry_mappings(void) ++{ ++#ifdef CONFIG_X86_HIGH_ENTRY ++ void *tramp; ++ ++ /* ++ * We need a high IDT and GDT for the 4G/4G split: ++ */ ++ trap_init_virtual_IDT(); ++ ++ __set_fixmap(FIX_ENTRY_TRAMPOLINE_0, __pa((unsigned long)&__entry_tramp_start), PAGE_KERNEL); ++ __set_fixmap(FIX_ENTRY_TRAMPOLINE_1, __pa((unsigned long)&__entry_tramp_start) + PAGE_SIZE, PAGE_KERNEL); ++ tramp = (void *)fix_to_virt(FIX_ENTRY_TRAMPOLINE_0); ++ ++ printk("mapped 4G/4G trampoline to %p.\n", tramp); ++ BUG_ON((void *)&__start___entry_text != tramp); ++ /* ++ * Virtual kernel stack: ++ */ ++ BUG_ON(__kmap_atomic_vaddr(KM_VSTACK0) & 8191); ++ BUG_ON(sizeof(struct desc_struct)*NR_CPUS*GDT_ENTRIES > 2*PAGE_SIZE); ++ BUG_ON((unsigned int)&__entry_tramp_end - (unsigned int)&__entry_tramp_start > 2*PAGE_SIZE); ++ ++ /* ++ * set up the initial thread's virtual stack related ++ * fields: ++ */ ++ current->thread.stack_page0 = virt_to_page((char *)current->thread_info); ++ current->thread.stack_page1 = virt_to_page((char *)current->thread_info + PAGE_SIZE); ++ current->thread_info->virtual_stack = (void *)__kmap_atomic_vaddr(KM_VSTACK0); ++ ++ __kunmap_atomic_type(KM_VSTACK0); ++ __kunmap_atomic_type(KM_VSTACK1); ++ __kmap_atomic(current->thread.stack_page0, KM_VSTACK0); ++ __kmap_atomic(current->thread.stack_page1, KM_VSTACK1); ++ ++#endif ++ printk("current: %p\n", current); ++ printk("current->thread_info: %p\n", current->thread_info); ++ current->thread_info->real_stack = (void *)current->thread_info; ++ current->thread_info->user_pgd = NULL; ++ current->thread.esp0 = (unsigned long)current->thread_info->real_stack + THREAD_SIZE; ++} ++ ++ ++ ++void __init entry_trampoline_setup(void) ++{ ++ /* ++ * old IRQ entries set up by the boot code will still hang ++ * around - they are a sign of hw trouble anyway, now they'll ++ * produce a double fault message. 
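init_entry_mappings() above leans on the fixmap: each index names exactly one page at a compile-time virtual address counted down from FIXADDR_TOP, which is what lets the BUG_ON() check that the linker placed the trampoline where the fixmap will map it. Assuming the stock i386 definition (not shown in this hunk):

    /* assumed stock fixmap arithmetic; one page per index, top-down */
    #define fix_to_virt(idx)	(FIXADDR_TOP - ((idx) << PAGE_SHIFT))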
++ */ ++ trap_init_virtual_GDT(); ++} +--- linux-2.6.0-test6/arch/i386/kernel/head.S 2003-08-22 19:23:40.000000000 -0700 ++++ 25/arch/i386/kernel/head.S 2003-10-05 00:36:48.000000000 -0700 +@@ -16,6 +16,7 @@ + #include + #include + #include ++#include + + #define OLD_CL_MAGIC_ADDR 0x90020 + #define OLD_CL_MAGIC 0xA33F +@@ -330,7 +331,7 @@ ENTRY(stack_start) + + /* This is the default interrupt "handler" :-) */ + int_msg: +- .asciz "Unknown interrupt\n" ++ .asciz "Unknown interrupt or fault at EIP %p %p %p\n" + ALIGN + ignore_int: + cld +@@ -342,9 +343,17 @@ ignore_int: + movl $(__KERNEL_DS),%eax + movl %eax,%ds + movl %eax,%es ++ pushl 16(%esp) ++ pushl 24(%esp) ++ pushl 32(%esp) ++ pushl 40(%esp) + pushl $int_msg + call printk + popl %eax ++ popl %eax ++ popl %eax ++ popl %eax ++ popl %eax + popl %ds + popl %es + popl %edx +@@ -377,23 +386,27 @@ cpu_gdt_descr: + .fill NR_CPUS-1,8,0 # space for the other GDT descriptors + + /* +- * This is initialized to create an identity-mapping at 0-8M (for bootup +- * purposes) and another mapping of the 0-8M area at virtual address ++ * This is initialized to create an identity-mapping at 0-16M (for bootup ++ * purposes) and another mapping of the 0-16M area at virtual address + * PAGE_OFFSET. + */ + .org 0x1000 + ENTRY(swapper_pg_dir) + .long 0x00102007 + .long 0x00103007 +- .fill BOOT_USER_PGD_PTRS-2,4,0 +- /* default: 766 entries */ ++ .long 0x00104007 ++ .long 0x00105007 ++ .fill BOOT_USER_PGD_PTRS-4,4,0 ++ /* default: 764 entries */ + .long 0x00102007 + .long 0x00103007 +- /* default: 254 entries */ +- .fill BOOT_KERNEL_PGD_PTRS-2,4,0 ++ .long 0x00104007 ++ .long 0x00105007 ++ /* default: 252 entries */ ++ .fill BOOT_KERNEL_PGD_PTRS-4,4,0 + + /* +- * The page tables are initialized to only 8MB here - the final page ++ * The page tables are initialized to only 16MB here - the final page + * tables are set up later depending on memory size. + */ + .org 0x2000 +@@ -402,15 +415,21 @@ ENTRY(pg0) + .org 0x3000 + ENTRY(pg1) + ++.org 0x4000 ++ENTRY(pg2) ++ ++.org 0x5000 ++ENTRY(pg3) ++ + /* + * empty_zero_page must immediately follow the page tables ! (The + * initialization loop counts until empty_zero_page) + */ + +-.org 0x4000 ++.org 0x6000 + ENTRY(empty_zero_page) + +-.org 0x5000 ++.org 0x7000 + + /* + * Real beginning of normal "text" segment +@@ -419,12 +438,12 @@ ENTRY(stext) + ENTRY(_stext) + + /* +- * This starts the data section. Note that the above is all +- * in the text section because it has alignment requirements +- * that we cannot fulfill any other way. ++ * This starts the data section. + */ + .data + ++.align PAGE_SIZE_asm ++ + /* + * The Global Descriptor Table contains 28 quadwords, per-CPU. + */ +@@ -439,7 +458,9 @@ ENTRY(boot_gdt_table) + .quad 0x00cf9a000000ffff /* kernel 4GB code at 0x00000000 */ + .quad 0x00cf92000000ffff /* kernel 4GB data at 0x00000000 */ + #endif +- .align L1_CACHE_BYTES ++ ++.align PAGE_SIZE_asm ++ + ENTRY(cpu_gdt_table) + .quad 0x0000000000000000 /* NULL descriptor */ + .quad 0x0000000000000000 /* 0x0b reserved */ +--- linux-2.6.0-test6/arch/i386/kernel/i386_ksyms.c 2003-09-27 18:57:43.000000000 -0700 ++++ 25/arch/i386/kernel/i386_ksyms.c 2003-10-05 00:36:48.000000000 -0700 +@@ -98,7 +98,6 @@ EXPORT_SYMBOL_NOVERS(__down_failed_inter + EXPORT_SYMBOL_NOVERS(__down_failed_trylock); + EXPORT_SYMBOL_NOVERS(__up_wakeup); + /* Networking helper routines. 
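In the head.S hunk above, each boot-time PGD slot holds the physical address of a page table OR'd with its flags. Decoding the first entry, for illustration:

    unsigned long pgd0 = 0x00102007UL;			/* first slot of swapper_pg_dir */

    unsigned long table_phys = pgd0 & 0xfffff000UL;	/* 0x00102000: pg0 */
    unsigned long flags      = pgd0 & 0x00000fffUL;	/* 0x007: present|rw|user */
    /* each table holds 1024 PTEs x 4KB = 4MB, so pg0..pg3 at physical
       0x00102000..0x00105000 provide the 16MB mapping described above */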
*/ +-EXPORT_SYMBOL(csum_partial_copy_generic); + /* Delay loops */ + EXPORT_SYMBOL(__ndelay); + EXPORT_SYMBOL(__udelay); +@@ -112,13 +111,17 @@ EXPORT_SYMBOL_NOVERS(__get_user_4); + EXPORT_SYMBOL(strpbrk); + EXPORT_SYMBOL(strstr); + ++#if !defined(CONFIG_X86_UACCESS_INDIRECT) + EXPORT_SYMBOL(strncpy_from_user); +-EXPORT_SYMBOL(__strncpy_from_user); ++EXPORT_SYMBOL(__direct_strncpy_from_user); + EXPORT_SYMBOL(clear_user); + EXPORT_SYMBOL(__clear_user); + EXPORT_SYMBOL(__copy_from_user_ll); + EXPORT_SYMBOL(__copy_to_user_ll); + EXPORT_SYMBOL(strnlen_user); ++#else /* CONFIG_X86_UACCESS_INDIRECT */ ++EXPORT_SYMBOL(direct_csum_partial_copy_generic); ++#endif + + EXPORT_SYMBOL(dma_alloc_coherent); + EXPORT_SYMBOL(dma_free_coherent); +--- linux-2.6.0-test6/arch/i386/kernel/i387.c 2003-06-14 12:18:51.000000000 -0700 ++++ 25/arch/i386/kernel/i387.c 2003-10-05 00:36:48.000000000 -0700 +@@ -219,6 +219,7 @@ void set_fpu_mxcsr( struct task_struct * + static int convert_fxsr_to_user( struct _fpstate __user *buf, + struct i387_fxsave_struct *fxsave ) + { ++ struct _fpreg tmp[8]; /* 80 bytes scratch area */ + unsigned long env[7]; + struct _fpreg __user *to; + struct _fpxreg *from; +@@ -235,23 +236,25 @@ static int convert_fxsr_to_user( struct + if ( __copy_to_user( buf, env, 7 * sizeof(unsigned long) ) ) + return 1; + +- to = &buf->_st[0]; ++ to = tmp; + from = (struct _fpxreg *) &fxsave->st_space[0]; + for ( i = 0 ; i < 8 ; i++, to++, from++ ) { + unsigned long *t = (unsigned long *)to; + unsigned long *f = (unsigned long *)from; + +- if (__put_user(*f, t) || +- __put_user(*(f + 1), t + 1) || +- __put_user(from->exponent, &to->exponent)) +- return 1; ++ *t = *f; ++ *(t + 1) = *(f+1); ++ to->exponent = from->exponent; + } ++ if (copy_to_user(buf->_st, tmp, sizeof(struct _fpreg [8]))) ++ return 1; + return 0; + } + + static int convert_fxsr_from_user( struct i387_fxsave_struct *fxsave, + struct _fpstate __user *buf ) + { ++ struct _fpreg tmp[8]; /* 80 bytes scratch area */ + unsigned long env[7]; + struct _fpxreg *to; + struct _fpreg __user *from; +@@ -259,6 +262,8 @@ static int convert_fxsr_from_user( struc + + if ( __copy_from_user( env, buf, 7 * sizeof(long) ) ) + return 1; ++ if (copy_from_user(tmp, buf->_st, sizeof(struct _fpreg [8]))) ++ return 1; + + fxsave->cwd = (unsigned short)(env[0] & 0xffff); + fxsave->swd = (unsigned short)(env[1] & 0xffff); +@@ -270,15 +275,14 @@ static int convert_fxsr_from_user( struc + fxsave->fos = env[6]; + + to = (struct _fpxreg *) &fxsave->st_space[0]; +- from = &buf->_st[0]; ++ from = tmp; + for ( i = 0 ; i < 8 ; i++, to++, from++ ) { + unsigned long *t = (unsigned long *)to; + unsigned long *f = (unsigned long *)from; + +- if (__get_user(*t, f) || +- __get_user(*(t + 1), f + 1) || +- __get_user(to->exponent, &from->exponent)) +- return 1; ++ *t = *f; ++ *(t + 1) = *(f + 1); ++ to->exponent = from->exponent; + } + return 0; + } +@@ -549,13 +553,3 @@ int dump_task_extended_fpu(struct task_s + } + return fpvalid; + } +- +- +-#ifdef CONFIG_SMP +-void dump_smp_unlazy_fpu(void) +-{ +- unlazy_fpu(current); +- return; +-} +-#endif +- +--- linux-2.6.0-test6/arch/i386/kernel/i8259.c 2003-06-14 12:18:34.000000000 -0700 ++++ 25/arch/i386/kernel/i8259.c 2003-10-05 00:36:20.000000000 -0700 +@@ -419,8 +419,10 @@ void __init init_IRQ(void) + * us. 
(some of these will be overridden and become + * 'special' SMP interrupts) + */ +- for (i = 0; i < NR_IRQS; i++) { ++ for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) { + int vector = FIRST_EXTERNAL_VECTOR + i; ++ if (i >= NR_IRQS) ++ break; + if (vector != SYSCALL_VECTOR) + set_intr_gate(vector, interrupt[i]); + } +--- linux-2.6.0-test6/arch/i386/kernel/init_task.c 2003-06-14 12:18:35.000000000 -0700 ++++ 25/arch/i386/kernel/init_task.c 2003-10-05 00:36:48.000000000 -0700 +@@ -23,7 +23,7 @@ struct mm_struct init_mm = INIT_MM(init_ + */ + union thread_union init_thread_union + __attribute__((__section__(".data.init_task"))) = +- { INIT_THREAD_INFO(init_task) }; ++ { INIT_THREAD_INFO(init_task, init_thread_union) }; + + /* + * Initial task structure. +@@ -39,5 +39,5 @@ struct task_struct init_task = INIT_TASK + * section. Since TSS's are completely CPU-local, we want them + * on exact cacheline boundaries, to eliminate cacheline ping-pong. + */ +-struct tss_struct init_tss[NR_CPUS] __cacheline_aligned = { [0 ... NR_CPUS-1] = INIT_TSS }; ++struct tss_struct init_tss[NR_CPUS] __attribute__((__section__(".data.tss"))) = { [0 ... NR_CPUS-1] = INIT_TSS }; + +--- linux-2.6.0-test6/arch/i386/kernel/io_apic.c 2003-09-27 18:57:43.000000000 -0700 ++++ 25/arch/i386/kernel/io_apic.c 2003-10-05 00:36:27.000000000 -0700 +@@ -76,6 +76,14 @@ static struct irq_pin_list { + int apic, pin, next; + } irq_2_pin[PIN_MAP_SIZE]; + ++#ifdef CONFIG_PCI_USE_VECTOR ++int vector_irq[NR_IRQS] = { [0 ... NR_IRQS -1] = -1}; ++#define vector_to_irq(vector) \ ++ (platform_legacy_irq(vector) ? vector : vector_irq[vector]) ++#else ++#define vector_to_irq(vector) (vector) ++#endif ++ + /* + * The common case is 1:1 IRQ<->pin mappings. Sometimes there are + * shared ISA-space IRQs, so we have to support them. 
We are super +@@ -249,7 +257,7 @@ static void clear_IO_APIC (void) + clear_IO_APIC_pin(apic, pin); + } + +-static void set_ioapic_affinity(unsigned int irq, cpumask_t cpumask) ++static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask) + { + unsigned long flags; + int pin; +@@ -288,7 +296,7 @@ static void set_ioapic_affinity(unsigned + + extern cpumask_t irq_affinity[NR_IRQS]; + +-static cpumask_t __cacheline_aligned pending_irq_balance_cpumask[NR_IRQS]; ++cpumask_t __cacheline_aligned pending_irq_balance_cpumask[NR_IRQS]; + + #define IRQBALANCE_CHECK_ARCH -999 + static int irqbalance_disabled = IRQBALANCE_CHECK_ARCH; +@@ -670,13 +678,11 @@ static int __init irqbalance_disable(cha + + __setup("noirqbalance", irqbalance_disable); + +-static void set_ioapic_affinity(unsigned int irq, cpumask_t mask); +- + static inline void move_irq(int irq) + { + /* note - we hold the desc->lock */ + if (unlikely(!cpus_empty(pending_irq_balance_cpumask[irq]))) { +- set_ioapic_affinity(irq, pending_irq_balance_cpumask[irq]); ++ set_ioapic_affinity_irq(irq, pending_irq_balance_cpumask[irq]); + cpus_clear(pending_irq_balance_cpumask[irq]); + } + } +@@ -853,7 +859,7 @@ void __init setup_ioapic_dest(cpumask_t + if (irq_entry == -1) + continue; + irq = pin_2_irq(irq_entry, ioapic, pin); +- set_ioapic_affinity(irq, mask); ++ set_ioapic_affinity_irq(irq, mask); + } + + } +@@ -1138,12 +1144,14 @@ static inline int IO_APIC_irq_trigger(in + return 0; + } + +-int irq_vector[NR_IRQS] = { FIRST_DEVICE_VECTOR , 0 }; ++u8 *irq_vector; ++int nr_irqs; + +-static int __init assign_irq_vector(int irq) ++#ifndef CONFIG_PCI_USE_VECTOR ++int __init assign_irq_vector(int irq) + { + static int current_vector = FIRST_DEVICE_VECTOR, offset = 0; +- BUG_ON(irq >= NR_IRQS); ++ BUG_ON(irq >= nr_irqs); + if (IO_APIC_VECTOR(irq) > 0) + return IO_APIC_VECTOR(irq); + next: +@@ -1157,11 +1165,36 @@ next: + } + + IO_APIC_VECTOR(irq) = current_vector; ++ + return current_vector; + } ++#endif + +-static struct hw_interrupt_type ioapic_level_irq_type; +-static struct hw_interrupt_type ioapic_edge_irq_type; ++static struct hw_interrupt_type ioapic_level_type; ++static struct hw_interrupt_type ioapic_edge_type; ++ ++#define IOAPIC_AUTO -1 ++#define IOAPIC_EDGE 0 ++#define IOAPIC_LEVEL 1 ++ ++static inline void ioapic_register_intr(int irq, int vector, unsigned long trigger) ++{ ++ if (use_pci_vector() && !platform_legacy_irq(irq)) { ++ if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || ++ trigger == IOAPIC_LEVEL) ++ irq_desc[vector].handler = &ioapic_level_type; ++ else ++ irq_desc[vector].handler = &ioapic_edge_type; ++ set_intr_gate(vector, interrupt[vector]); ++ } else { ++ if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || ++ trigger == IOAPIC_LEVEL) ++ irq_desc[irq].handler = &ioapic_level_type; ++ else ++ irq_desc[irq].handler = &ioapic_edge_type; ++ set_intr_gate(vector, interrupt[irq]); ++ } ++} + + void __init setup_IO_APIC_irqs(void) + { +@@ -1219,13 +1252,7 @@ void __init setup_IO_APIC_irqs(void) + if (IO_APIC_IRQ(irq)) { + vector = assign_irq_vector(irq); + entry.vector = vector; +- +- if (IO_APIC_irq_trigger(irq)) +- irq_desc[irq].handler = &ioapic_level_irq_type; +- else +- irq_desc[irq].handler = &ioapic_edge_irq_type; +- +- set_intr_gate(vector, interrupt[irq]); ++ ioapic_register_intr(irq, vector, IOAPIC_AUTO); + + if (!apic && (irq < 16)) + disable_8259A_irq(irq); +@@ -1272,7 +1299,7 @@ void __init setup_ExtINT_IRQ0_pin(unsign + * The timer IRQ doesn't have to know that behind the + * scene we have 
a 8259A-master in AEOI mode ... + */ +- irq_desc[0].handler = &ioapic_edge_irq_type; ++ irq_desc[0].handler = &ioapic_edge_type; + + /* + * Add it to the IO-APIC irq-routing table: +@@ -1762,9 +1789,6 @@ static int __init timer_irq_works(void) + * that was delayed but this is now handled in the device + * independent code. + */ +-#define enable_edge_ioapic_irq unmask_IO_APIC_irq +- +-static void disable_edge_ioapic_irq (unsigned int irq) { /* nothing */ } + + /* + * Starting up a edge-triggered IO-APIC interrupt is +@@ -1775,7 +1799,6 @@ static void disable_edge_ioapic_irq (uns + * This is not complete - we should be able to fake + * an edge even if it isn't on the 8259A... + */ +- + static unsigned int startup_edge_ioapic_irq(unsigned int irq) + { + int was_pending = 0; +@@ -1793,8 +1816,6 @@ static unsigned int startup_edge_ioapic_ + return was_pending; + } + +-#define shutdown_edge_ioapic_irq disable_edge_ioapic_irq +- + /* + * Once we have recorded IRQ_PENDING already, we can mask the + * interrupt for real. This prevents IRQ storms from unhandled +@@ -1809,9 +1830,6 @@ static void ack_edge_ioapic_irq(unsigned + ack_APIC_irq(); + } + +-static void end_edge_ioapic_irq (unsigned int i) { /* nothing */ } +- +- + /* + * Level triggered interrupts can just be masked, + * and shutting down and starting up the interrupt +@@ -1833,10 +1851,6 @@ static unsigned int startup_level_ioapic + return 0; /* don't check for pending */ + } + +-#define shutdown_level_ioapic_irq mask_IO_APIC_irq +-#define enable_level_ioapic_irq unmask_IO_APIC_irq +-#define disable_level_ioapic_irq mask_IO_APIC_irq +- + static void end_level_ioapic_irq (unsigned int irq) + { + unsigned long v; +@@ -1863,6 +1877,7 @@ static void end_level_ioapic_irq (unsign + * The idea is from Manfred Spraul. --macro + */ + i = IO_APIC_VECTOR(irq); ++ + v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1)); + + ack_APIC_irq(); +@@ -1897,7 +1912,57 @@ static void end_level_ioapic_irq (unsign + } + } + +-static void mask_and_ack_level_ioapic_irq (unsigned int irq) { /* nothing */ } ++#ifdef CONFIG_PCI_USE_VECTOR ++static unsigned int startup_edge_ioapic_vector(unsigned int vector) ++{ ++ int irq = vector_to_irq(vector); ++ ++ return startup_edge_ioapic_irq(irq); ++} ++ ++static void ack_edge_ioapic_vector(unsigned int vector) ++{ ++ int irq = vector_to_irq(vector); ++ ++ ack_edge_ioapic_irq(irq); ++} ++ ++static unsigned int startup_level_ioapic_vector (unsigned int vector) ++{ ++ int irq = vector_to_irq(vector); ++ ++ return startup_level_ioapic_irq (irq); ++} ++ ++static void end_level_ioapic_vector (unsigned int vector) ++{ ++ int irq = vector_to_irq(vector); ++ ++ end_level_ioapic_irq(irq); ++} ++ ++static void mask_IO_APIC_vector (unsigned int vector) ++{ ++ int irq = vector_to_irq(vector); ++ ++ mask_IO_APIC_irq(irq); ++} ++ ++static void unmask_IO_APIC_vector (unsigned int vector) ++{ ++ int irq = vector_to_irq(vector); ++ ++ unmask_IO_APIC_irq(irq); ++} ++ ++static void set_ioapic_affinity_vector (unsigned int vector, ++ unsigned long cpu_mask) ++{ ++ int irq = vector_to_irq(vector); ++ ++ set_ioapic_affinity_irq(irq, cpu_mask); ++} ++#endif + + /* + * Level and edge triggered IO-APIC interrupts need different handling, +@@ -1907,26 +1972,25 @@ static void mask_and_ack_level_ioapic_ir + * edge-triggered handler, without risking IRQ storms and other ugly + * races. 
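All of the *_vector wrappers above reduce to the vector_to_irq() translation defined near the top of this file: a legacy vector is its own IRQ, anything else goes through vector_irq[], where -1 means "not yet assigned". Assuming platform_legacy_irq() accepts the 16 ISA IRQs, the lookup behaves like:

    int a = vector_to_irq(9);		/* ISA range: platform_legacy_irq() is true,
    					   so the vector is the IRQ: a == 9 */
    int b = vector_to_irq(0x31);	/* otherwise vector_irq[0x31], which stays
    					   -1 until assign_irq_vector() fills it in */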
+ */ +- +-static struct hw_interrupt_type ioapic_edge_irq_type = { ++static struct hw_interrupt_type ioapic_edge_type = { + .typename = "IO-APIC-edge", +- .startup = startup_edge_ioapic_irq, +- .shutdown = shutdown_edge_ioapic_irq, +- .enable = enable_edge_ioapic_irq, +- .disable = disable_edge_ioapic_irq, +- .ack = ack_edge_ioapic_irq, +- .end = end_edge_ioapic_irq, ++ .startup = startup_edge_ioapic, ++ .shutdown = shutdown_edge_ioapic, ++ .enable = enable_edge_ioapic, ++ .disable = disable_edge_ioapic, ++ .ack = ack_edge_ioapic, ++ .end = end_edge_ioapic, + .set_affinity = set_ioapic_affinity, + }; + +-static struct hw_interrupt_type ioapic_level_irq_type = { ++static struct hw_interrupt_type ioapic_level_type = { + .typename = "IO-APIC-level", +- .startup = startup_level_ioapic_irq, +- .shutdown = shutdown_level_ioapic_irq, +- .enable = enable_level_ioapic_irq, +- .disable = disable_level_ioapic_irq, +- .ack = mask_and_ack_level_ioapic_irq, +- .end = end_level_ioapic_irq, ++ .startup = startup_level_ioapic, ++ .shutdown = shutdown_level_ioapic, ++ .enable = enable_level_ioapic, ++ .disable = disable_level_ioapic, ++ .ack = mask_and_ack_level_ioapic, ++ .end = end_level_ioapic, + .set_affinity = set_ioapic_affinity, + }; + +@@ -1946,7 +2010,13 @@ static inline void init_IO_APIC_traps(vo + * 0x80, because int 0x80 is hm, kind of importantish. ;) + */ + for (irq = 0; irq < NR_IRQS ; irq++) { +- if (IO_APIC_IRQ(irq) && !IO_APIC_VECTOR(irq)) { ++ int tmp = irq; ++ if (use_pci_vector()) { ++ if (!platform_legacy_irq(tmp)) ++ if ((tmp = vector_to_irq(tmp)) == -1) ++ continue; ++ } ++ if (IO_APIC_IRQ(tmp) && !IO_APIC_VECTOR(tmp)) { + /* + * Hmm.. We don't have an entry for this, + * so default to an old-fashioned 8259 +@@ -2378,10 +2448,12 @@ int io_apic_set_pci_routing (int ioapic, + "IRQ %d Mode:%i Active:%i)\n", ioapic, + mp_ioapics[ioapic].mpc_apicid, pin, entry.vector, irq, edge_level, active_high_low); + ++ if (use_pci_vector() && !platform_legacy_irq(irq)) ++ irq = IO_APIC_VECTOR(irq); + if (edge_level) { +- irq_desc[irq].handler = &ioapic_level_irq_type; ++ irq_desc[irq].handler = &ioapic_level_type; + } else { +- irq_desc[irq].handler = &ioapic_edge_irq_type; ++ irq_desc[irq].handler = &ioapic_edge_type; + } + + set_intr_gate(entry.vector, interrupt[irq]); +--- linux-2.6.0-test6/arch/i386/kernel/irq.c 2003-08-22 19:23:40.000000000 -0700 ++++ 25/arch/i386/kernel/irq.c 2003-10-05 00:33:50.000000000 -0700 +@@ -44,6 +44,7 @@ + #include + #include + #include ++#include + + /* + * Linux has a controller-independent x86 interrupt architecture. +@@ -499,6 +500,17 @@ out: + + irq_exit(); + ++#ifdef CONFIG_KGDB ++ /* ++ * We need to do this after clearing out of all the interrupt ++ * machinery because kgdb will reenter the NIC driver and the IRQ ++ * system. synchronize_irq() (at least) will deadlock. ++ */ ++ if (kgdb_eth_need_breakpoint[smp_processor_id()]) { ++ kgdb_eth_need_breakpoint[smp_processor_id()] = 0; ++ BREAKPOINT; ++ } ++#endif + return 1; + } + +--- /dev/null 2002-08-30 16:31:37.000000000 -0700 ++++ 25/arch/i386/kernel/kgdb_stub.c 2003-10-05 00:33:51.000000000 -0700 +@@ -0,0 +1,2492 @@ ++/* ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2, or (at your option) any ++ * later version. 
++ *
++ * This program is distributed in the hope that it will be useful, but
++ * WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * General Public License for more details.
++ *
++ */
++
++/*
++ * Copyright (c) 2000 VERITAS Software Corporation.
++ *
++ */
++/****************************************************************************
++ *  Header: remcom.c,v 1.34 91/03/09 12:29:49 glenne Exp $
++ *
++ *  Module name: remcom.c $
++ *  Revision: 1.34 $
++ *  Date: 91/03/09 12:29:49 $
++ *  Contributor:     Lake Stevens Instrument Division$
++ *
++ *  Description:     low level support for gdb debugger. $
++ *
++ *  Considerations:  only works on target hardware $
++ *
++ *  Written by:      Glenn Engel $
++ *  Updated by:      David Grothe
++ *  Updated by:      Robert Walsh
++ *  Updated by:      wangdi
++ *  ModuleState:     Experimental $
++ *
++ *  NOTES:           See Below $
++ *
++ *  Modified for 386 by Jim Kingdon, Cygnus Support.
++ *  Compatibility with 2.1.xx kernel by David Grothe
++ *
++ *  Changes to allow auto initialization.  All that is needed is that it
++ *  be linked with the kernel and a break point (int 3) be executed.
++ *  The header file defines BREAKPOINT to allow one to do
++ *  this.  It should also be possible, once the interrupt system is up, to
++ *  call putDebugChar("+").  Once this is done, the remote debugger should
++ *  get our attention by sending a ^C in a packet.  George Anzinger
++ *
++ *  Integrated into 2.2.5 kernel by Tigran Aivazian
++ *  Added thread support, support for multiple processors,
++ *  support for ia-32(x86) hardware debugging.
++ *  Amit S. Kale ( akale@veritas.com )
++ *
++ *  Modified to support debugging over ethernet by Robert Walsh
++ *  and wangdi, based on
++ *  code by San Mehat.
++ *
++ *
++ *  To enable debugger support, two things need to happen.  One, a
++ *  call to set_debug_traps() is necessary in order to allow any breakpoints
++ *  or error conditions to be properly intercepted and reported to gdb.
++ *  Two, a breakpoint needs to be generated to begin communication.  This
++ *  is most easily accomplished by a call to breakpoint().  Breakpoint()
++ *  simulates a breakpoint by executing an int 3.
++ *
++ *************
++ *
++ *    The following gdb commands are supported:
++ *
++ * command          function                               Return value
++ *
++ *    g             return the value of the CPU registers  hex data or ENN
++ *    G             set the value of the CPU registers     OK or ENN
++ *
++ *    mAA..AA,LLLL  Read LLLL bytes at address AA..AA      hex data or ENN
++ *    MAA..AA,LLLL: Write LLLL bytes at address AA.AA      OK or ENN
++ *
++ *    c             Resume at current address              SNN   ( signal NN)
++ *    cAA..AA       Continue at address AA..AA             SNN
++ *
++ *    s             Step one instruction                   SNN
++ *    sAA..AA       Step one instruction from AA..AA       SNN
++ *
++ *    k             kill
++ *
++ *    ?             What was the last sigval ?             SNN   (signal NN)
++ *
++ * All commands and responses are sent with a packet which includes a
++ * checksum.  A packet consists of
++ *
++ * $<packet info>#<checksum>.
++ *
++ * where
++ * <packet info> :: <characters representing the command or response>
++ * <checksum>    :: <two hex digits computed as modulo 256 sum of <packet info>>
++ *
++ * When a packet is received, it is first acknowledged with either '+' or '-'.
++ * '+' indicates a successful transfer.  '-' indicates a failed transfer.
++ *
++ * Example:
++ *
++ *  Host:                  Reply:
++ *  $m0,10#2a               +$00010203040506070809101112131415#42
++ *
++ ****************************************************************************/
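The checksum above is nothing more than the byte-wise sum of the packet body, modulo 256, transmitted as two hex digits after the '#'. A standalone sketch, not part of the patch:

    static unsigned char packet_checksum(const char *p)
    {
    	unsigned char sum = 0;	/* unsigned char arithmetic is mod 256 */

    	while (*p)
    		sum += *p++;
    	return sum;		/* sent as two hex digits after '#' */
    }

For the example above, 'm' + '0' + ',' + '1' + '0' sums to 298, and 298 % 256 == 0x2a, matching "$m0,10#2a".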
++ * ++ * Example: ++ * ++ * Host: Reply: ++ * $m0,10#2a +$00010203040506070809101112131415#42 ++ * ++ ****************************************************************************/ ++#define KGDB_VERSION "<20030915.1651.33>" ++#include ++#include ++#include /* for strcpy */ ++#include ++#include ++#include ++#include ++#include /* for linux pt_regs struct */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/************************************************************************ ++ * ++ * external low-level support routines ++ */ ++typedef void (*Function) (void); /* pointer to a function */ ++ ++/* Thread reference */ ++typedef unsigned char threadref[8]; ++ ++extern int tty_putDebugChar(int); /* write a single character */ ++extern int tty_getDebugChar(void); /* read and return a single char */ ++extern void tty_flushDebugChar(void); /* flush pending characters */ ++extern int eth_putDebugChar(int); /* write a single character */ ++extern int eth_getDebugChar(void); /* read and return a single char */ ++extern void eth_flushDebugChar(void); /* flush pending characters */ ++extern void kgdb_eth_set_trapmode(int); ++extern void kgdb_eth_reply_arp(void); /*send arp request */ ++extern volatile int kgdb_eth_is_initializing; ++ ++ ++/************************************************************************/ ++/* BUFMAX defines the maximum number of characters in inbound/outbound buffers*/ ++/* at least NUMREGBYTES*2 are needed for register packets */ ++/* Longer buffer is needed to list all threads */ ++#define BUFMAX 400 ++ ++char *kgdb_version = KGDB_VERSION; ++ ++/* debug > 0 prints ill-formed commands in valid packets & checksum errors */ ++int debug_regs = 0; /* set to non-zero to print registers */ ++ ++/* filled in by an external module */ ++char *gdb_module_offsets; ++ ++static const char hexchars[] = "0123456789abcdef"; ++ ++/* Number of bytes of registers. */ ++#define NUMREGBYTES 64 ++/* ++ * Note that this register image is in a different order than ++ * the register image that Linux produces at interrupt time. ++ * ++ * Linux's register image is defined by struct pt_regs in ptrace.h. ++ * Just why GDB uses a different order is a historical mystery. ++ */ ++enum regnames { _EAX, /* 0 */ ++ _ECX, /* 1 */ ++ _EDX, /* 2 */ ++ _EBX, /* 3 */ ++ _ESP, /* 4 */ ++ _EBP, /* 5 */ ++ _ESI, /* 6 */ ++ _EDI, /* 7 */ ++ _PC /* 8 also known as eip */ , ++ _PS /* 9 also known as eflags */ , ++ _CS, /* 10 */ ++ _SS, /* 11 */ ++ _DS, /* 12 */ ++ _ES, /* 13 */ ++ _FS, /* 14 */ ++ _GS /* 15 */ ++}; ++ ++/*************************** ASSEMBLY CODE MACROS *************************/ ++/* ++ * Put the error code here just in case the user cares. ++ * Likewise, the vector number here (since GDB only gets the signal ++ * number through the usual means, and that's not very specific). ++ * The called_from is the return address so he can tell how we entered kgdb. ++ * This will allow him to seperate out the various possible entries. 
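++ *
++ * For example (illustrative, not part of the stub itself), an attached
++ * gdb can inspect this saved state directly:
++ *
++ *    (gdb) print kgdb_info.vector
++ *    (gdb) print/x kgdb_info.errcode
++ *    (gdb) print kgdb_info.called_from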
++ */ ++#define REMOTE_DEBUG 0 /* set != to turn on printing (also available in info) */ ++ ++#define PID_MAX PID_MAX_DEFAULT ++ ++#ifdef CONFIG_SMP ++void smp_send_nmi_allbutself(void); ++#define IF_SMP(x) x ++#undef MAX_NO_CPUS ++#ifndef CONFIG_NO_KGDB_CPUS ++#define CONFIG_NO_KGDB_CPUS 2 ++#endif ++#if CONFIG_NO_KGDB_CPUS > NR_CPUS ++#define MAX_NO_CPUS NR_CPUS ++#else ++#define MAX_NO_CPUS CONFIG_NO_KGDB_CPUS ++#endif ++#define hold_init hold_on_sstep: 1, ++#define MAX_CPU_MASK (unsigned long)((1LL << MAX_NO_CPUS) - 1LL) ++#define NUM_CPUS num_online_cpus() ++#else ++#define IF_SMP(x) ++#define hold_init ++#undef MAX_NO_CPUS ++#define MAX_NO_CPUS 1 ++#define NUM_CPUS 1 ++#endif ++#define NOCPU (struct task_struct *)0xbad1fbad ++/* *INDENT-OFF* */ ++struct kgdb_info { ++ int used_malloc; ++ void *called_from; ++ long long entry_tsc; ++ int errcode; ++ int vector; ++ int print_debug_info; ++#ifdef CONFIG_SMP ++ int hold_on_sstep; ++ struct { ++ volatile struct task_struct *task; ++ int pid; ++ int hold; ++ struct pt_regs *regs; ++ } cpus_waiting[MAX_NO_CPUS]; ++#endif ++} kgdb_info = {hold_init print_debug_info:REMOTE_DEBUG, vector:-1}; ++ ++/* *INDENT-ON* */ ++ ++#define used_m kgdb_info.used_malloc ++/* ++ * This is little area we set aside to contain the stack we ++ * need to build to allow gdb to call functions. We use one ++ * per cpu to avoid locking issues. We will do all this work ++ * with interrupts off so that should take care of the protection ++ * issues. ++ */ ++#define LOOKASIDE_SIZE 200 /* should be more than enough */ ++#define MALLOC_MAX 200 /* Max malloc size */ ++struct { ++ unsigned int esp; ++ int array[LOOKASIDE_SIZE]; ++} fn_call_lookaside[MAX_NO_CPUS]; ++ ++static int trap_cpu; ++static unsigned int OLD_esp; ++ ++#define END_OF_LOOKASIDE &fn_call_lookaside[trap_cpu].array[LOOKASIDE_SIZE] ++#define IF_BIT 0x200 ++#define TF_BIT 0x100 ++ ++#define MALLOC_ROUND 8-1 ++ ++static char malloc_array[MALLOC_MAX]; ++IF_SMP(static void to_gdb(const char *mess)); ++void * ++malloc(int size) ++{ ++ ++ if (size <= (MALLOC_MAX - used_m)) { ++ int old_used = used_m; ++ used_m += ((size + MALLOC_ROUND) & (~MALLOC_ROUND)); ++ return &malloc_array[old_used]; ++ } else { ++ return NULL; ++ } ++} ++ ++/* ++ * I/O dispatch functions... ++ * Based upon kgdb_eth, either call the ethernet ++ * handler or the serial one.. ++ */ ++void ++putDebugChar(int c) ++{ ++ if (kgdb_eth == -1) { ++ tty_putDebugChar(c); ++ } else { ++ eth_putDebugChar(c); ++ } ++} ++ ++int ++getDebugChar(void) ++{ ++ if (kgdb_eth == -1) { ++ return tty_getDebugChar(); ++ } else { ++ return eth_getDebugChar(); ++ } ++} ++ ++void ++flushDebugChar(void) ++{ ++ if (kgdb_eth == -1) { ++ tty_flushDebugChar(); ++ } else { ++ eth_flushDebugChar(); ++ } ++} ++ ++/* ++ * Gdb calls functions by pushing agruments, including a return address ++ * on the stack and the adjusting EIP to point to the function. The ++ * whole assumption in GDB is that we are on a different stack than the ++ * one the "user" i.e. code that hit the break point, is on. This, of ++ * course is not true in the kernel. Thus various dodges are needed to ++ * do the call without directly messing with EIP (which we can not change ++ * as it is just a location and not a register. To adjust it would then ++ * require that we move every thing below EIP up or down as needed. This ++ * will not work as we may well have stack relative pointer on the stack ++ * (such as the pointer to regs, for example). 
++ ++ * So here is what we do: ++ * We detect gdb attempting to store into the stack area and instead, store ++ * into the fn_call_lookaside.array at the same relative location as if it ++ * were the area ESP pointed at. We also trap ESP modifications ++ * and uses these to adjust fn_call_lookaside.esp. On entry ++ * fn_call_lookaside.esp will be set to point at the last entry in ++ * fn_call_lookaside.array. This allows us to check if it has changed, and ++ * if so, on exit, we add the registers we will use to do the move and a ++ * trap/ interrupt return exit sequence. We then adjust the eflags in the ++ * regs array (remember we now have a copy in the fn_call_lookaside.array) to ++ * kill the interrupt bit, AND we change EIP to point at our set up stub. ++ * As part of the register set up we preset the registers to point at the ++ * begining and end of the fn_call_lookaside.array, so all the stub needs to ++ * do is move words from the array to the stack until ESP= the desired value ++ * then do the rti. This will then transfer to the desired function with ++ * all the correct registers. Nifty huh? ++ */ ++extern asmlinkage void fn_call_stub(void); ++extern asmlinkage void fn_rtn_stub(void); ++/* *INDENT-OFF* */ ++__asm__("fn_rtn_stub:\n\t" ++ "movl %eax,%esp\n\t" ++ "fn_call_stub:\n\t" ++ "1:\n\t" ++ "addl $-4,%ebx\n\t" ++ "movl (%ebx), %eax\n\t" ++ "pushl %eax\n\t" ++ "cmpl %esp,%ecx\n\t" ++ "jne 1b\n\t" ++ "popl %eax\n\t" ++ "popl %ebx\n\t" ++ "popl %ecx\n\t" ++ "iret \n\t"); ++/* *INDENT-ON* */ ++#define gdb_i386vector kgdb_info.vector ++#define gdb_i386errcode kgdb_info.errcode ++#define waiting_cpus kgdb_info.cpus_waiting ++#define remote_debug kgdb_info.print_debug_info ++#define hold_cpu(cpu) kgdb_info.cpus_waiting[cpu].hold ++/* gdb locks */ ++ ++#ifdef CONFIG_SMP ++static int in_kgdb_called; ++static spinlock_t waitlocks[MAX_NO_CPUS] = ++ {[0 ... MAX_NO_CPUS - 1] = SPIN_LOCK_UNLOCKED }; ++/* ++ * The following array has the thread pointer of each of the "other" ++ * cpus. We make it global so it can be seen by gdb. ++ */ ++volatile int in_kgdb_entry_log[MAX_NO_CPUS]; ++volatile struct pt_regs *in_kgdb_here_log[MAX_NO_CPUS]; ++/* ++static spinlock_t continuelocks[MAX_NO_CPUS]; ++*/ ++spinlock_t kgdb_spinlock = SPIN_LOCK_UNLOCKED; ++/* waiters on our spinlock plus us */ ++static atomic_t spinlock_waiters = ATOMIC_INIT(1); ++static int spinlock_count = 0; ++static int spinlock_cpu = 0; ++/* ++ * Note we use nested spin locks to account for the case where a break ++ * point is encountered when calling a function by user direction from ++ * kgdb. Also there is the memory exception recursion to account for. ++ * Well, yes, but this lets other cpus thru too. Lets add a ++ * cpu id to the lock. ++ */ ++#define KGDB_SPIN_LOCK(x) if( spinlock_count == 0 || \ ++ spinlock_cpu != smp_processor_id()){\ ++ atomic_inc(&spinlock_waiters); \ ++ while (! 
spin_trylock(x)) {\ ++ in_kgdb(®s);\ ++ }\ ++ atomic_dec(&spinlock_waiters); \ ++ spinlock_count = 1; \ ++ spinlock_cpu = smp_processor_id(); \ ++ }else{ \ ++ spinlock_count++; \ ++ } ++#define KGDB_SPIN_UNLOCK(x) if( --spinlock_count == 0) spin_unlock(x) ++#else ++unsigned kgdb_spinlock = 0; ++#define KGDB_SPIN_LOCK(x) --*x ++#define KGDB_SPIN_UNLOCK(x) ++*x ++#endif ++ ++int ++hex(char ch) ++{ ++ if ((ch >= 'a') && (ch <= 'f')) ++ return (ch - 'a' + 10); ++ if ((ch >= '0') && (ch <= '9')) ++ return (ch - '0'); ++ if ((ch >= 'A') && (ch <= 'F')) ++ return (ch - 'A' + 10); ++ return (-1); ++} ++ ++/* scan for the sequence $# */ ++void ++getpacket(char *buffer) ++{ ++ unsigned char checksum; ++ unsigned char xmitcsum; ++ int i; ++ int count; ++ char ch; ++ ++ do { ++ /* wait around for the start character, ignore all other characters */ ++ while ((ch = (getDebugChar() & 0x7f)) != '$') ; ++ checksum = 0; ++ xmitcsum = -1; ++ ++ count = 0; ++ ++ /* now, read until a # or end of buffer is found */ ++ while (count < BUFMAX) { ++ ch = getDebugChar() & 0x7f; ++ if (ch == '#') ++ break; ++ checksum = checksum + ch; ++ buffer[count] = ch; ++ count = count + 1; ++ } ++ buffer[count] = 0; ++ ++ if (ch == '#') { ++ xmitcsum = hex(getDebugChar() & 0x7f) << 4; ++ xmitcsum += hex(getDebugChar() & 0x7f); ++ if ((remote_debug) && (checksum != xmitcsum)) { ++ printk ++ ("bad checksum. My count = 0x%x, sent=0x%x. buf=%s\n", ++ checksum, xmitcsum, buffer); ++ } ++ ++ if (checksum != xmitcsum) ++ putDebugChar('-'); /* failed checksum */ ++ else { ++ putDebugChar('+'); /* successful transfer */ ++ /* if a sequence char is present, reply the sequence ID */ ++ if (buffer[2] == ':') { ++ putDebugChar(buffer[0]); ++ putDebugChar(buffer[1]); ++ /* remove sequence chars from buffer */ ++ count = strlen(buffer); ++ for (i = 3; i <= count; i++) ++ buffer[i - 3] = buffer[i]; ++ } ++ } ++ } ++ } while (checksum != xmitcsum); ++ ++ if (remote_debug) ++ printk("R:%s\n", buffer); ++ flushDebugChar(); ++} ++ ++/* send the packet in buffer. */ ++ ++void ++putpacket(char *buffer) ++{ ++ unsigned char checksum; ++ int count; ++ char ch; ++ ++ /* $#. */ ++ ++ if (kgdb_eth == -1) { ++ do { ++ if (remote_debug) ++ printk("T:%s\n", buffer); ++ putDebugChar('$'); ++ checksum = 0; ++ count = 0; ++ ++ while ((ch = buffer[count])) { ++ putDebugChar(ch); ++ checksum += ch; ++ count += 1; ++ } ++ ++ putDebugChar('#'); ++ putDebugChar(hexchars[checksum >> 4]); ++ putDebugChar(hexchars[checksum % 16]); ++ flushDebugChar(); ++ ++ } while ((getDebugChar() & 0x7f) != '+'); ++ } else { ++ /* ++ * For udp, we can not transfer too much bytes once. 
++ * We only transfer MAX_SEND_COUNT size bytes each time ++ */ ++ ++#define MAX_SEND_COUNT 30 ++ ++ int send_count = 0, i = 0; ++ char send_buf[MAX_SEND_COUNT]; ++ ++ do { ++ if (remote_debug) ++ printk("T:%s\n", buffer); ++ putDebugChar('$'); ++ checksum = 0; ++ count = 0; ++ send_count = 0; ++ while ((ch = buffer[count])) { ++ if (send_count >= MAX_SEND_COUNT) { ++ for(i = 0; i < MAX_SEND_COUNT; i++) { ++ putDebugChar(send_buf[i]); ++ } ++ flushDebugChar(); ++ send_count = 0; ++ } else { ++ send_buf[send_count] = ch; ++ checksum += ch; ++ count ++; ++ send_count++; ++ } ++ } ++ for(i = 0; i < send_count; i++) ++ putDebugChar(send_buf[i]); ++ putDebugChar('#'); ++ putDebugChar(hexchars[checksum >> 4]); ++ putDebugChar(hexchars[checksum % 16]); ++ flushDebugChar(); ++ } while ((getDebugChar() & 0x7f) != '+'); ++ } ++} ++ ++static char remcomInBuffer[BUFMAX]; ++static char remcomOutBuffer[BUFMAX]; ++static short error; ++ ++void ++debug_error(char *format, char *parm) ++{ ++ if (remote_debug) ++ printk(format, parm); ++} ++ ++static void ++print_regs(struct pt_regs *regs) ++{ ++ printk("EAX=%08lx ", regs->eax); ++ printk("EBX=%08lx ", regs->ebx); ++ printk("ECX=%08lx ", regs->ecx); ++ printk("EDX=%08lx ", regs->edx); ++ printk("\n"); ++ printk("ESI=%08lx ", regs->esi); ++ printk("EDI=%08lx ", regs->edi); ++ printk("EBP=%08lx ", regs->ebp); ++ printk("ESP=%08lx ", (long) ®s->esp); ++ printk("\n"); ++ printk(" DS=%08x ", regs->xds); ++ printk(" ES=%08x ", regs->xes); ++ printk(" SS=%08x ", __KERNEL_DS); ++ printk(" FL=%08lx ", regs->eflags); ++ printk("\n"); ++ printk(" CS=%08x ", regs->xcs); ++ printk(" IP=%08lx ", regs->eip); ++#if 0 ++ printk(" FS=%08x ", regs->fs); ++ printk(" GS=%08x ", regs->gs); ++#endif ++ printk("\n"); ++ ++} /* print_regs */ ++ ++#define NEW_esp fn_call_lookaside[trap_cpu].esp ++ ++static void ++regs_to_gdb_regs(int *gdb_regs, struct pt_regs *regs) ++{ ++ gdb_regs[_EAX] = regs->eax; ++ gdb_regs[_EBX] = regs->ebx; ++ gdb_regs[_ECX] = regs->ecx; ++ gdb_regs[_EDX] = regs->edx; ++ gdb_regs[_ESI] = regs->esi; ++ gdb_regs[_EDI] = regs->edi; ++ gdb_regs[_EBP] = regs->ebp; ++ gdb_regs[_DS] = regs->xds; ++ gdb_regs[_ES] = regs->xes; ++ gdb_regs[_PS] = regs->eflags; ++ gdb_regs[_CS] = regs->xcs; ++ gdb_regs[_PC] = regs->eip; ++ /* Note, as we are a debugging the kernel, we will always ++ * trap in kernel code, this means no priviledge change, ++ * and so the pt_regs structure is not completely valid. In a non ++ * privilege change trap, only EFLAGS, CS and EIP are put on the stack, ++ * SS and ESP are not stacked, this means that the last 2 elements of ++ * pt_regs is not valid (they would normally refer to the user stack) ++ * also, using regs+1 is no good because you end up will a value that is ++ * 2 longs (8) too high. This used to cause stepping over functions ++ * to fail, so my fix is to use the address of regs->esp, which ++ * should point at the end of the stack frame. Note I have ignored ++ * completely exceptions that cause an error code to be stacked, such ++ * as double fault. Stuart Hughes, Zentropix. ++ * original code: gdb_regs[_ESP] = (int) (regs + 1) ; ++ ++ * this is now done on entry and moved to OLD_esp (as well as NEW_esp). 
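++ *
++ * Summary (added for clarity): on entry OLD_esp and NEW_esp are both
++ * set to &regs->esp; gdb's register writes only update NEW_esp, and
++ * the exit path of kgdb_handle_exception() compares the two to decide
++ * whether the stack-moving stub described above must be run.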
++ */ ++ gdb_regs[_ESP] = NEW_esp; ++ gdb_regs[_SS] = __KERNEL_DS; ++ gdb_regs[_FS] = 0xFFFF; ++ gdb_regs[_GS] = 0xFFFF; ++} /* regs_to_gdb_regs */ ++ ++static void ++gdb_regs_to_regs(int *gdb_regs, struct pt_regs *regs) ++{ ++ regs->eax = gdb_regs[_EAX]; ++ regs->ebx = gdb_regs[_EBX]; ++ regs->ecx = gdb_regs[_ECX]; ++ regs->edx = gdb_regs[_EDX]; ++ regs->esi = gdb_regs[_ESI]; ++ regs->edi = gdb_regs[_EDI]; ++ regs->ebp = gdb_regs[_EBP]; ++ regs->xds = gdb_regs[_DS]; ++ regs->xes = gdb_regs[_ES]; ++ regs->eflags = gdb_regs[_PS]; ++ regs->xcs = gdb_regs[_CS]; ++ regs->eip = gdb_regs[_PC]; ++ NEW_esp = gdb_regs[_ESP]; /* keep the value */ ++#if 0 /* can't change these */ ++ regs->esp = gdb_regs[_ESP]; ++ regs->xss = gdb_regs[_SS]; ++ regs->fs = gdb_regs[_FS]; ++ regs->gs = gdb_regs[_GS]; ++#endif ++ ++} /* gdb_regs_to_regs */ ++extern void scheduling_functions_start_here(void); ++extern void scheduling_functions_end_here(void); ++#define first_sched ((unsigned long) scheduling_functions_start_here) ++#define last_sched ((unsigned long) scheduling_functions_end_here) ++ ++int thread_list = 0; ++ ++void ++get_gdb_regs(struct task_struct *p, struct pt_regs *regs, int *gdb_regs) ++{ ++ unsigned long stack_page; ++ int count = 0; ++ IF_SMP(int i); ++ if (!p || p == current) { ++ regs_to_gdb_regs(gdb_regs, regs); ++ return; ++ } ++#ifdef CONFIG_SMP ++ for (i = 0; i < MAX_NO_CPUS; i++) { ++ if (p == kgdb_info.cpus_waiting[i].task) { ++ regs_to_gdb_regs(gdb_regs, ++ kgdb_info.cpus_waiting[i].regs); ++ gdb_regs[_ESP] = ++ (int) &kgdb_info.cpus_waiting[i].regs->esp; ++ ++ return; ++ } ++ } ++#endif ++ memset(gdb_regs, 0, NUMREGBYTES); ++ gdb_regs[_ESP] = p->thread.esp; ++ gdb_regs[_PC] = p->thread.eip; ++ gdb_regs[_EBP] = *(int *) gdb_regs[_ESP]; ++ gdb_regs[_EDI] = *(int *) (gdb_regs[_ESP] + 4); ++ gdb_regs[_ESI] = *(int *) (gdb_regs[_ESP] + 8); ++ ++/* ++ * This code is to give a more informative notion of where a process ++ * is waiting. It is used only when the user asks for a thread info ++ * list. If he then switches to the thread, s/he will find the task ++ * is in schedule, but a back trace should show the same info we come ++ * up with. This code was shamelessly purloined from process.c. It was ++ * then enhanced to provide more registers than simply the program ++ * counter. ++ */ ++ ++ if (!thread_list) { ++ return; ++ } ++ ++ if (p->state == TASK_RUNNING) ++ return; ++ stack_page = (unsigned long) p->thread_info; ++ if (gdb_regs[_ESP] < stack_page || gdb_regs[_ESP] > 8188 + stack_page) ++ return; ++ /* include/asm-i386/system.h:switch_to() pushes ebp last. */ ++ do { ++ if (gdb_regs[_EBP] < stack_page || ++ gdb_regs[_EBP] > 8184 + stack_page) ++ return; ++ gdb_regs[_PC] = *(unsigned long *) (gdb_regs[_EBP] + 4); ++ gdb_regs[_ESP] = gdb_regs[_EBP] + 8; ++ gdb_regs[_EBP] = *(unsigned long *) gdb_regs[_EBP]; ++ if (gdb_regs[_PC] < first_sched || gdb_regs[_PC] >= last_sched) ++ return; ++ } while (count++ < 16); ++ return; ++} ++ ++/* Indicate to caller of mem2hex or hex2mem that there has been an ++ error. 
*/ ++static volatile int mem_err = 0; ++static volatile int mem_err_expected = 0; ++static volatile int mem_err_cnt = 0; ++static int garbage_loc = -1; ++ ++int ++get_char(char *addr) ++{ ++ return *addr; ++} ++ ++void ++set_char(char *addr, int val, int may_fault) ++{ ++ /* ++ * This code traps references to the area mapped to the kernel ++ * stack as given by the regs and, instead, stores to the ++ * fn_call_lookaside[cpu].array ++ */ ++ if (may_fault && ++ (unsigned int) addr < OLD_esp && ++ ((unsigned int) addr > (OLD_esp - (unsigned int) LOOKASIDE_SIZE))) { ++ addr = (char *) END_OF_LOOKASIDE - ((char *) OLD_esp - addr); ++ } ++ *addr = val; ++} ++ ++/* convert the memory pointed to by mem into hex, placing result in buf */ ++/* return a pointer to the last char put in buf (null) */ ++/* If MAY_FAULT is non-zero, then we should set mem_err in response to ++ a fault; if zero treat a fault like any other fault in the stub. */ ++char * ++mem2hex(char *mem, char *buf, int count, int may_fault) ++{ ++ int i; ++ unsigned char ch; ++ ++ if (may_fault) { ++ mem_err_expected = 1; ++ mem_err = 0; ++ } ++ for (i = 0; i < count; i++) { ++ /* printk("%lx = ", mem) ; */ ++ ++ ch = get_char(mem++); ++ ++ /* printk("%02x\n", ch & 0xFF) ; */ ++ if (may_fault && mem_err) { ++ if (remote_debug) ++ printk("Mem fault fetching from addr %lx\n", ++ (long) (mem - 1)); ++ *buf = 0; /* truncate buffer */ ++ return (buf); ++ } ++ *buf++ = hexchars[ch >> 4]; ++ *buf++ = hexchars[ch % 16]; ++ } ++ *buf = 0; ++ if (may_fault) ++ mem_err_expected = 0; ++ return (buf); ++} ++ ++/* convert the hex array pointed to by buf into binary to be placed in mem */ ++/* return a pointer to the character AFTER the last byte written */ ++/* NOTE: We use the may fault flag to also indicate if the write is to ++ * the registers (0) or "other" memory (!=0) ++ */ ++char * ++hex2mem(char *buf, char *mem, int count, int may_fault) ++{ ++ int i; ++ unsigned char ch; ++ ++ if (may_fault) { ++ mem_err_expected = 1; ++ mem_err = 0; ++ } ++ for (i = 0; i < count; i++) { ++ ch = hex(*buf++) << 4; ++ ch = ch + hex(*buf++); ++ set_char(mem++, ch, may_fault); ++ ++ if (may_fault && mem_err) { ++ if (remote_debug) ++ printk("Mem fault storing to addr %lx\n", ++ (long) (mem - 1)); ++ return (mem); ++ } ++ } ++ if (may_fault) ++ mem_err_expected = 0; ++ return (mem); ++} ++ ++/**********************************************/ ++/* WHILE WE FIND NICE HEX CHARS, BUILD AN INT */ ++/* RETURN NUMBER OF CHARS PROCESSED */ ++/**********************************************/ ++int ++hexToInt(char **ptr, int *intValue) ++{ ++ int numChars = 0; ++ int hexValue; ++ ++ *intValue = 0; ++ ++ while (**ptr) { ++ hexValue = hex(**ptr); ++ if (hexValue >= 0) { ++ *intValue = (*intValue << 4) | hexValue; ++ numChars++; ++ } else ++ break; ++ ++ (*ptr)++; ++ } ++ ++ return (numChars); ++} ++ ++#define stubhex(h) hex(h) ++#ifdef old_thread_list ++ ++static int ++stub_unpack_int(char *buff, int fieldlength) ++{ ++ int nibble; ++ int retval = 0; ++ ++ while (fieldlength) { ++ nibble = stubhex(*buff++); ++ retval |= nibble; ++ fieldlength--; ++ if (fieldlength) ++ retval = retval << 4; ++ } ++ return retval; ++} ++#endif ++static char * ++pack_hex_byte(char *pkt, int byte) ++{ ++ *pkt++ = hexchars[(byte >> 4) & 0xf]; ++ *pkt++ = hexchars[(byte & 0xf)]; ++ return pkt; ++} ++ ++#define BUF_THREAD_ID_SIZE 16 ++ ++static char * ++pack_threadid(char *pkt, threadref * id) ++{ ++ char *limit; ++ unsigned char *altid; ++ ++ altid = (unsigned char *) id; ++ limit = pkt + 
BUF_THREAD_ID_SIZE; ++ while (pkt < limit) ++ pkt = pack_hex_byte(pkt, *altid++); ++ return pkt; ++} ++ ++#ifdef old_thread_list ++static char * ++unpack_byte(char *buf, int *value) ++{ ++ *value = stub_unpack_int(buf, 2); ++ return buf + 2; ++} ++ ++static char * ++unpack_threadid(char *inbuf, threadref * id) ++{ ++ char *altref; ++ char *limit = inbuf + BUF_THREAD_ID_SIZE; ++ int x, y; ++ ++ altref = (char *) id; ++ ++ while (inbuf < limit) { ++ x = stubhex(*inbuf++); ++ y = stubhex(*inbuf++); ++ *altref++ = (x << 4) | y; ++ } ++ return inbuf; ++} ++#endif ++void ++int_to_threadref(threadref * id, int value) ++{ ++ unsigned char *scan; ++ ++ scan = (unsigned char *) id; ++ { ++ int i = 4; ++ while (i--) ++ *scan++ = 0; ++ } ++ *scan++ = (value >> 24) & 0xff; ++ *scan++ = (value >> 16) & 0xff; ++ *scan++ = (value >> 8) & 0xff; ++ *scan++ = (value & 0xff); ++} ++int ++int_to_hex_v(unsigned char * id, int value) ++{ ++ unsigned char *start = id; ++ int shift; ++ int ch; ++ ++ for (shift = 28; shift >= 0; shift -= 4) { ++ if ((ch = (value >> shift) & 0xf) || (id != start)) { ++ *id = hexchars[ch]; ++ id++; ++ } ++ } ++ if (id == start) ++ *id++ = '0'; ++ return id - start; ++} ++#ifdef old_thread_list ++ ++static int ++threadref_to_int(threadref * ref) ++{ ++ int i, value = 0; ++ unsigned char *scan; ++ ++ scan = (char *) ref; ++ scan += 4; ++ i = 4; ++ while (i-- > 0) ++ value = (value << 8) | ((*scan++) & 0xff); ++ return value; ++} ++#endif ++static int ++cmp_str(char *s1, char *s2, int count) ++{ ++ while (count--) { ++ if (*s1++ != *s2++) ++ return 0; ++ } ++ return 1; ++} ++ ++#if 1 /* this is a hold over from 2.4 where O(1) was "sometimes" */ ++extern struct task_struct *kgdb_get_idle(int cpu); ++#define idle_task(cpu) kgdb_get_idle(cpu) ++#else ++#define idle_task(cpu) init_tasks[cpu] ++#endif ++ ++extern int kgdb_pid_init_done; ++ ++struct task_struct * ++getthread(int pid) ++{ ++ struct task_struct *thread; ++ if (pid >= PID_MAX && pid <= (PID_MAX + MAX_NO_CPUS)) { ++ ++ return idle_task(pid - PID_MAX); ++ } else { ++ /* ++ * find_task_by_pid is relatively safe all the time ++ * Other pid functions require lock downs which imply ++ * that we may be interrupting them (as we get here ++ * in the middle of most any lock down). ++ * Still we don't want to call until the table exists! 
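++ *
++ * To illustrate the pid mapping used here: real tasks keep their own
++ * pid, while the per-cpu idle tasks (pid 0) are presented to gdb as
++ * PID_MAX + cpu, so e.g. cpu 1's idle task appears as thread
++ * PID_MAX + 1.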
++ */ ++ if (kgdb_pid_init_done){ ++ thread = find_task_by_pid(pid); ++ if (thread) { ++ return thread; ++ } ++ } ++ } ++ return NULL; ++} ++/* *INDENT-OFF* */ ++struct hw_breakpoint { ++ unsigned enabled; ++ unsigned type; ++ unsigned len; ++ unsigned addr; ++} breakinfo[4] = { {enabled:0}, ++ {enabled:0}, ++ {enabled:0}, ++ {enabled:0}}; ++/* *INDENT-ON* */ ++unsigned hw_breakpoint_status; ++void ++correct_hw_break(void) ++{ ++ int breakno; ++ int correctit; ++ int breakbit; ++ unsigned dr7; ++ ++ asm volatile ("movl %%db7, %0\n":"=r" (dr7) ++ :); ++ /* *INDENT-OFF* */ ++ do { ++ unsigned addr0, addr1, addr2, addr3; ++ asm volatile ("movl %%db0, %0\n" ++ "movl %%db1, %1\n" ++ "movl %%db2, %2\n" ++ "movl %%db3, %3\n" ++ :"=r" (addr0), "=r"(addr1), ++ "=r"(addr2), "=r"(addr3) ++ :); ++ } while (0); ++ /* *INDENT-ON* */ ++ correctit = 0; ++ for (breakno = 0; breakno < 3; breakno++) { ++ breakbit = 2 << (breakno << 1); ++ if (!(dr7 & breakbit) && breakinfo[breakno].enabled) { ++ correctit = 1; ++ dr7 |= breakbit; ++ dr7 &= ~(0xf0000 << (breakno << 2)); ++ dr7 |= (((breakinfo[breakno].len << 2) | ++ breakinfo[breakno].type) << 16) << ++ (breakno << 2); ++ switch (breakno) { ++ case 0: ++ asm volatile ("movl %0, %%dr0\n"::"r" ++ (breakinfo[breakno].addr)); ++ break; ++ ++ case 1: ++ asm volatile ("movl %0, %%dr1\n"::"r" ++ (breakinfo[breakno].addr)); ++ break; ++ ++ case 2: ++ asm volatile ("movl %0, %%dr2\n"::"r" ++ (breakinfo[breakno].addr)); ++ break; ++ ++ case 3: ++ asm volatile ("movl %0, %%dr3\n"::"r" ++ (breakinfo[breakno].addr)); ++ break; ++ } ++ } else if ((dr7 & breakbit) && !breakinfo[breakno].enabled) { ++ correctit = 1; ++ dr7 &= ~breakbit; ++ dr7 &= ~(0xf0000 << (breakno << 2)); ++ } ++ } ++ if (correctit) { ++ asm volatile ("movl %0, %%db7\n"::"r" (dr7)); ++ } ++} ++ ++int ++remove_hw_break(unsigned breakno) ++{ ++ if (!breakinfo[breakno].enabled) { ++ return -1; ++ } ++ breakinfo[breakno].enabled = 0; ++ return 0; ++} ++ ++int ++set_hw_break(unsigned breakno, unsigned type, unsigned len, unsigned addr) ++{ ++ if (breakinfo[breakno].enabled) { ++ return -1; ++ } ++ breakinfo[breakno].enabled = 1; ++ breakinfo[breakno].type = type; ++ breakinfo[breakno].len = len; ++ breakinfo[breakno].addr = addr; ++ return 0; ++} ++ ++#ifdef CONFIG_SMP ++static int in_kgdb_console = 0; ++ ++int ++in_kgdb(struct pt_regs *regs) ++{ ++ unsigned flags; ++ int cpu = smp_processor_id(); ++ in_kgdb_called = 1; ++ if (!spin_is_locked(&kgdb_spinlock)) { ++ if (in_kgdb_here_log[cpu] || /* we are holding this cpu */ ++ in_kgdb_console) { /* or we are doing slow i/o */ ++ return 1; ++ } ++ return 0; ++ } ++ ++ /* As I see it the only reason not to let all cpus spin on ++ * the same spin_lock is to allow selected ones to proceed. ++ * This would be a good thing, so we leave it this way. ++ * Maybe someday.... Done ! ++ ++ * in_kgdb() is called from an NMI so we don't pretend ++ * to have any resources, like printk() for example. ++ */ ++ ++ kgdb_local_irq_save(flags); /* only local here, to avoid hanging */ ++ /* ++ * log arival of this cpu ++ * The NMI keeps on ticking. Protect against recurring more ++ * than once, and ignor the cpu that has the kgdb lock ++ */ ++ in_kgdb_entry_log[cpu]++; ++ in_kgdb_here_log[cpu] = regs; ++ if (cpu == spinlock_cpu || waiting_cpus[cpu].task) { ++ goto exit_in_kgdb; ++ } ++ /* ++ * For protection of the initilization of the spin locks by kgdb ++ * it locks the kgdb spinlock before it gets the wait locks set ++ * up. We wait here for the wait lock to be taken. 
If the ++ * kgdb lock goes away first?? Well, it could be a slow exit ++ * sequence where the wait lock is removed prior to the kgdb lock ++ * so if kgdb gets unlocked, we just exit. ++ */ ++ while (spin_is_locked(&kgdb_spinlock) && ++ !spin_is_locked(waitlocks + cpu)) ; ++ if (!spin_is_locked(&kgdb_spinlock)) { ++ goto exit_in_kgdb; ++ } ++ waiting_cpus[cpu].task = current; ++ waiting_cpus[cpu].pid = (current->pid) ? : (PID_MAX + cpu); ++ waiting_cpus[cpu].regs = regs; ++ ++ spin_unlock_wait(waitlocks + cpu); ++ /* ++ * log departure of this cpu ++ */ ++ waiting_cpus[cpu].task = 0; ++ waiting_cpus[cpu].pid = 0; ++ waiting_cpus[cpu].regs = 0; ++ correct_hw_break(); ++ exit_in_kgdb: ++ in_kgdb_here_log[cpu] = 0; ++ kgdb_local_irq_restore(flags); ++ return 1; ++ /* ++ spin_unlock(continuelocks + smp_processor_id()); ++ */ ++} ++ ++void ++smp__in_kgdb(struct pt_regs regs) ++{ ++ ack_APIC_irq(); ++ in_kgdb(®s); ++} ++#else ++int ++in_kgdb(struct pt_regs *regs) ++{ ++ return (kgdb_spinlock); ++} ++#endif ++ ++void ++printexceptioninfo(int exceptionNo, int errorcode, char *buffer) ++{ ++ unsigned dr6; ++ int i; ++ switch (exceptionNo) { ++ case 1: /* debug exception */ ++ break; ++ case 3: /* breakpoint */ ++ sprintf(buffer, "Software breakpoint"); ++ return; ++ default: ++ sprintf(buffer, "Details not available"); ++ return; ++ } ++ asm volatile ("movl %%db6, %0\n":"=r" (dr6) ++ :); ++ if (dr6 & 0x4000) { ++ sprintf(buffer, "Single step"); ++ return; ++ } ++ for (i = 0; i < 4; ++i) { ++ if (dr6 & (1 << i)) { ++ sprintf(buffer, "Hardware breakpoint %d", i); ++ return; ++ } ++ } ++ sprintf(buffer, "Unknown trap"); ++ return; ++} ++ ++/* ++ * This function does all command procesing for interfacing to gdb. ++ * ++ * NOTE: The INT nn instruction leaves the state of the interrupt ++ * enable flag UNCHANGED. That means that when this routine ++ * is entered via a breakpoint (INT 3) instruction from code ++ * that has interrupts enabled, then interrupts will STILL BE ++ * enabled when this routine is entered. The first thing that ++ * we do here is disable interrupts so as to prevent recursive ++ * entries and bothersome serial interrupts while we are ++ * trying to run the serial port in polled mode. ++ * ++ * For kernel version 2.1.xx the kgdb_cli() actually gets a spin lock so ++ * it is always necessary to do a restore_flags before returning ++ * so as to let go of that lock. ++ */ ++int ++kgdb_handle_exception(int exceptionVector, ++ int signo, int err_code, struct pt_regs *linux_regs) ++{ ++ struct task_struct *usethread = NULL; ++ struct task_struct *thread_list_start = 0, *thread = NULL; ++ int addr, length; ++ unsigned long address; ++ int breakno, breaktype; ++ char *ptr; ++ int newPC; ++ threadref thref; ++ int threadid; ++ int thread_min = PID_MAX + MAX_NO_CPUS; ++#ifdef old_thread_list ++ int maxthreads; ++#endif ++ int nothreads; ++ unsigned long flags; ++ int gdb_regs[NUMREGBYTES / 4]; ++ int dr6; ++ IF_SMP(int entry_state = 0); /* 0, ok, 1, no nmi, 2 sync failed */ ++#define NO_NMI 1 ++#define NO_SYNC 2 ++#define regs (*linux_regs) ++#define NUMREGS NUMREGBYTES/4 ++ /* ++ * If the entry is not from the kernel then return to the Linux ++ * trap handler and let it process the interrupt normally. ++ */ ++ if ((linux_regs->eflags & VM_MASK) || (3 & linux_regs->xcs)) { ++ printk("ignoring non-kernel exception\n"); ++ print_regs(®s); ++ return (0); ++ } ++ /* ++ * If we're using eth mode, set the 'mode' in the netdevice. 
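++ * (When kgdb_eth is -1 we are debugging over the serial line instead;
++ * the trapmode call below is then skipped and all I/O goes through the
++ * tty_* routines, as in putDebugChar() above.)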
++ */ ++ ++ __asm__("movl %%cr2,%0":"=r" (address)); ++ ++ if (kgdb_eth != -1) { ++ kgdb_eth_set_trapmode(1); ++ } ++ ++ kgdb_local_irq_save(flags); ++ ++ /* Get kgdb spinlock */ ++ ++ KGDB_SPIN_LOCK(&kgdb_spinlock); ++ rdtscll(kgdb_info.entry_tsc); ++ /* ++ * We depend on this spinlock and the NMI watch dog to control the ++ * other cpus. They will arrive at "in_kgdb()" as a result of the ++ * NMI and will wait there for the following spin locks to be ++ * released. ++ */ ++#ifdef CONFIG_SMP ++ ++#if 0 ++ if (cpu_callout_map & ~MAX_CPU_MASK) { ++ printk("kgdb : too many cpus, possibly not mapped" ++ " in contiguous space, change MAX_NO_CPUS" ++ " in kgdb_stub and make new kernel.\n" ++ " cpu_callout_map is %lx\n", cpu_callout_map); ++ goto exit_just_unlock; ++ } ++#endif ++ if (spinlock_count == 1) { ++ int time, end_time, dum; ++ int i; ++ int cpu_logged_in[MAX_NO_CPUS] = {[0 ... MAX_NO_CPUS - 1] = (0) ++ }; ++ if (remote_debug) { ++ printk("kgdb : cpu %d entry, syncing others\n", ++ smp_processor_id()); ++ } ++ for (i = 0; i < MAX_NO_CPUS; i++) { ++ /* ++ * Use trylock as we may already hold the lock if ++ * we are holding the cpu. Net result is all ++ * locked. ++ */ ++ spin_trylock(&waitlocks[i]); ++ } ++ for (i = 0; i < MAX_NO_CPUS; i++) ++ cpu_logged_in[i] = 0; ++ /* ++ * Wait for their arrival. We know the watch dog is active if ++ * in_kgdb() has ever been called, as it is always called on a ++ * watchdog tick. ++ */ ++ rdtsc(dum, time); ++ end_time = time + 2; /* Note: we use the High order bits! */ ++ i = 1; ++ if (num_online_cpus() > 1) { ++ int me_in_kgdb = in_kgdb_entry_log[smp_processor_id()]; ++ smp_send_nmi_allbutself(); ++ while (i < num_online_cpus() && time != end_time) { ++ int j; ++ for (j = 0; j < MAX_NO_CPUS; j++) { ++ if (waiting_cpus[j].task && ++ !cpu_logged_in[j]) { ++ i++; ++ cpu_logged_in[j] = 1; ++ if (remote_debug) { ++ printk ++ ("kgdb : cpu %d arrived at kgdb\n", ++ j); ++ } ++ break; ++ } else if (!waiting_cpus[j].task && ++ !cpu_online(j)) { ++ waiting_cpus[j].task = NOCPU; ++ cpu_logged_in[j] = 1; ++ waiting_cpus[j].hold = 1; ++ break; ++ } ++ if (!waiting_cpus[j].task && ++ in_kgdb_here_log[j]) { ++ ++ int wait = 100000; ++ while (wait--) ; ++ if (!waiting_cpus[j].task && ++ in_kgdb_here_log[j]) { ++ printk ++ ("kgdb : cpu %d stall" ++ " in in_kgdb\n", ++ j); ++ i++; ++ cpu_logged_in[j] = 1; ++ waiting_cpus[j].task = ++ (struct task_struct ++ *) 1; ++ } ++ } ++ } ++ ++ if (in_kgdb_entry_log[smp_processor_id()] > ++ (me_in_kgdb + 10)) { ++ break; ++ } ++ ++ rdtsc(dum, time); ++ } ++ if (i < num_online_cpus()) { ++ printk ++ ("kgdb : time out, proceeding without sync\n"); ++#if 0 ++ printk("kgdb : Waiting_cpus: 0 = %d, 1 = %d\n", ++ waiting_cpus[0].task != 0, ++ waiting_cpus[1].task != 0); ++ printk("kgdb : Cpu_logged in: 0 = %d, 1 = %d\n", ++ cpu_logged_in[0], cpu_logged_in[1]); ++ printk ++ ("kgdb : in_kgdb_here_log in: 0 = %d, 1 = %d\n", ++ in_kgdb_here_log[0] != 0, ++ in_kgdb_here_log[1] != 0); ++#endif ++ entry_state = NO_SYNC; ++ } else { ++#if 0 ++ int ent = ++ in_kgdb_entry_log[smp_processor_id()] - ++ me_in_kgdb; ++ printk("kgdb : sync after %d entries\n", ent); ++#endif ++ } ++ } else { ++ if (remote_debug) { ++ printk ++ ("kgdb : %d cpus, but watchdog not active\n" ++ "proceeding without locking down other cpus\n", ++ num_online_cpus()); ++ entry_state = NO_NMI; ++ } ++ } ++ } ++#endif ++ ++ if (remote_debug) { ++ printk("handle_exception(exceptionVector=%d, " ++ "signo=%d, err_code=%d, linux_regs=%p)\n", ++ exceptionVector, signo, err_code, 
linux_regs); ++ printk(" address: %lx\n", address); ++ ++ if (debug_regs) { ++ print_regs(®s); ++ show_trace(current, (unsigned long *)®s); ++ } ++ } ++ ++ /* Disable hardware debugging while we are in kgdb */ ++ /* Get the debug register status register */ ++/* *INDENT-OFF* */ ++ __asm__("movl %0,%%db7" ++ : /* no output */ ++ :"r"(0)); ++ ++ asm volatile ("movl %%db6, %0\n" ++ :"=r" (hw_breakpoint_status) ++ :); ++ ++/* *INDENT-ON* */ ++ switch (exceptionVector) { ++ case 0: /* divide error */ ++ case 1: /* debug exception */ ++ case 2: /* NMI */ ++ case 3: /* breakpoint */ ++ case 4: /* overflow */ ++ case 5: /* bounds check */ ++ case 6: /* invalid opcode */ ++ case 7: /* device not available */ ++ case 8: /* double fault (errcode) */ ++ case 10: /* invalid TSS (errcode) */ ++ case 12: /* stack fault (errcode) */ ++ case 16: /* floating point error */ ++ case 17: /* alignment check (errcode) */ ++ default: /* any undocumented */ ++ break; ++ case 11: /* segment not present (errcode) */ ++ case 13: /* general protection (errcode) */ ++ case 14: /* page fault (special errcode) */ ++ case 19: /* cache flush denied */ ++ if (mem_err_expected) { ++ /* ++ * This fault occured because of the ++ * get_char or set_char routines. These ++ * two routines use either eax of edx to ++ * indirectly reference the location in ++ * memory that they are working with. ++ * For a page fault, when we return the ++ * instruction will be retried, so we ++ * have to make sure that these ++ * registers point to valid memory. ++ */ ++ mem_err = 1; /* set mem error flag */ ++ mem_err_expected = 0; ++ mem_err_cnt++; /* helps in debugging */ ++ /* make valid address */ ++ regs.eax = (long) &garbage_loc; ++ /* make valid address */ ++ regs.edx = (long) &garbage_loc; ++ if (remote_debug) ++ printk("Return after memory error: " ++ "mem_err_cnt=%d\n", mem_err_cnt); ++ if (debug_regs) ++ print_regs(®s); ++ goto exit_kgdb; ++ } ++ break; ++ } ++ if (remote_debug) ++ printk("kgdb : entered kgdb on cpu %d\n", smp_processor_id()); ++ ++ gdb_i386vector = exceptionVector; ++ gdb_i386errcode = err_code; ++ kgdb_info.called_from = __builtin_return_address(0); ++#ifdef CONFIG_SMP ++ /* ++ * OK, we can now communicate, lets tell gdb about the sync. ++ * but only if we had a problem. ++ */ ++ switch (entry_state) { ++ case NO_NMI: ++ to_gdb("NMI not active, other cpus not stopped\n"); ++ break; ++ case NO_SYNC: ++ to_gdb("Some cpus not stopped, see 'kgdb_info' for details\n"); ++ default:; ++ } ++ ++#endif ++/* ++ * Set up the gdb function call area. ++ */ ++ trap_cpu = smp_processor_id(); ++ OLD_esp = NEW_esp = (int) (&linux_regs->esp); ++ ++ IF_SMP(once_again:) ++ /* reply to host that an exception has occurred */ ++ remcomOutBuffer[0] = 'S'; ++ remcomOutBuffer[1] = hexchars[signo >> 4]; ++ remcomOutBuffer[2] = hexchars[signo % 16]; ++ remcomOutBuffer[3] = 0; ++ ++ if (kgdb_eth_is_initializing) { ++ kgdb_eth_is_initializing = 0; ++ } else { ++ putpacket(remcomOutBuffer); ++ } ++ ++ kgdb_eth_reply_arp(); ++ while (1 == 1) { ++ error = 0; ++ remcomOutBuffer[0] = 0; ++ getpacket(remcomInBuffer); ++ switch (remcomInBuffer[0]) { ++ case '?': ++ remcomOutBuffer[0] = 'S'; ++ remcomOutBuffer[1] = hexchars[signo >> 4]; ++ remcomOutBuffer[2] = hexchars[signo % 16]; ++ remcomOutBuffer[3] = 0; ++ break; ++ case 'd': ++ remote_debug = !(remote_debug); /* toggle debug flag */ ++ printk("Remote debug %s\n", ++ remote_debug ? 
"on" : "off"); ++ break; ++ case 'g': /* return the value of the CPU registers */ ++ get_gdb_regs(usethread, ®s, gdb_regs); ++ mem2hex((char *) gdb_regs, ++ remcomOutBuffer, NUMREGBYTES, 0); ++ break; ++ case 'G': /* set the value of the CPU registers - return OK */ ++ hex2mem(&remcomInBuffer[1], ++ (char *) gdb_regs, NUMREGBYTES, 0); ++ if (!usethread || usethread == current) { ++ gdb_regs_to_regs(gdb_regs, ®s); ++ strcpy(remcomOutBuffer, "OK"); ++ } else { ++ strcpy(remcomOutBuffer, "E00"); ++ } ++ break; ++ ++ case 'P':{ /* set the value of a single CPU register - ++ return OK */ ++ /* ++ * For some reason, gdb wants to talk about psudo ++ * registers (greater than 15). These may have ++ * meaning for ptrace, but for us it is safe to ++ * ignor them. We do this by dumping them into ++ * _GS which we also ignor, but do have memory for. ++ */ ++ int regno; ++ ++ ptr = &remcomInBuffer[1]; ++ regs_to_gdb_regs(gdb_regs, ®s); ++ if ((!usethread || usethread == current) && ++ hexToInt(&ptr, ®no) && ++ *ptr++ == '=' && (regno >= 0)) { ++ regno = ++ (regno >= NUMREGS ? _GS : regno); ++ hex2mem(ptr, (char *) &gdb_regs[regno], ++ 4, 0); ++ gdb_regs_to_regs(gdb_regs, ®s); ++ strcpy(remcomOutBuffer, "OK"); ++ break; ++ } ++ strcpy(remcomOutBuffer, "E01"); ++ break; ++ } ++ ++ /* mAA..AA,LLLL Read LLLL bytes at address AA..AA */ ++ case 'm': ++ /* TRY TO READ %x,%x. IF SUCCEED, SET PTR = 0 */ ++ ptr = &remcomInBuffer[1]; ++ if (hexToInt(&ptr, &addr) && ++ (*(ptr++) == ',') && (hexToInt(&ptr, &length))) { ++ ptr = 0; ++ /* ++ * hex doubles the byte count ++ */ ++ if (length > (BUFMAX / 2)) ++ length = BUFMAX / 2; ++ mem2hex((char *) addr, ++ remcomOutBuffer, length, 1); ++ if (mem_err) { ++ strcpy(remcomOutBuffer, "E03"); ++ debug_error("memory fault\n", NULL); ++ } ++ } ++ ++ if (ptr) { ++ strcpy(remcomOutBuffer, "E01"); ++ debug_error ++ ("malformed read memory command: %s\n", ++ remcomInBuffer); ++ } ++ break; ++ ++ /* MAA..AA,LLLL: ++ Write LLLL bytes at address AA.AA return OK */ ++ case 'M': ++ /* TRY TO READ '%x,%x:'. IF SUCCEED, SET PTR = 0 */ ++ ptr = &remcomInBuffer[1]; ++ if (hexToInt(&ptr, &addr) && ++ (*(ptr++) == ',') && ++ (hexToInt(&ptr, &length)) && (*(ptr++) == ':')) { ++ hex2mem(ptr, (char *) addr, length, 1); ++ ++ if (mem_err) { ++ strcpy(remcomOutBuffer, "E03"); ++ debug_error("memory fault\n", NULL); ++ } else { ++ strcpy(remcomOutBuffer, "OK"); ++ } ++ ++ ptr = 0; ++ } ++ if (ptr) { ++ strcpy(remcomOutBuffer, "E02"); ++ debug_error ++ ("malformed write memory command: %s\n", ++ remcomInBuffer); ++ } ++ break; ++ case 'S': ++ remcomInBuffer[0] = 's'; ++ case 'C': ++ /* Csig;AA..AA where ;AA..AA is optional ++ * continue with signal ++ * Since signals are meaning less to us, delete that ++ * part and then fall into the 'c' code. 
++ */ ++ ptr = &remcomInBuffer[1]; ++ length = 2; ++ while (*ptr && *ptr != ';') { ++ length++; ++ ptr++; ++ } ++ if (*ptr) { ++ do { ++ ptr++; ++ *(ptr - length++) = *ptr; ++ } while (*ptr); ++ } else { ++ remcomInBuffer[1] = 0; ++ } ++ ++ /* cAA..AA Continue at address AA..AA(optional) */ ++ /* sAA..AA Step one instruction from AA..AA(optional) */ ++ /* D detach, reply OK and then continue */ ++ case 'c': ++ case 's': ++ case 'D': ++ ++ /* try to read optional parameter, ++ pc unchanged if no parm */ ++ ptr = &remcomInBuffer[1]; ++ if (hexToInt(&ptr, &addr)) { ++ if (remote_debug) ++ printk("Changing EIP to 0x%x\n", addr); ++ ++ regs.eip = addr; ++ } ++ ++ newPC = regs.eip; ++ ++ if (kgdb_eth != -1) { ++ kgdb_eth_set_trapmode(0); ++ } ++ ++ /* clear the trace bit */ ++ regs.eflags &= 0xfffffeff; ++ ++ /* set the trace bit if we're stepping */ ++ if (remcomInBuffer[0] == 's') ++ regs.eflags |= 0x100; ++ ++ /* detach is a friendly version of continue. Note that ++ debugging is still enabled (e.g hit control C) ++ */ ++ if (remcomInBuffer[0] == 'D') { ++ strcpy(remcomOutBuffer, "OK"); ++ putpacket(remcomOutBuffer); ++ } ++ ++ if (remote_debug) { ++ printk("Resuming execution\n"); ++ print_regs(®s); ++ } ++ asm volatile ("movl %%db6, %0\n":"=r" (dr6) ++ :); ++ if (!(dr6 & 0x4000)) { ++ for (breakno = 0; breakno < 4; ++breakno) { ++ if (dr6 & (1 << breakno) && ++ (breakinfo[breakno].type == 0)) { ++ /* Set restore flag */ ++ regs.eflags |= 0x10000; ++ break; ++ } ++ } ++ } ++ correct_hw_break(); ++ asm volatile ("movl %0, %%db6\n"::"r" (0)); ++ goto exit_kgdb; ++ ++ /* kill the program */ ++ case 'k': /* do nothing */ ++ break; ++ ++ /* query */ ++ case 'q': ++ nothreads = 0; ++ switch (remcomInBuffer[1]) { ++ case 'f': ++ threadid = 1; ++ thread_list = 2; ++ thread_list_start = (usethread ? : current); ++ case 's': ++ if (!cmp_str(&remcomInBuffer[2], ++ "ThreadInfo", 10)) ++ break; ++ ++ remcomOutBuffer[nothreads++] = 'm'; ++ for (; threadid < PID_MAX + MAX_NO_CPUS; ++ threadid++) { ++ thread = getthread(threadid); ++ if (thread) { ++ nothreads += int_to_hex_v( ++ &remcomOutBuffer[ ++ nothreads], ++ threadid); ++ if (thread_min > threadid) ++ thread_min = threadid; ++ remcomOutBuffer[ ++ nothreads] = ','; ++ nothreads++; ++ if (nothreads > BUFMAX - 10) ++ break; ++ } ++ } ++ if (remcomOutBuffer[nothreads - 1] == 'm') { ++ remcomOutBuffer[nothreads - 1] = 'l'; ++ } else { ++ nothreads--; ++ } ++ remcomOutBuffer[nothreads] = 0; ++ break; ++ ++#ifdef old_thread_list /* Old thread info request */ ++ case 'L': ++ /* List threads */ ++ thread_list = 2; ++ thread_list_start = (usethread ? 
: current); ++ unpack_byte(remcomInBuffer + 3, &maxthreads); ++ unpack_threadid(remcomInBuffer + 5, &thref); ++ do { ++ int buf_thread_limit = ++ (BUFMAX - 22) / BUF_THREAD_ID_SIZE; ++ if (maxthreads > buf_thread_limit) { ++ maxthreads = buf_thread_limit; ++ } ++ } while (0); ++ remcomOutBuffer[0] = 'q'; ++ remcomOutBuffer[1] = 'M'; ++ remcomOutBuffer[4] = '0'; ++ pack_threadid(remcomOutBuffer + 5, &thref); ++ ++ threadid = threadref_to_int(&thref); ++ for (nothreads = 0; ++ nothreads < maxthreads && ++ threadid < PID_MAX + MAX_NO_CPUS; ++ threadid++) { ++ thread = getthread(threadid); ++ if (thread) { ++ int_to_threadref(&thref, ++ threadid); ++ pack_threadid(remcomOutBuffer + ++ 21 + ++ nothreads * 16, ++ &thref); ++ nothreads++; ++ if (thread_min > threadid) ++ thread_min = threadid; ++ } ++ } ++ ++ if (threadid == PID_MAX + MAX_NO_CPUS) { ++ remcomOutBuffer[4] = '1'; ++ } ++ pack_hex_byte(remcomOutBuffer + 2, nothreads); ++ remcomOutBuffer[21 + nothreads * 16] = '\0'; ++ break; ++#endif ++ case 'C': ++ /* Current thread id */ ++ remcomOutBuffer[0] = 'Q'; ++ remcomOutBuffer[1] = 'C'; ++ threadid = current->pid; ++ if (!threadid) { ++ /* ++ * idle thread ++ */ ++ for (threadid = PID_MAX; ++ threadid < PID_MAX + MAX_NO_CPUS; ++ threadid++) { ++ if (current == ++ idle_task(threadid - ++ PID_MAX)) ++ break; ++ } ++ } ++ int_to_threadref(&thref, threadid); ++ pack_threadid(remcomOutBuffer + 2, &thref); ++ remcomOutBuffer[18] = '\0'; ++ break; ++ ++ case 'E': ++ /* Print exception info */ ++ printexceptioninfo(exceptionVector, ++ err_code, remcomOutBuffer); ++ break; ++ case 'T':{ ++ char * nptr; ++ /* Thread extra info */ ++ if (!cmp_str(&remcomInBuffer[2], ++ "hreadExtraInfo,", 15)) { ++ break; ++ } ++ ptr = &remcomInBuffer[17]; ++ hexToInt(&ptr, &threadid); ++ thread = getthread(threadid); ++ nptr = &thread->comm[0]; ++ length = 0; ++ ptr = &remcomOutBuffer[0]; ++ do { ++ length++; ++ ptr = pack_hex_byte(ptr, *nptr++); ++ } while (*nptr && length < 16); ++ /* ++ * would like that 16 to be the size of ++ * task_struct.comm but don't know the ++ * syntax.. ++ */ ++ *ptr = 0; ++ } ++ } ++ break; ++ ++ /* task related */ ++ case 'H': ++ switch (remcomInBuffer[1]) { ++ case 'g': ++ ptr = &remcomInBuffer[2]; ++ hexToInt(&ptr, &threadid); ++ thread = getthread(threadid); ++ if (!thread) { ++ remcomOutBuffer[0] = 'E'; ++ remcomOutBuffer[1] = '\0'; ++ break; ++ } ++ /* ++ * Just in case I forget what this is all about, ++ * the "thread info" command to gdb causes it ++ * to ask for a thread list. It then switches ++ * to each thread and asks for the registers. ++ * For this (and only this) usage, we want to ++ * fudge the registers of tasks not on the run ++ * list (i.e. waiting) to show the routine that ++ * called schedule. Also, gdb, is a minimalist ++ * in that if the current thread is the last ++ * it will not re-read the info when done. ++ * This means that in this case we must show ++ * the real registers. So here is how we do it: ++ * Each entry we keep track of the min ++ * thread in the list (the last that gdb will) ++ * get info for. We also keep track of the ++ * starting thread. ++ * "thread_list" is cleared when switching back ++ * to the min thread if it is was current, or ++ * if it was not current, thread_list is set ++ * to 1. When the switch to current comes, ++ * if thread_list is 1, clear it, else do ++ * nothing. 
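++ * Concretely (example added for clarity): if the walk started at
++ * thread 5 and the lowest thread gdb will query is 2, the 'Hg'
++ * switch to thread 2 sets thread_list to 1, and the final switch
++ * back to thread 5 clears it so that thread's real registers are
++ * reported rather than the fudged scheduler view.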
++ */ ++ usethread = thread; ++ if ((thread_list == 1) && ++ (thread == thread_list_start)) { ++ thread_list = 0; ++ } ++ if (thread_list && (threadid == thread_min)) { ++ if (thread == thread_list_start) { ++ thread_list = 0; ++ } else { ++ thread_list = 1; ++ } ++ } ++ /* follow through */ ++ case 'c': ++ remcomOutBuffer[0] = 'O'; ++ remcomOutBuffer[1] = 'K'; ++ remcomOutBuffer[2] = '\0'; ++ break; ++ } ++ break; ++ ++ /* Query thread status */ ++ case 'T': ++ ptr = &remcomInBuffer[1]; ++ hexToInt(&ptr, &threadid); ++ thread = getthread(threadid); ++ if (thread) { ++ remcomOutBuffer[0] = 'O'; ++ remcomOutBuffer[1] = 'K'; ++ remcomOutBuffer[2] = '\0'; ++ if (thread_min > threadid) ++ thread_min = threadid; ++ } else { ++ remcomOutBuffer[0] = 'E'; ++ remcomOutBuffer[1] = '\0'; ++ } ++ break; ++ ++ case 'Y': /* set up a hardware breakpoint */ ++ ptr = &remcomInBuffer[1]; ++ hexToInt(&ptr, &breakno); ++ ptr++; ++ hexToInt(&ptr, &breaktype); ++ ptr++; ++ hexToInt(&ptr, &length); ++ ptr++; ++ hexToInt(&ptr, &addr); ++ if (set_hw_break(breakno & 0x3, ++ breaktype & 0x3, ++ length & 0x3, addr) == 0) { ++ strcpy(remcomOutBuffer, "OK"); ++ } else { ++ strcpy(remcomOutBuffer, "ERROR"); ++ } ++ break; ++ ++ /* Remove hardware breakpoint */ ++ case 'y': ++ ptr = &remcomInBuffer[1]; ++ hexToInt(&ptr, &breakno); ++ if (remove_hw_break(breakno & 0x3) == 0) { ++ strcpy(remcomOutBuffer, "OK"); ++ } else { ++ strcpy(remcomOutBuffer, "ERROR"); ++ } ++ break; ++ ++ case 'r': /* reboot */ ++ strcpy(remcomOutBuffer, "OK"); ++ putpacket(remcomOutBuffer); ++ /*to_gdb("Rebooting\n"); */ ++ /* triplefault no return from here */ ++ { ++ static long no_idt[2]; ++ __asm__ __volatile__("lidt %0"::"m"(no_idt[0])); ++ BREAKPOINT; ++ } ++ ++ } /* switch */ ++ ++ /* reply to the request */ ++ putpacket(remcomOutBuffer); ++ } /* while(1==1) */ ++ /* ++ * reached by goto only. ++ */ ++ exit_kgdb: ++ /* ++ * Here is where we set up to trap a gdb function call. NEW_esp ++ * will be changed if we are trying to do this. We handle both ++ * adding and subtracting, thus allowing gdb to put grung on ++ * the stack which it removes later. ++ */ ++ if (NEW_esp != OLD_esp) { ++ int *ptr = END_OF_LOOKASIDE; ++ if (NEW_esp < OLD_esp) ++ ptr -= (OLD_esp - NEW_esp) / sizeof (int); ++ *--ptr = linux_regs->eflags; ++ *--ptr = linux_regs->xcs; ++ *--ptr = linux_regs->eip; ++ *--ptr = linux_regs->ecx; ++ *--ptr = linux_regs->ebx; ++ *--ptr = linux_regs->eax; ++ linux_regs->ecx = NEW_esp - (sizeof (int) * 6); ++ linux_regs->ebx = (unsigned int) END_OF_LOOKASIDE; ++ if (NEW_esp < OLD_esp) { ++ linux_regs->eip = (unsigned int) fn_call_stub; ++ } else { ++ linux_regs->eip = (unsigned int) fn_rtn_stub; ++ linux_regs->eax = NEW_esp; ++ } ++ linux_regs->eflags &= ~(IF_BIT | TF_BIT); ++ } ++#ifdef CONFIG_SMP ++ /* ++ * Release gdb wait locks ++ * Sanity check time. Must have at least one cpu to run. Also single ++ * step must not be done if the current cpu is on hold. ++ */ ++ if (spinlock_count == 1) { ++ int ss_hold = (regs.eflags & 0x100) && kgdb_info.hold_on_sstep; ++ int cpu_avail = 0; ++ int i; ++ ++ for (i = 0; i < MAX_NO_CPUS; i++) { ++ if (!cpu_online(i)) ++ break; ++ if (!hold_cpu(i)) { ++ cpu_avail = 1; ++ } ++ } ++ /* ++ * Early in the bring up there will be NO cpus on line... 
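++ * (i.e. cpu_online_map may still be empty, so an empty map must not
++ * be mistaken for "every cpu is blocked"; the check below only
++ * complains when at least one cpu is online yet none is unblocked.)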
++ */
++ if (!cpu_avail && !cpus_empty(cpu_online_map)) {
++ to_gdb("No cpus unblocked, see 'kgdb_info.hold_cpu'\n");
++ goto once_again;
++ }
++ if (hold_cpu(smp_processor_id()) && (regs.eflags & 0x100)) {
++ to_gdb
++ ("Current cpu must be unblocked to single step\n");
++ goto once_again;
++ }
++ if (!(ss_hold)) {
++ int i;
++ for (i = 0; i < MAX_NO_CPUS; i++) {
++ if (!hold_cpu(i)) {
++ spin_unlock(&waitlocks[i]);
++ }
++ }
++ } else {
++ spin_unlock(&waitlocks[smp_processor_id()]);
++ }
++ /* Release kgdb spinlock */
++ KGDB_SPIN_UNLOCK(&kgdb_spinlock);
++ /*
++ * If this cpu is on hold, this is where we
++ * do it. Note, the NMI will pull us out of here,
++ * but will return as the above lock is not held.
++ * We will stay here till another cpu releases the lock for us.
++ */
++ spin_unlock_wait(waitlocks + smp_processor_id());
++ kgdb_local_irq_restore(flags);
++ return (0);
++ }
++#if 0
++exit_just_unlock:
++#endif
++#endif
++ /* Release kgdb spinlock */
++ KGDB_SPIN_UNLOCK(&kgdb_spinlock);
++ kgdb_local_irq_restore(flags);
++ return (0);
++}
++
++/* This function is used to set up exception handlers for tracing and
++ * breakpoints.
++ * This function is not needed as the above line does all that is needed.
++ * We leave it for backward compatibility...
++ */
++void
++set_debug_traps(void)
++{
++ /*
++ * linux_debug_hook is defined in traps.c. We store a pointer
++ * to our own exception handler into it.
++
++ * But really folks, ever hear of labeled common, an old Fortran
++ * concept. Lots of folks can reference it and it is defined if
++ * anyone does. Only one can initialize it at link time. We do
++ * this with the hook. See the statement above. No need for any
++ * executable code and it is ready as soon as the kernel is
++ * loaded. Very desirable in kernel debugging.
++
++ linux_debug_hook = handle_exception ;
++ */
++
++ /* In case GDB is started before us, ack any packets (presumably
++ "$?#xx") sitting there.
++ putDebugChar ('+');
++
++ initialized = 1;
++ */
++}
++
++/* This function will generate a breakpoint exception. It is used at the
++ beginning of a program to sync up with a debugger and can be used
++ otherwise as a quick means to stop program execution and "break" into
++ the debugger. */
++/* But really, just use the BREAKPOINT macro. We will handle the int stuff
++ */
++
++#ifdef later
++/*
++ * possibly we should not go through the traps.c code at all? Someday.
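++ * (The disabled do_kgdb_int3() below sketches what that would look
++ * like: the int3 vector would enter kgdb_handle_exception() with
++ * signal 5 directly, bypassing do_int3() in traps.c.)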
++ */ ++void ++do_kgdb_int3(struct pt_regs *regs, long error_code) ++{ ++ kgdb_handle_exception(3, 5, error_code, regs); ++ return; ++} ++#endif ++#undef regs ++#ifdef CONFIG_TRAP_BAD_SYSCALL_EXITS ++asmlinkage void ++bad_sys_call_exit(int stuff) ++{ ++ struct pt_regs *regs = (struct pt_regs *) &stuff; ++ printk("Sys call %d return with %x preempt_count\n", ++ (int) regs->orig_eax, preempt_count()); ++} ++#endif ++#ifdef CONFIG_STACK_OVERFLOW_TEST ++#include ++asmlinkage void ++stack_overflow(void) ++{ ++#ifdef BREAKPOINT ++ BREAKPOINT; ++#else ++ printk("Kernel stack overflow, looping forever\n"); ++#endif ++ while (1) { ++ } ++} ++#endif ++ ++#if defined(CONFIG_SMP) || defined(CONFIG_KGDB_CONSOLE) ++char gdbconbuf[BUFMAX]; ++ ++static void ++kgdb_gdb_message(const char *s, unsigned count) ++{ ++ int i; ++ int wcount; ++ char *bufptr; ++ /* ++ * This takes care of NMI while spining out chars to gdb ++ */ ++ IF_SMP(in_kgdb_console = 1); ++ gdbconbuf[0] = 'O'; ++ bufptr = gdbconbuf + 1; ++ while (count > 0) { ++ if ((count << 1) > (BUFMAX - 2)) { ++ wcount = (BUFMAX - 2) >> 1; ++ } else { ++ wcount = count; ++ } ++ count -= wcount; ++ for (i = 0; i < wcount; i++) { ++ bufptr = pack_hex_byte(bufptr, s[i]); ++ } ++ *bufptr = '\0'; ++ s += wcount; ++ ++ putpacket(gdbconbuf); ++ ++ } ++ IF_SMP(in_kgdb_console = 0); ++} ++#endif ++#ifdef CONFIG_SMP ++static void ++to_gdb(const char *s) ++{ ++ int count = 0; ++ while (s[count] && (count++ < BUFMAX)) ; ++ kgdb_gdb_message(s, count); ++} ++#endif ++#ifdef CONFIG_KGDB_CONSOLE ++#include ++#include ++#include ++#include ++#include ++ ++void ++kgdb_console_write(struct console *co, const char *s, unsigned count) ++{ ++ ++ if (gdb_i386vector == -1) { ++ /* ++ * We have not yet talked to gdb. What to do... ++ * lets break, on continue we can do the write. ++ * But first tell him whats up. Uh, well no can do, ++ * as this IS the console. Oh well... ++ * We do need to wait or the messages will be lost. ++ * Other option would be to tell the above code to ++ * ignore this breakpoint and do an auto return, ++ * but that might confuse gdb. Also this happens ++ * early enough in boot up that we don't have the traps ++ * set up yet, so... ++ */ ++ breakpoint(); ++ } ++ kgdb_gdb_message(s, count); ++} ++ ++/* ++ * ------------------------------------------------------------ ++ * Serial KGDB driver ++ * ------------------------------------------------------------ ++ */ ++ ++static struct console kgdbcons = { ++ name:"kgdb", ++ write:kgdb_console_write, ++#ifdef CONFIG_KGDB_USER_CONSOLE ++ device:kgdb_console_device, ++#endif ++ flags:CON_PRINTBUFFER | CON_ENABLED, ++ index:-1, ++}; ++ ++/* ++ * The trick here is that this file gets linked before printk.o ++ * That means we get to peer at the console info in the command ++ * line before it does. If we are up, we register, otherwise, ++ * do nothing. By returning 0, we allow printk to look also. ++ */ ++static int kgdb_console_enabled; ++ ++int __init ++kgdb_console_init(char *str) ++{ ++ if ((strncmp(str, "kgdb", 4) == 0) || (strncmp(str, "gdb", 3) == 0)) { ++ register_console(&kgdbcons); ++ kgdb_console_enabled = 1; ++ } ++ return 0; /* let others look at the string */ ++} ++ ++__setup("console=", kgdb_console_init); ++ ++#ifdef CONFIG_KGDB_USER_CONSOLE ++static kdev_t kgdb_console_device(struct console *c); ++/* This stuff sort of works, but it knocks out telnet devices ++ * we are leaving it here in case we (or you) find time to figure it out ++ * better.. 
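++ * (The breakage comes from kgdb_console_finit() below: it unregisters
++ * whatever char driver currently owns TTYAUX_MAJOR and installs the
++ * kgdb fops in its place, so anything else multiplexed on that major
++ * is lost.)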
++ */ ++ ++/* ++ * We need a real char device as well for when the console is opened for user ++ * space activities. ++ */ ++ ++static int ++kgdb_consdev_open(struct inode *inode, struct file *file) ++{ ++ return 0; ++} ++ ++static ssize_t ++kgdb_consdev_write(struct file *file, const char *buf, ++ size_t count, loff_t * ppos) ++{ ++ int size, ret = 0; ++ static char kbuf[128]; ++ static DECLARE_MUTEX(sem); ++ ++ /* We are not reentrant... */ ++ if (down_interruptible(&sem)) ++ return -ERESTARTSYS; ++ ++ while (count > 0) { ++ /* need to copy the data from user space */ ++ size = count; ++ if (size > sizeof (kbuf)) ++ size = sizeof (kbuf); ++ if (copy_from_user(kbuf, buf, size)) { ++ ret = -EFAULT; ++ break;; ++ } ++ kgdb_console_write(&kgdbcons, kbuf, size); ++ count -= size; ++ ret += size; ++ buf += size; ++ } ++ ++ up(&sem); ++ ++ return ret; ++} ++ ++struct file_operations kgdb_consdev_fops = { ++ open:kgdb_consdev_open, ++ write:kgdb_consdev_write ++}; ++static kdev_t ++kgdb_console_device(struct console *c) ++{ ++ return MKDEV(TTYAUX_MAJOR, 1); ++} ++ ++/* ++ * This routine gets called from the serial stub in the i386/lib ++ * This is so it is done late in bring up (just before the console open). ++ */ ++void ++kgdb_console_finit(void) ++{ ++ if (kgdb_console_enabled) { ++ char *cptr = cdevname(MKDEV(TTYAUX_MAJOR, 1)); ++ char *cp = cptr; ++ while (*cptr && *cptr != '(') ++ cptr++; ++ *cptr = 0; ++ unregister_chrdev(TTYAUX_MAJOR, cp); ++ register_chrdev(TTYAUX_MAJOR, "kgdb", &kgdb_consdev_fops); ++ } ++} ++#endif ++#endif ++#ifdef CONFIG_KGDB_TS ++#include /* time stamp code */ ++#include /* in_interrupt */ ++#ifdef CONFIG_KGDB_TS_64 ++#define DATA_POINTS 64 ++#endif ++#ifdef CONFIG_KGDB_TS_128 ++#define DATA_POINTS 128 ++#endif ++#ifdef CONFIG_KGDB_TS_256 ++#define DATA_POINTS 256 ++#endif ++#ifdef CONFIG_KGDB_TS_512 ++#define DATA_POINTS 512 ++#endif ++#ifdef CONFIG_KGDB_TS_1024 ++#define DATA_POINTS 1024 ++#endif ++#ifndef DATA_POINTS ++#define DATA_POINTS 128 /* must be a power of two */ ++#endif ++#define INDEX_MASK (DATA_POINTS - 1) ++#if (INDEX_MASK & DATA_POINTS) ++#error "CONFIG_KGDB_TS_COUNT must be a power of 2" ++#endif ++struct kgdb_and_then_struct { ++#ifdef CONFIG_SMP ++ int on_cpu; ++#endif ++ struct task_struct *task; ++ long long at_time; ++ int from_ln; ++ char *in_src; ++ void *from; ++ int *with_shpf; ++ int data0; ++ int data1; ++}; ++struct kgdb_and_then_struct2 { ++#ifdef CONFIG_SMP ++ int on_cpu; ++#endif ++ struct task_struct *task; ++ long long at_time; ++ int from_ln; ++ char *in_src; ++ void *from; ++ int *with_shpf; ++ struct task_struct *t1; ++ struct task_struct *t2; ++}; ++struct kgdb_and_then_struct kgdb_data[DATA_POINTS]; ++ ++struct kgdb_and_then_struct *kgdb_and_then = &kgdb_data[0]; ++int kgdb_and_then_count; ++ ++void ++kgdb_tstamp(int line, char *source, int data0, int data1) ++{ ++ static spinlock_t ts_spin = SPIN_LOCK_UNLOCKED; ++ int flags; ++ kgdb_local_irq_save(flags); ++ spin_lock(&ts_spin); ++ rdtscll(kgdb_and_then->at_time); ++#ifdef CONFIG_SMP ++ kgdb_and_then->on_cpu = smp_processor_id(); ++#endif ++ kgdb_and_then->task = current; ++ kgdb_and_then->from_ln = line; ++ kgdb_and_then->in_src = source; ++ kgdb_and_then->from = __builtin_return_address(0); ++ kgdb_and_then->with_shpf = (int *) (((flags & IF_BIT) >> 9) | ++ (preempt_count() << 8)); ++ kgdb_and_then->data0 = data0; ++ kgdb_and_then->data1 = data1; ++ kgdb_and_then = &kgdb_data[++kgdb_and_then_count & INDEX_MASK]; ++ spin_unlock(&ts_spin); ++ kgdb_local_irq_restore(flags); 
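++ /*
++ * Note (added for clarity): kgdb_and_then wraps through kgdb_data[]
++ * via INDEX_MASK, so only the most recent DATA_POINTS events are
++ * retained. From an attached gdb the newest entry is, as an
++ * illustrative expression,
++ * kgdb_data[(kgdb_and_then_count - 1) & INDEX_MASK].
++ */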
++#ifdef CONFIG_PREEMPT
++
++#endif
++ return;
++}
++#endif
++typedef int gdb_debug_hook(int exceptionVector,
++ int signo, int err_code, struct pt_regs *linux_regs);
++gdb_debug_hook *linux_debug_hook = &kgdb_handle_exception; /* historical reasons... */
++
++static int __init kgdb_opt_kgdbeth(char *str)
++{
++ kgdb_eth = simple_strtoul(str, NULL, 10);
++ return 1;
++}
++
++static int __init kgdb_opt_kgdbeth_remoteip(char *str)
++{
++ kgdb_remoteip = in_aton(str);
++ return 1;
++}
++
++static int __init kgdb_opt_kgdbeth_listenport(char *str)
++{
++ kgdb_listenport = simple_strtoul(str, NULL, 10);
++ kgdb_sendport = kgdb_listenport - 1;
++ return 1;
++}
++
++static int __init parse_hw_addr(char *str, unsigned char *addr)
++{
++ int i;
++ char *p;
++
++ p = str;
++ i = 0;
++ while (1)
++ {
++ unsigned int c;
++
++ sscanf(p, "%x:", &c);
++ addr[i++] = c;
++ while ((*p != 0) && (*p != ':')) {
++ p++;
++ }
++ if (*p == 0) {
++ break;
++ }
++ p++;
++ }
++
++ return 1;
++}
++
++static int __init kgdb_opt_kgdbeth_remotemac(char *str)
++{
++ return parse_hw_addr(str, kgdb_remotemac);
++}
++static int __init kgdb_opt_kgdbeth_localmac(char *str)
++{
++ return parse_hw_addr(str, kgdb_localmac);
++}
++
++
++__setup("gdbeth=", kgdb_opt_kgdbeth);
++__setup("gdbeth_remoteip=", kgdb_opt_kgdbeth_remoteip);
++__setup("gdbeth_listenport=", kgdb_opt_kgdbeth_listenport);
++__setup("gdbeth_remotemac=", kgdb_opt_kgdbeth_remotemac);
++__setup("gdbeth_localmac=", kgdb_opt_kgdbeth_localmac);
++
+--- linux-2.6.0-test6/arch/i386/kernel/ldt.c 2003-08-22 19:23:40.000000000 -0700
++++ 25/arch/i386/kernel/ldt.c 2003-10-05 00:36:48.000000000 -0700
+@@ -2,7 +2,7 @@
+ * linux/kernel/ldt.c
+ *
+ * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds
+- * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com>
++ * Copyright (C) 1999, 2003 Ingo Molnar <mingo@redhat.com>
+ */
+
+ #include <linux/errno.h>
+@@ -18,6 +18,8 @@
+ #include <asm/system.h>
+ #include <asm/ldt.h>
+ #include <asm/desc.h>
++#include <linux/highmem.h>
++#include <asm/atomic_kmap.h>
+
+ #ifdef CONFIG_SMP /* avoids "defined but not used" warnig */
+ static void flush_ldt(void *null)
+@@ -29,34 +31,31 @@ static int alloc_ldt(mm_context_t *pc, i
+ static int alloc_ldt(mm_context_t *pc, int mincount, int reload)
+ {
+- void *oldldt;
+- void *newldt;
+- int oldsize;
++ int oldsize, newsize, i;
+
+ if (mincount <= pc->size)
+ return 0;
++ /*
++ * LDT got larger - reallocate if necessary.
++ */ + oldsize = pc->size; + mincount = (mincount+511)&(~511); +- if (mincount*LDT_ENTRY_SIZE > PAGE_SIZE) +- newldt = vmalloc(mincount*LDT_ENTRY_SIZE); +- else +- newldt = kmalloc(mincount*LDT_ENTRY_SIZE, GFP_KERNEL); +- +- if (!newldt) +- return -ENOMEM; +- +- if (oldsize) +- memcpy(newldt, pc->ldt, oldsize*LDT_ENTRY_SIZE); +- oldldt = pc->ldt; +- memset(newldt+oldsize*LDT_ENTRY_SIZE, 0, (mincount-oldsize)*LDT_ENTRY_SIZE); +- pc->ldt = newldt; +- wmb(); ++ newsize = mincount*LDT_ENTRY_SIZE; ++ for (i = 0; i < newsize; i += PAGE_SIZE) { ++ int nr = i/PAGE_SIZE; ++ BUG_ON(i >= 64*1024); ++ if (!pc->ldt_pages[nr]) { ++ pc->ldt_pages[nr] = alloc_page(GFP_HIGHUSER); ++ if (!pc->ldt_pages[nr]) ++ return -ENOMEM; ++ clear_highpage(pc->ldt_pages[nr]); ++ } ++ } + pc->size = mincount; +- wmb(); +- + if (reload) { + #ifdef CONFIG_SMP + cpumask_t mask; ++ + preempt_disable(); + load_LDT(pc); + mask = cpumask_of_cpu(smp_processor_id()); +@@ -67,21 +66,20 @@ static int alloc_ldt(mm_context_t *pc, i + load_LDT(pc); + #endif + } +- if (oldsize) { +- if (oldsize*LDT_ENTRY_SIZE > PAGE_SIZE) +- vfree(oldldt); +- else +- kfree(oldldt); +- } + return 0; + } + + static inline int copy_ldt(mm_context_t *new, mm_context_t *old) + { +- int err = alloc_ldt(new, old->size, 0); +- if (err < 0) ++ int i, err, size = old->size, nr_pages = (size*LDT_ENTRY_SIZE + PAGE_SIZE-1)/PAGE_SIZE; ++ ++ err = alloc_ldt(new, size, 0); ++ if (err < 0) { ++ new->size = 0; + return err; +- memcpy(new->ldt, old->ldt, old->size*LDT_ENTRY_SIZE); ++ } ++ for (i = 0; i < nr_pages; i++) ++ copy_user_highpage(new->ldt_pages[i], old->ldt_pages[i], 0); + return 0; + } + +@@ -96,6 +94,7 @@ int init_new_context(struct task_struct + + init_MUTEX(&mm->context.sem); + mm->context.size = 0; ++ memset(mm->context.ldt_pages, 0, sizeof(struct page *) * MAX_LDT_PAGES); + old_mm = current->mm; + if (old_mm && old_mm->context.size > 0) { + down(&old_mm->context.sem); +@@ -107,23 +106,21 @@ int init_new_context(struct task_struct + + /* + * No need to lock the MM as we are the last user ++ * Do not touch the ldt register, we are already ++ * in the next thread. + */ + void destroy_context(struct mm_struct *mm) + { +- if (mm->context.size) { +- if (mm == current->active_mm) +- clear_LDT(); +- if (mm->context.size*LDT_ENTRY_SIZE > PAGE_SIZE) +- vfree(mm->context.ldt); +- else +- kfree(mm->context.ldt); +- mm->context.size = 0; +- } ++ int i, nr_pages = (mm->context.size*LDT_ENTRY_SIZE + PAGE_SIZE-1) / PAGE_SIZE; ++ ++ for (i = 0; i < nr_pages; i++) ++ __free_page(mm->context.ldt_pages[i]); ++ mm->context.size = 0; + } + + static int read_ldt(void __user * ptr, unsigned long bytecount) + { +- int err; ++ int err, i; + unsigned long size; + struct mm_struct * mm = current->mm; + +@@ -138,8 +135,25 @@ static int read_ldt(void __user * ptr, u + size = bytecount; + + err = 0; +- if (copy_to_user(ptr, mm->context.ldt, size)) +- err = -EFAULT; ++ /* ++ * This is necessary just in case we got here straight from a ++ * context-switch where the ptes were set but no tlb flush ++ * was done yet. We rather avoid doing a TLB flush in the ++ * context-switch path and do it here instead. 
++ */
++ __flush_tlb_global();
++
++ for (i = 0; i < size; i += PAGE_SIZE) {
++ int nr = i / PAGE_SIZE, bytes;
++ char *kaddr = kmap(mm->context.ldt_pages[nr]);
++
++ bytes = size - i;
++ if (bytes > PAGE_SIZE)
++ bytes = PAGE_SIZE;
++ if (copy_to_user(ptr + i, kaddr, bytes))
++ err = -EFAULT;
++ kunmap(mm->context.ldt_pages[nr]);
++ }
+ up(&mm->context.sem);
+ if (err < 0)
+ return err;
+@@ -158,7 +172,7 @@ static int read_default_ldt(void __user
+
+ err = 0;
+ address = &default_ldt[0];
+- size = 5*sizeof(struct desc_struct);
++ size = 5*LDT_ENTRY_SIZE;
+ if (size > bytecount)
+ size = bytecount;
+
+@@ -200,7 +214,15 @@ static int write_ldt(void __user * ptr,
+ goto out_unlock;
+ }
+
+- lp = (__u32 *) ((ldt_info.entry_number << 3) + (char *) mm->context.ldt);
++ /*
++ * No rescheduling allowed from this point to the install.
++ *
++ * We do a TLB flush for the same reason as in the read_ldt() path.
++ */
++ preempt_disable();
++ __flush_tlb_global();
++ lp = (__u32 *) ((ldt_info.entry_number << 3) +
++ (char *) __kmap_atomic_vaddr(KM_LDT_PAGE0));
+
+ /* Allow LDTs to be cleared by the user. */
+ if (ldt_info.base_addr == 0 && ldt_info.limit == 0) {
+@@ -221,6 +243,7 @@ install:
+ *lp = entry_1;
+ *(lp+1) = entry_2;
+ error = 0;
++ preempt_enable();
+
+ out_unlock:
+ up(&mm->context.sem);
+@@ -248,3 +271,26 @@ asmlinkage int sys_modify_ldt(int func,
+ }
+ return ret;
+ }
++
++/*
++ * load one particular LDT into the current CPU
++ */
++void load_LDT_nolock(mm_context_t *pc, int cpu)
++{
++ struct page **pages = pc->ldt_pages;
++ int count = pc->size;
++ int nr_pages, i;
++
++ if (likely(!count)) {
++ pages = &default_ldt_page;
++ count = 5;
++ }
++ nr_pages = (count*LDT_ENTRY_SIZE + PAGE_SIZE-1) / PAGE_SIZE;
++
++ for (i = 0; i < nr_pages; i++) {
++ __kunmap_atomic_type(KM_LDT_PAGE0 - i);
++ __kmap_atomic(pages[i], KM_LDT_PAGE0 - i);
++ }
++ set_ldt_desc(cpu, (void *)__kmap_atomic_vaddr(KM_LDT_PAGE0), count);
++ load_LDT_desc();
++}
+--- linux-2.6.0-test6/arch/i386/kernel/Makefile 2003-09-27 18:57:43.000000000 -0700
++++ 25/arch/i386/kernel/Makefile 2003-10-05 00:36:48.000000000 -0700
+@@ -7,13 +7,14 @@ extra-y := head.o init_task.o vmlinux.ld
+ obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o vm86.o \
+ ptrace.o i8259.o ioport.o ldt.o setup.o time.o sys_i386.o \
+ pci-dma.o i386_ksyms.o i387.o dmi_scan.o bootflag.o \
+- doublefault.o
++ doublefault.o efi.o efi_stub.o entry_trampoline.o
+
+ obj-y += cpu/
+ obj-y += timers/
+ obj-$(CONFIG_ACPI_BOOT) += acpi/
+ obj-$(CONFIG_X86_BIOS_REBOOT) += reboot.o
+ obj-$(CONFIG_MCA) += mca.o
++obj-$(CONFIG_KGDB) += kgdb_stub.o
+ obj-$(CONFIG_X86_MSR) += msr.o
+ obj-$(CONFIG_X86_CPUID) += cpuid.o
+ obj-$(CONFIG_MICROCODE) += microcode.o
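[Editorial note, not part of the patch: the ldt.c rewrite above keeps the LDT in individual highmem pages (ldt_pages[]) instead of one kmalloc/vmalloc block, but the modify_ldt() interface it serves is unchanged. Below is a minimal user-space sketch of that interface -- install one entry through the write_ldt() path, then read the table back through read_ldt(). It assumes <asm/ldt.h> of this era exposing struct user_desc, LDT_ENTRIES and LDT_ENTRY_SIZE; there is no glibc wrapper, so it goes through syscall(2).]

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <asm/ldt.h>

int main(void)
{
	struct user_desc d;
	/* the whole LDT: 8192 entries of 8 bytes each */
	static unsigned char buf[LDT_ENTRIES * LDT_ENTRY_SIZE];
	long n;

	memset(&d, 0, sizeof(d));
	d.entry_number = 0;	/* first LDT slot */
	d.base_addr = 0;
	d.limit = 0xffff;	/* 64KB, byte-granular */
	d.seg_32bit = 1;

	/* func 1 = write: lands in write_ldt(), which may grow the page array */
	if (syscall(SYS_modify_ldt, 1, &d, sizeof(d)) != 0)
		perror("modify_ldt(write)");

	/* func 0 = read: lands in read_ldt(), which copies page by page */
	n = syscall(SYS_modify_ldt, 0, buf, sizeof(buf));
	printf("read_ldt returned %ld bytes\n", n);
	return 0;
}

[Under the patched kernel the read path walks ldt_pages[] with kmap() one page at a time rather than copying from one flat buffer, but callers see the same byte stream. End of editorial note.]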
+--- linux-2.6.0-test6/arch/i386/kernel/mca.c 2003-08-22 19:23:40.000000000 -0700
++++ 25/arch/i386/kernel/mca.c 2003-10-05 00:36:10.000000000 -0700
+@@ -132,7 +132,9 @@ struct resource mca_standard_resources[]
+ #define MCA_STANDARD_RESOURCES (sizeof(mca_standard_resources)/sizeof(struct resource))
+
+ /**
+- * mca_read_pos - read the POS registers into a memory buffer
++ * mca_read_and_store_pos - read the POS registers into a memory buffer
++ * @pos: a char pointer to 8 bytes, contains the POS register value on
++ * successful return
+ *
+ * Returns 1 if a card actually exists (i.e. the pos isn't
+ * all 0xff) or 0 otherwise
+--- linux-2.6.0-test6/arch/i386/kernel/mpparse.c 2003-09-27 18:57:43.000000000 -0700
++++ 25/arch/i386/kernel/mpparse.c 2003-10-05 00:36:48.000000000 -0700
+@@ -169,7 +169,7 @@ void __init MP_processor_info (struct mp
+
+ if (num_processors >= NR_CPUS) {
+ printk(KERN_WARNING "NR_CPUS limit of %i reached. Cannot "
+- "boot CPU(apicid 0x%d).\n", NR_CPUS, m->mpc_apicid);
++ "boot CPU(apicid 0x%x).\n", NR_CPUS, m->mpc_apicid);
+ return;
+ }
+ num_processors++;
+@@ -616,6 +616,31 @@ static inline void __init construct_defa
+ }
+ }
+
++#ifdef CONFIG_X86_IO_APIC
++/* irq_vector must have an entry for all RTEs of all I/O APICs. */
++void __init alloc_irq_vector_array(void)
++{
++ int total = 0;
++ int idx;
++ union IO_APIC_reg_01 reg_01;
++
++ /* The I/O APIC fixmaps aren't inited yet, so use the first one. */
++ for (idx = 0; idx < nr_ioapics; idx++) {
++ set_fixmap_nocache(FIX_IO_APIC_BASE_0, mp_ioapics[idx].mpc_apicaddr);
++ reg_01.raw = io_apic_read(0, 1);
++ total += reg_01.bits.entries + 1;
++ }
++
++ /* Always alloc at least NR_IRQS vectors. */
++ nr_irqs = max(total, NR_IRQS);
++ irq_vector = (u8 *) alloc_bootmem(nr_irqs);
++ memset(irq_vector, 0, nr_irqs);
++ irq_vector[0] = FIRST_DEVICE_VECTOR;
++}
++#else
++void __init alloc_irq_vector_array(void) { }
++#endif /* CONFIG_X86_IO_APIC */
++
+ static struct intel_mp_floating *mpf_found;
+
+ /*
+@@ -633,6 +658,7 @@ void __init get_smp_config (void)
+ */
+ if (acpi_lapic && acpi_ioapic) {
+ printk(KERN_INFO "Using ACPI (MADT) for SMP configuration information\n");
++ alloc_irq_vector_array();
+ return;
+ }
+ else if (acpi_lapic)
+@@ -661,10 +687,11 @@ void __init get_smp_config (void)
+ * Read the physical hardware table. Anything here will
+ * override the defaults.
+ */
+- if (!smp_read_mpc((void *)mpf->mpf_physptr)) {
++ if (!smp_read_mpc((void *)phys_to_virt(mpf->mpf_physptr))) {
+ smp_found_config = 0;
+ printk(KERN_ERR "BIOS bug, MP table errors detected!...\n");
+ printk(KERN_ERR "... disabling SMP support. (tell your hw vendor)\n");
++ alloc_irq_vector_array();
+ return;
+ }
+ /*
+@@ -688,6 +715,7 @@ void __init get_smp_config (void)
+ } else
+ BUG();
+
++ alloc_irq_vector_array();
+ printk(KERN_INFO "Processors: %d\n", num_processors);
+ /*
+ * Only use the first configuration found.
+@@ -830,7 +858,7 @@ void __init mp_register_lapic (
+ MP_processor_info(&processor);
+ }
+
+-#ifdef CONFIG_X86_IO_APIC
++#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_ACPI_INTERPRETER)
+
+ #define MP_ISA_BUS 0
+ #define MP_MAX_IOAPIC_PIN 127
+@@ -1019,10 +1047,6 @@ void __init mp_config_acpi_legacy_irqs (
+ }
+ }
+
+-#ifdef CONFIG_ACPI
+-
+-/* Ensure the ACPI SCI interrupt level is active low, edge-triggered */
+-
+ extern FADT_DESCRIPTOR acpi_fadt;
+
+ void __init mp_config_ioapic_for_sci(int irq)
+@@ -1031,6 +1055,7 @@ void __init mp_config_ioapic_for_sci(int
+ int ioapic_pin;
+ struct acpi_table_madt *madt;
+ struct acpi_table_int_src_ovr *entry = NULL;
++ acpi_interrupt_flags flags;
+ void *madt_end;
+ acpi_status status;
+
+@@ -1049,32 +1074,37 @@ void __init mp_config_ioapic_for_sci(int
+
+ while ((void *) entry < madt_end) {
+ if (entry->header.type == ACPI_MADT_INT_SRC_OVR &&
+- acpi_fadt.sci_int == entry->bus_irq) {
+- /*
+- * See the note at the end of ACPI 2.0b section
+- * 5.2.10.8 for what this is about.
+- */
+- if (entry->bus_irq != entry->global_irq) {
+- acpi_fadt.sci_int = entry->global_irq;
+- irq = entry->global_irq;
+- break;
+- }
+- else
+- return;
+- }
+-
++ acpi_fadt.sci_int == entry->bus_irq)
++ goto found;
++
+ entry = (struct acpi_table_int_src_ovr *)
+ ((unsigned long) entry + entry->header.length);
+ }
+ }
++ /*
++ * Although the ACPI spec says that the SCI should be level/low,
++ * don't reprogram it unless there is an explicit MADT OVR entry
++ * instructing us to do so -- otherwise we break Tyan boards which
++ * have the SCI wired edge/high but no MADT OVR.
++ */
++ return;
++
++found:
++ /*
++ * See the note at the end of ACPI 2.0b section
++ * 5.2.10.8 for what this is about.
++ */
++ flags = entry->flags;
++ acpi_fadt.sci_int = entry->global_irq;
++ irq = entry->global_irq;
+
+ ioapic = mp_find_ioapic(irq);
+
+ ioapic_pin = irq - mp_ioapic_routing[ioapic].irq_start;
+
+- io_apic_set_pci_routing(ioapic, ioapic_pin, irq, 1, 1); // Active low, level triggered
++ io_apic_set_pci_routing(ioapic, ioapic_pin, irq,
++ (flags.trigger >> 1), (flags.polarity >> 1));
+ }
+-#endif /* CONFIG_ACPI */
+
+ #ifdef CONFIG_ACPI_PCI
+
+@@ -1110,8 +1140,10 @@ void __init mp_parse_prt (void)
+ }
+
+ /* Don't set up the ACPI SCI because it's already set up */
+- if (acpi_fadt.sci_int == irq)
++ if (acpi_fadt.sci_int == irq) {
++ entry->irq = irq; /* we still need to set the entry's irq */
+ continue;
++ }
+
+ ioapic = mp_find_ioapic(irq);
+ if (ioapic < 0)
+@@ -1136,15 +1168,19 @@ void __init mp_parse_prt (void)
+ if ((1<<(ioapic_pin & 31)) & mp_ioapic_routing[ioapic].pin_programmed[idx]) {
+ printk(KERN_DEBUG "Pin %d-%d already programmed\n",
+ mp_ioapic_routing[ioapic].apic_id, ioapic_pin);
+- entry->irq = irq;
++ if (use_pci_vector() && !platform_legacy_irq(irq))
++ irq = IO_APIC_VECTOR(irq);
++ entry->irq = irq;
+ continue;
+ }
+
+ mp_ioapic_routing[ioapic].pin_programmed[idx] |= (1<<(ioapic_pin & 31));
+
+- if (!io_apic_set_pci_routing(ioapic, ioapic_pin, irq, edge_level, active_high_low))
+- entry->irq = irq;
+-
++ if (!io_apic_set_pci_routing(ioapic, ioapic_pin, irq, edge_level, active_high_low)) {
++ if (use_pci_vector() && !platform_legacy_irq(irq))
++ irq = IO_APIC_VECTOR(irq);
++ entry->irq = irq;
++ }
+ printk(KERN_DEBUG "%02x:%02x:%02x[%c] -> %d-%d -> IRQ %d\n",
+ entry->id.segment, entry->id.bus,
+ entry->id.device, ('A' + entry->pin),
+@@ -1154,5 +1190,5 @@ void __init mp_parse_prt (void)
+ }
+
+ #endif /*CONFIG_ACPI_PCI*/
+-#endif /* CONFIG_X86_IO_APIC */
++#endif /*CONFIG_X86_IO_APIC && CONFIG_ACPI_INTERPRETER*/
+ #endif /*CONFIG_ACPI_BOOT*/
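[Editorial note, not part of the patch: alloc_irq_vector_array() above sizes the new dynamic irq_vector[] table by summing the redirection-table lengths that each I/O APIC reports in register 1, where bits 16-23 hold "entries - 1", and never goes below NR_IRQS. A standalone sketch of that arithmetic with made-up register values -- the 0x17 in bits 16-23 encodes a common 24-RTE I/O APIC:]

#include <stdio.h>

#define NR_IRQS 224 /* illustrative only; the real value is config-dependent */

int main(void)
{
	/* raw register-1 values as io_apic_read(apic, 1) might return them */
	unsigned int reg_01[] = { 0x00170011, 0x00170011 };
	int nr_ioapics = 2;
	int total = 0, idx;

	for (idx = 0; idx < nr_ioapics; idx++) {
		unsigned int entries = (reg_01[idx] >> 16) & 0xff;
		total += entries + 1;	/* the field stores entries - 1 */
	}

	/* always allocate at least NR_IRQS vectors, as the patch does */
	printf("nr_irqs = %d\n", total > NR_IRQS ? total : NR_IRQS);
	return 0;
}

[End of editorial note.]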
+--- linux-2.6.0-test6/arch/i386/kernel/nmi.c 2003-08-08 22:55:10.000000000 -0700
++++ 25/arch/i386/kernel/nmi.c 2003-10-05 00:33:38.000000000 -0700
+@@ -31,7 +31,16 @@
+ #include
+ #include
+
++#ifdef CONFIG_KGDB
++#include <asm/kgdb.h>
++#ifdef CONFIG_SMP
++unsigned int nmi_watchdog = NMI_IO_APIC;
++#else
++unsigned int nmi_watchdog = NMI_LOCAL_APIC;
++#endif
++#else
+ unsigned int nmi_watchdog = NMI_NONE;
++#endif
+ static unsigned int nmi_hz = HZ;
+ unsigned int nmi_perfctr_msr; /* the MSR to reset in NMI handler */
+ extern void show_registers(struct pt_regs *regs);
+@@ -408,6 +417,9 @@ void touch_nmi_watchdog (void)
+ for (i = 0; i < NR_CPUS; i++)
+ alert_counter[i] = 0;
+ }
++#ifdef CONFIG_KGDB
++int tune_watchdog = 5*HZ;
++#endif
+
+ void nmi_watchdog_tick (struct pt_regs * regs)
+ {
+@@ -421,12 +433,24 @@ void nmi_watchdog_tick (struct pt_regs *
+
+ sum = irq_stat[cpu].apic_timer_irqs;
+
++#ifdef CONFIG_KGDB
++ if (!in_kgdb(regs) && last_irq_sums[cpu] == sum) {
++
++#else
+ if (last_irq_sums[cpu] == sum) {
++#endif
+ /*
+ * Ayiee, looks like this CPU is stuck ...
+ * wait a few IRQs (5 seconds) before doing the oops ...
+ */ + alert_counter[cpu]++; ++#ifdef CONFIG_KGDB ++ if (alert_counter[cpu] == tune_watchdog) { ++ kgdb_handle_exception(2, SIGPWR, 0, regs); ++ last_irq_sums[cpu] = sum; ++ alert_counter[cpu] = 0; ++ } ++#endif + if (alert_counter[cpu] == 5*nmi_hz) { + spin_lock(&nmi_print_lock); + /* +--- linux-2.6.0-test6/arch/i386/kernel/process.c 2003-09-27 18:57:43.000000000 -0700 ++++ 25/arch/i386/kernel/process.c 2003-10-05 00:36:48.000000000 -0700 +@@ -47,6 +47,7 @@ + #include + #include + #include ++#include + #ifdef CONFIG_MATH_EMULATION + #include + #endif +@@ -298,6 +299,9 @@ void flush_thread(void) + struct task_struct *tsk = current; + + memset(tsk->thread.debugreg, 0, sizeof(unsigned long)*8); ++#ifdef CONFIG_X86_HIGH_ENTRY ++ clear_thread_flag(TIF_DB7); ++#endif + memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); + /* + * Forget coprocessor state.. +@@ -311,9 +315,8 @@ void release_thread(struct task_struct * + if (dead_task->mm) { + // temporary debugging check + if (dead_task->mm->context.size) { +- printk("WARNING: dead process %8s still has LDT? <%p/%d>\n", ++ printk("WARNING: dead process %8s still has LDT? <%d>\n", + dead_task->comm, +- dead_task->mm->context.ldt, + dead_task->mm->context.size); + BUG(); + } +@@ -348,7 +351,17 @@ int copy_thread(int nr, unsigned long cl + p->thread.esp = (unsigned long) childregs; + p->thread.esp0 = (unsigned long) (childregs+1); + ++ /* ++ * get the two stack pages, for the virtual stack. ++ * ++ * IMPORTANT: this code relies on the fact that the task ++ * structure is an 8K aligned piece of physical memory. ++ */ ++ p->thread.stack_page0 = virt_to_page((unsigned long)p->thread_info); ++ p->thread.stack_page1 = virt_to_page((unsigned long)p->thread_info + PAGE_SIZE); ++ + p->thread.eip = (unsigned long) ret_from_fork; ++ p->thread_info->real_stack = p->thread_info; + + savesegment(fs,p->thread.fs); + savesegment(gs,p->thread.gs); +@@ -500,10 +513,40 @@ struct task_struct * __switch_to(struct + + __unlazy_fpu(prev_p); + ++#ifdef CONFIG_X86_HIGH_ENTRY ++ /* ++ * Set the ptes of the virtual stack. (NOTE: a one-page TLB flush is ++ * needed because otherwise NMIs could interrupt the ++ * user-return code with a virtual stack and stale TLBs.) ++ */ ++ __kunmap_atomic_type(KM_VSTACK0); ++ __kunmap_atomic_type(KM_VSTACK1); ++ __kmap_atomic(next->stack_page0, KM_VSTACK0); ++ __kmap_atomic(next->stack_page1, KM_VSTACK1); ++ ++ /* ++ * NOTE: here we rely on the task being the stack as well ++ */ ++ next_p->thread_info->virtual_stack = (void *)__kmap_atomic_vaddr(KM_VSTACK0); ++ ++#if defined(CONFIG_PREEMPT) && defined(CONFIG_SMP) ++ /* ++ * If next was preempted on entry from userspace to kernel, ++ * and now it's on a different cpu, we need to adjust %esp. ++ * This assumes that entry.S does not copy %esp while on the ++ * virtual stack (with interrupts enabled): which is so, ++ * except within __SWITCH_KERNELSPACE itself. ++ */ ++ if (unlikely(next->esp >= TASK_SIZE)) { ++ next->esp &= THREAD_SIZE - 1; ++ next->esp |= (unsigned long) next_p->thread_info->virtual_stack; ++ } ++#endif ++#endif + /* +- * Reload esp0, LDT and the page table pointer: ++ * Reload esp0: + */ +- load_esp0(tss, next->esp0); ++ load_esp0(tss, virtual_esp0(next_p)); + + /* + * Load the per-thread Thread-Local Storage descriptor. 
+--- linux-2.6.0-test6/arch/i386/kernel/reboot.c 2003-09-27 18:57:43.000000000 -0700 ++++ 25/arch/i386/kernel/reboot.c 2003-10-05 00:36:48.000000000 -0700 +@@ -7,6 +7,7 @@ + #include + #include + #include ++#include + #include + #include + #include "mach_reboot.h" +@@ -153,12 +154,11 @@ void machine_real_restart(unsigned char + CMOS_WRITE(0x00, 0x8f); + spin_unlock_irqrestore(&rtc_lock, flags); + +- /* Remap the kernel at virtual address zero, as well as offset zero +- from the kernel segment. This assumes the kernel segment starts at +- virtual address PAGE_OFFSET. */ +- +- memcpy (swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS, +- sizeof (swapper_pg_dir [0]) * KERNEL_PGD_PTRS); ++ /* ++ * Remap the first 16 MB of RAM (which includes the kernel image) ++ * at virtual address zero: ++ */ ++ setup_identity_mappings(swapper_pg_dir, 0, 16*1024*1024); + + /* + * Use `swapper_pg_dir' as our page directory. +@@ -262,7 +262,12 @@ void machine_restart(char * __unused) + disable_IO_APIC(); + #endif + +- if(!reboot_thru_bios) { ++ if (!reboot_thru_bios) { ++ if (efi_enabled) { ++ efi.reset_system(EFI_RESET_COLD, EFI_SUCCESS, 0, 0); ++ __asm__ __volatile__("lidt %0": :"m" (no_idt)); ++ __asm__ __volatile__("int3"); ++ } + /* rebooting needs to touch the page at absolute addr 0 */ + *((unsigned short *)__va(0x472)) = reboot_mode; + for (;;) { +@@ -272,6 +277,8 @@ void machine_restart(char * __unused) + __asm__ __volatile__("int3"); + } + } ++ if (efi_enabled) ++ efi.reset_system(EFI_RESET_WARM, EFI_SUCCESS, 0, 0); + + machine_real_restart(jump_to_bios, sizeof(jump_to_bios)); + } +@@ -282,6 +289,8 @@ void machine_halt(void) + + void machine_power_off(void) + { ++ if (efi_enabled) ++ efi.reset_system(EFI_RESET_SHUTDOWN, EFI_SUCCESS, 0, 0); + if (pm_power_off) + pm_power_off(); + } +--- linux-2.6.0-test6/arch/i386/kernel/setup.c 2003-09-27 18:57:43.000000000 -0700 ++++ 25/arch/i386/kernel/setup.c 2003-10-05 00:36:22.000000000 -0700 +@@ -36,6 +36,8 @@ + #include + #include + #include ++#include ++#include + #include