--- linux-2.6.0-test6/arch/alpha/Kconfig 2003-09-27 18:57:43.000000000 -0700 +++ 25/arch/alpha/Kconfig 2003-10-05 00:36:11.000000000 -0700 @@ -471,21 +471,6 @@ config EISA bool depends on ALPHA_GENERIC || ALPHA_JENSEN || ALPHA_ALCOR || ALPHA_MIKASA || ALPHA_SABLE || ALPHA_LYNX || ALPHA_NORITAKE || ALPHA_RAWHIDE default y - ---help--- - The Extended Industry Standard Architecture (EISA) bus was - developed as an open alternative to the IBM MicroChannel bus. - - The EISA bus provided some of the features of the IBM MicroChannel - bus while maintaining backward compatibility with cards made for - the older ISA bus. The EISA bus saw limited use between 1988 and - 1995 when it was made obsolete by the PCI bus. - - Say Y here if you are building a kernel for an EISA-based machine. - - Otherwise, say N. - -config EISA_ALWAYS - def_bool EISA config SMP bool "Symmetric multi-processing support" --- linux-2.6.0-test6/arch/alpha/kernel/core_irongate.c 2003-06-14 12:18:25.000000000 -0700 +++ 25/arch/alpha/kernel/core_irongate.c 2003-10-05 00:33:23.000000000 -0700 @@ -391,7 +391,7 @@ irongate_ioremap(unsigned long addr, uns cur_gatt = phys_to_virt(GET_GATT(baddr) & ~1); pte = cur_gatt[GET_GATT_OFF(baddr)] & ~1; - if (__alpha_remap_area_pages(VMALLOC_VMADDR(vaddr), + if (__alpha_remap_area_pages(vaddr, pte, PAGE_SIZE, 0)) { printk("AGP ioremap: FAILED to map...\n"); vfree(area->addr); --- linux-2.6.0-test6/arch/alpha/kernel/core_marvel.c 2003-08-22 19:23:39.000000000 -0700 +++ 25/arch/alpha/kernel/core_marvel.c 2003-10-05 00:33:23.000000000 -0700 @@ -696,7 +696,7 @@ marvel_ioremap(unsigned long addr, unsig } pfn >>= 1; /* make it a true pfn */ - if (__alpha_remap_area_pages(VMALLOC_VMADDR(vaddr), + if (__alpha_remap_area_pages(vaddr, pfn << PAGE_SHIFT, PAGE_SIZE, 0)) { printk("FAILED to map...\n"); --- linux-2.6.0-test6/arch/alpha/kernel/core_titan.c 2003-09-08 13:58:55.000000000 -0700 +++ 25/arch/alpha/kernel/core_titan.c 2003-10-05 00:33:23.000000000 -0700 @@ -534,7 +534,7 @@ titan_ioremap(unsigned long addr, unsign } pfn >>= 1; /* make it a true pfn */ - if (__alpha_remap_area_pages(VMALLOC_VMADDR(vaddr), + if (__alpha_remap_area_pages(vaddr, pfn << PAGE_SHIFT, PAGE_SIZE, 0)) { printk("FAILED to map...\n"); --- linux-2.6.0-test6/arch/alpha/kernel/setup.c 2003-08-22 19:23:39.000000000 -0700 +++ 25/arch/alpha/kernel/setup.c 2003-10-05 00:36:11.000000000 -0700 @@ -33,6 +33,7 @@ #include #include #include +#include #ifdef CONFIG_MAGIC_SYSRQ #include #include @@ -680,6 +681,11 @@ setup_arch(char **cmdline_p) /* Default root filesystem to sda2. */ ROOT_DEV = Root_SDA2; +#ifdef CONFIG_EISA + /* FIXME: only set this when we actually have EISA in this box? */ + EISA_bus = 1; +#endif + /* * Check ASN in HWRPB for validity, report if bad. * FIXME: how was this failing? 
	   Should we trust it instead,
@@ -1203,7 +1209,7 @@ show_cpuinfo(struct seq_file *f, void *s
		   platform_string(), nr_processors);
 
 #ifdef CONFIG_SMP
-	seq_printf(f, "cpus active\t\t: %d\n"
+	seq_printf(f, "cpus active\t\t: %ld\n"
		      "cpu active mask\t\t: %016lx\n",
		   num_online_cpus(), cpu_present_mask);
 #endif
--- linux-2.6.0-test6/arch/alpha/kernel/smp.c	2003-08-22 19:23:40.000000000 -0700
+++ 25/arch/alpha/kernel/smp.c	2003-10-05 00:33:23.000000000 -0700
@@ -597,7 +597,7 @@ smp_cpus_done(unsigned int max_cpus)
 		if (cpu_online(cpu))
 			bogosum += cpu_data[cpu].loops_per_jiffy;
 
-	printk(KERN_INFO "SMP: Total of %d processors activated "
+	printk(KERN_INFO "SMP: Total of %ld processors activated "
	       "(%lu.%02lu BogoMIPS).\n",
	       num_online_cpus(),
	       (bogosum + 2500) / (500000/HZ),
--- linux-2.6.0-test6/arch/alpha/kernel/time.c	2003-08-08 22:55:10.000000000 -0700
+++ 25/arch/alpha/kernel/time.c	2003-10-05 00:33:23.000000000 -0700
@@ -89,6 +89,16 @@ static inline __u32 rpcc(void)
 	return result;
 }
 
+/*
+ * Scheduler clock - returns current time in nanosec units.
+ *
+ * Copied from ARM code for expediency... ;-}
+ */
+unsigned long long sched_clock(void)
+{
+	return (unsigned long long)jiffies * (1000000000 / HZ);
+}
+
 /*
  * timer_interrupt() needs to keep up the real-time clock,
@@ -239,8 +249,9 @@ validate_cc_value(unsigned long cc)
  *	arch/i386/time.c.
  */
 
-#define CALIBRATE_LATCH	(52 * LATCH)
-#define CALIBRATE_TIME	(52 * 1000020 / HZ)
+#define PIC_TICK_RATE	1193180UL
+#define CALIBRATE_LATCH	0xffff
+#define TIMEOUT_COUNT	0x100000
 
 static unsigned long __init
 calibrate_cc_with_pic(void)
@@ -263,19 +274,15 @@ calibrate_cc_with_pic(void)
 	cc = rpcc();
 	do {
-		count+=100; /* by 1 takes too long to timeout from 0 */
-	} while ((inb(0x61) & 0x20) == 0 && count > 0);
+		count++;
+	} while ((inb(0x61) & 0x20) == 0 && count < TIMEOUT_COUNT);
 	cc = rpcc() - cc;
 
 	/* Error: ECTCNEVERSET or ECPUTOOFAST.  */
-	if (count <= 100)
-		return 0;
-
-	/* Error: ECPUTOOSLOW. */
-	if (cc <= CALIBRATE_TIME)
+	if (count <= 1 || count == TIMEOUT_COUNT)
 		return 0;
 
-	return (cc * 1000000UL) / CALIBRATE_TIME;
+	return ((long)cc * PIC_TICK_RATE) / (CALIBRATE_LATCH + 1);
 }
 
 /* The Linux interpretation of the CMOS clock register contents:
--- linux-2.6.0-test6/arch/alpha/mm/init.c	2003-07-27 12:14:38.000000000 -0700
+++ 25/arch/alpha/mm/init.c	2003-10-05 00:33:23.000000000 -0700
@@ -210,7 +210,8 @@ callback_init(void * kernel_end)
 	/* Allocate one PGD and one PMD.  In the case of SRM, we'll need
 	   these to actually remap the console.  There is an assumption
 	   here that only one of each is needed, and this allows for 8MB.
-	   Currently (late 1999), big consoles are still under 4MB.
+	   On systems with larger consoles, additional pages will be
+	   allocated as needed during the mapping process.
 
 	   In the case of not SRM, but not CONFIG_ALPHA_LARGE_VMALLOC,
 	   we need to allocate the PGD we use for vmalloc before we start
@@ -237,6 +238,15 @@ callback_init(void * kernel_end)
 			unsigned long pfn = crb->map[i].pa >> PAGE_SHIFT;
 			crb->map[i].va = vaddr;
 			for (j = 0; j < crb->map[i].count; ++j) {
+				/* Newer consoles (especially on larger
+				   systems) may require more pages of
+				   PTEs.  Grab additional pages as needed.
*/ + if (pmd != pmd_offset(pgd, vaddr)) { + memset(kernel_end, 0, PAGE_SIZE); + pmd = pmd_offset(pgd, vaddr); + pmd_set(pmd, (pte_t *)kernel_end); + kernel_end += PAGE_SIZE; + } set_pte(pte_offset_kernel(pmd, vaddr), pfn_pte(pfn, PAGE_KERNEL)); pfn++; --- linux-2.6.0-test6/arch/arm/boot/compressed/head.S 2003-06-14 12:18:33.000000000 -0700 +++ 25/arch/arm/boot/compressed/head.S 2003-10-05 00:33:23.000000000 -0700 @@ -477,6 +477,12 @@ proc_types: @ b __arm6_cache_off @ b __armv3_cache_flush + .word 0x00000000 @ old ARM ID + .word 0x0000f000 + mov pc, lr + mov pc, lr + mov pc, lr + .word 0x41007000 @ ARM7/710 .word 0xfff8fe00 b __arm7_cache_off @@ -489,6 +495,14 @@ proc_types: b __armv4_cache_off mov pc, lr + .word 0x00007000 @ ARM7 IDs + .word 0x0000f000 + mov pc, lr + mov pc, lr + mov pc, lr + + @ Everything from here on will be the new ID system. + .word 0x41129200 @ ARM920T .word 0xff00fff0 b __armv4_cache_on @@ -507,8 +521,16 @@ proc_types: b __armv4_cache_off b __armv4_cache_flush - .word 0x69050000 @ xscale - .word 0xffff0000 + @ These match on the architecture ID + + .word 0x00050000 @ ARMv5TE + .word 0x000f0000 + b __armv4_cache_on + b __armv4_cache_off + b __armv4_cache_flush + + .word 0x00060000 @ ARMv5TEJ + .word 0x000f0000 b __armv4_cache_on b __armv4_cache_off b __armv4_cache_flush --- linux-2.6.0-test6/arch/arm/Kconfig 2003-09-27 18:57:43.000000000 -0700 +++ 25/arch/arm/Kconfig 2003-10-05 00:33:23.000000000 -0700 @@ -239,7 +239,7 @@ config DISCONTIGMEM # Now handle the bus types config PCI - bool "PCI support" if ARCH_INTEGRATOR + bool "PCI support" if ARCH_INTEGRATOR_AP default y if ARCH_FTVPCI || ARCH_SHARK || FOOTBRIDGE_HOST || ARCH_IOP3XX help Find out whether you have a PCI motherboard. PCI is the name of a @@ -645,8 +645,6 @@ source "drivers/misc/Kconfig" source "drivers/usb/Kconfig" -source "net/bluetooth/Kconfig" - menu "Kernel hacking" --- linux-2.6.0-test6/arch/arm/kernel/apm.c 2003-09-08 13:58:55.000000000 -0700 +++ 25/arch/arm/kernel/apm.c 2003-10-05 00:33:23.000000000 -0700 @@ -179,13 +179,10 @@ static void queue_event(apm_event_t even wake_up_interruptible(&apm_waitqueue); } -/* defined in pm.c */ -extern int suspend(void); - static int apm_suspend(void) { struct list_head *l; - int err = suspend(); + int err = pm_suspend(PM_SUSPEND_MEM); /* * Anyone on the APM queues will think we're still suspended. 
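A note on the calibrate_cc_with_pic() rework above: the old code scaled the cycle count by CALIBRATE_TIME, while the new code counts rpcc() ticks across one full PIT countdown of CALIBRATE_LATCH + 1 = 0x10000 ticks at PIC_TICK_RATE (1193180 Hz), so the CPU frequency falls straight out of the ratio. The (long) cast suffices because long is 64 bits on Alpha. A minimal userspace sketch of that arithmetic (the sample tick count is hypothetical):

    #include <stdio.h>
    #include <stdint.h>

    #define PIC_TICK_RATE   1193180UL   /* PIT input clock, Hz */
    #define CALIBRATE_LATCH 0xffff      /* one full 16-bit countdown */

    /* Convert cycle-counter ticks seen during one PIT countdown
       (CALIBRATE_LATCH + 1 PIT ticks, about 54.9 ms) into Hz; the
       64-bit intermediate mirrors the (long) cast on Alpha. */
    static unsigned long cc_to_hz(unsigned long cc)
    {
        return (unsigned long)(((uint64_t)cc * PIC_TICK_RATE) /
                               (CALIBRATE_LATCH + 1));
    }

    int main(void)
    {
        printf("%lu Hz\n", cc_to_hz(27469000));  /* ~500 MHz machine */
        return 0;
    }

The count <= 1 and count == TIMEOUT_COUNT checks replace the old ECPUTOOSLOW test: they reject a PIT that never started counting and a loop that timed out, respectively.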
--- linux-2.6.0-test6/arch/arm/kernel/entry-armv.S 2003-09-27 18:57:43.000000000 -0700 +++ 25/arch/arm/kernel/entry-armv.S 2003-10-05 00:33:23.000000000 -0700 @@ -439,20 +439,25 @@ ENTRY(soft_irq_mask) .macro get_irqnr_and_base, irqnr, irqstat, base, tmp /* FIXME: should not be using soo many LDRs here */ - ldr \irqnr, =IO_ADDRESS(INTEGRATOR_IC_BASE) - ldr \irqstat, [\irqnr, #IRQ_STATUS] @ get masked status - ldr \irqnr, =IO_ADDRESS(INTEGRATOR_HDR_BASE) - ldr \irqnr, [\irqnr, #(INTEGRATOR_HDR_IC_OFFSET+IRQ_STATUS)] - orr \irqstat, \irqstat, \irqnr, lsl #INTEGRATOR_CM_INT0 + ldr \base, =IO_ADDRESS(INTEGRATOR_IC_BASE) + mov \irqnr, #IRQ_PIC_START + ldr \irqstat, [\base, #IRQ_STATUS] @ get masked status + ldr \base, =IO_ADDRESS(INTEGRATOR_HDR_BASE) + teq \irqstat, #0 + ldreq \irqstat, [\base, #(INTEGRATOR_HDR_IC_OFFSET+IRQ_STATUS)] + moveq \irqnr, #IRQ_CIC_START - mov \irqnr, #0 -1001: tst \irqstat, #1 +1001: tst \irqstat, #15 bne 1002f + add \irqnr, \irqnr, #4 + movs \irqstat, \irqstat, lsr #4 + bne 1001b +1002: tst \irqstat, #1 + bne 1003f add \irqnr, \irqnr, #1 - mov \irqstat, \irqstat, lsr #1 - cmp \irqnr, #22 - bcc 1001b -1002: /* EQ will be set if we reach 22 */ + movs \irqstat, \irqstat, lsr #1 + bne 1002b +1003: /* EQ will be set if no irqs pending */ .endm .macro irq_prio_table --- linux-2.6.0-test6/arch/arm/kernel/pm.c 2003-09-08 13:58:55.000000000 -0700 +++ 25/arch/arm/kernel/pm.c 2003-10-05 00:33:23.000000000 -0700 @@ -9,68 +9,18 @@ * sleep. */ #include +#include +#include #include -#include -#include #include #include -#include -#include - -/* - * Tell the linker that pm_do_suspend may not be present. - */ -extern int pm_do_suspend(void) __attribute__((weak)); - -int suspend(void) -{ - int ret; - - if (!pm_do_suspend) - return -ENOSYS; - - /* - * Suspend "legacy" devices. - */ - ret = pm_send_all(PM_SUSPEND, (void *)3); - if (ret != 0) - goto out; - - ret = device_suspend(3); - if (ret) - goto resume_legacy; - - local_irq_disable(); - leds_event(led_stop); - - sysdev_suspend(3); - - ret = pm_do_suspend(); - - sysdev_resume(); - - leds_event(led_start); - local_irq_enable(); - - device_resume(); - - resume_legacy: - pm_send_all(PM_RESUME, (void *)0); - - out: - return ret; -} - #ifdef CONFIG_SYSCTL /* * We really want this to die. It's a disgusting hack using unallocated * sysctl numbers. We should be using a real interface. 
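The get_irqnr_and_base rewrite above drops the old bounded one-bit-per-iteration scan (cmp #22) for a two-stage search: empty nibbles are skipped four bits at a time before falling back to single-bit steps, and the core-module status is consulted only when the PIC status reads zero, seeding irqnr from IRQ_PIC_START or IRQ_CIC_START. The same scan in C, as a rough sketch (function name and sample status are illustrative):

    #include <stdio.h>

    /* Find the lowest pending IRQ: skip whole empty nibbles first,
       then step bit by bit - the C shape of the new assembly loop. */
    static int first_irq(unsigned int status, int base)
    {
        int irq = base;

        if (status == 0)
            return -1;                  /* nothing pending */
        while ((status & 15) == 0) {    /* whole empty nibbles */
            irq += 4;
            status >>= 4;
        }
        while ((status & 1) == 0) {     /* remaining single bits */
            irq += 1;
            status >>= 1;
        }
        return irq;
    }

    int main(void)
    {
        printf("%d\n", first_irq(0x00400000, 0));   /* prints 22 */
        return 0;
    }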
*/ -#include -#include - static int pm_sysctl_proc_handler(ctl_table *ctl, int write, struct file *filp, void *buffer, size_t *lenp) @@ -79,7 +29,7 @@ pm_sysctl_proc_handler(ctl_table *ctl, i printk("PM: task %s (pid %d) uses deprecated sysctl PM interface\n", current->comm, current->pid); if (write) - ret = suspend(); + ret = pm_suspend(PM_SUSPEND_MEM); return ret; } --- linux-2.6.0-test6/arch/arm/kernel/process.c 2003-09-27 18:57:43.000000000 -0700 +++ 25/arch/arm/kernel/process.c 2003-10-05 00:33:23.000000000 -0700 @@ -117,12 +117,10 @@ __setup("reboot=", reboot_setup); void machine_halt(void) { - leds_event(led_halted); } void machine_power_off(void) { - leds_event(led_halted); if (pm_power_off) pm_power_off(); } --- linux-2.6.0-test6/arch/arm/kernel/setup.c 2003-09-27 18:57:43.000000000 -0700 +++ 25/arch/arm/kernel/setup.c 2003-10-05 00:33:23.000000000 -0700 @@ -182,7 +182,7 @@ static const char *proc_arch[] = { "5", "5T", "5TE", - "?(8)", + "5TEJ", "?(9)", "?(10)", "?(11)", --- linux-2.6.0-test6/arch/arm/kernel/signal.c 2003-06-14 12:17:56.000000000 -0700 +++ 25/arch/arm/kernel/signal.c 2003-10-05 00:33:23.000000000 -0700 @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -539,6 +540,11 @@ static int do_signal(sigset_t *oldset, s if (!user_mode(regs)) return 0; + if (current->flags & PF_FREEZE) { + refrigerator(0); + goto no_signal; + } + if (current->ptrace & PT_SINGLESTEP) ptrace_cancel_bpt(current); @@ -550,6 +556,7 @@ static int do_signal(sigset_t *oldset, s return 1; } + no_signal: /* * No signal to deliver to the process - restart the syscall. */ --- linux-2.6.0-test6/arch/arm/kernel/time.c 2003-09-27 18:57:43.000000000 -0700 +++ 25/arch/arm/kernel/time.c 2003-10-05 00:33:23.000000000 -0700 @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -72,8 +73,6 @@ unsigned long (*gettimeoffset)(void) = d */ unsigned long long sched_clock(void) { - unsigned long long this_offset; - return (unsigned long long)jiffies * (1000000000 / HZ); } @@ -137,6 +136,47 @@ static void dummy_leds_event(led_event_t void (*leds_event)(led_event_t) = dummy_leds_event; +static int leds_suspend(struct sys_device *dev, u32 state) +{ + leds_event(led_stop); + return 0; +} + +static int leds_resume(struct sys_device *dev) +{ + leds_event(led_start); + return 0; +} + +static int leds_shutdown(struct sys_device *dev) +{ + leds_event(led_halted); + return 0; +} + +static struct sysdev_class leds_sysclass = { + set_kset_name("leds"), + .shutdown = leds_shutdown, + .suspend = leds_suspend, + .resume = leds_resume, +}; + +static struct sys_device leds_device = { + .id = 0, + .cls = &leds_sysclass, +}; + +static int __init leds_init(void) +{ + int ret; + ret = sysdev_class_register(&leds_sysclass); + if (ret == 0) + ret = sys_device_register(&leds_device); + return ret; +} + +device_initcall(leds_init); + EXPORT_SYMBOL(leds_event); #endif --- linux-2.6.0-test6/arch/arm/kernel/traps.c 2003-08-22 19:23:40.000000000 -0700 +++ 25/arch/arm/kernel/traps.c 2003-10-05 00:33:23.000000000 -0700 @@ -212,10 +212,10 @@ NORET_TYPE void die(const char *str, str printk("CPU: %d\n", smp_processor_id()); show_regs(regs); printk("Process %s (pid: %d, stack limit = 0x%p)\n", - current->comm, current->pid, tsk->thread_info + 1); + tsk->comm, tsk->pid, tsk->thread_info + 1); if (!user_mode(regs) || in_interrupt()) { - dump_mem("Stack: ", (unsigned long)(regs + 1), 8192+(unsigned long)tsk->thread_info); + dump_mem("Stack: ", regs->ARM_sp, 8192+(unsigned long)tsk->thread_info); 
dump_backtrace(regs, tsk); dump_instr(regs); } --- linux-2.6.0-test6/arch/arm/lib/io-readsl-armv4.S 2003-06-14 12:18:30.000000000 -0700 +++ 25/arch/arm/lib/io-readsl-armv4.S 2003-10-05 00:33:23.000000000 -0700 @@ -9,7 +9,6 @@ */ #include #include -#include /* * Note that some reads can be aligned on half-word boundaries. @@ -31,6 +30,10 @@ ENTRY(__raw_readsl) blt 4f bgt 6f +#ifndef __ARMEB__ + + /* little endian code */ + strh ip, [r1], #2 mov ip, ip, lsr #16 3: subs r2, r2, #1 @@ -68,3 +71,48 @@ ENTRY(__raw_readsl) strb ip, [r1] mov pc, lr +#else + + /* big endian code */ + + + mov r3, ip, lsr #16 + strh r3, [r1], #2 +3: mov r3, ip, lsl #16 + subs r2, r2, #1 + ldrne ip, [r0] + orrne r3, r3, ip, lsr #16 + strne r3, [r1], #4 + bne 3b + strh ip, [r1], #2 + mov pc, lr + +4: mov r3, ip, lsr #24 + strb r3, [r1], #1 + mov r3, ip, lsr #8 + strh r3, [r1], #2 +5: mov r3, ip, lsl #24 + subs r2, r2, #1 + ldrne ip, [r0] + orrne r3, r3, ip, lsr #8 + strne r3, [r1], #4 + bne 5b + strb ip, [r1], #1 + mov pc, lr + +6: mov r3, ip, lsr #24 + strb r3, [r1], #1 +7: mov r3, ip, lsl #8 + subs r2, r2, #1 + ldrne ip, [r0] + orrne r3, r3, ip, lsr #24 + strne r3, [r1], #4 + bne 7b + mov r3, ip, lsr #8 + strh r3, [r1], #2 + strb ip, [r1], #1 + mov pc, lr + +#endif + + --- linux-2.6.0-test6/arch/arm/lib/io-readsw-armv4.S 2003-06-14 12:18:23.000000000 -0700 +++ 25/arch/arm/lib/io-readsw-armv4.S 2003-10-05 00:33:23.000000000 -0700 @@ -9,7 +9,14 @@ */ #include #include -#include + + .macro pack, rd, hw1, hw2 +#ifndef __ARMEB__ + orr \rd, \hw1, \hw2, lsl #16 +#else + orr \rd, \hw2, \hw1, lsl #16 +#endif + .endm .insw_bad_alignment: adr r0, .insw_bad_align_msg @@ -41,19 +48,19 @@ ENTRY(__raw_readsw) .insw_8_lp: ldrh r3, [r0] ldrh r4, [r0] - orr r3, r3, r4, lsl #16 + pack r3, r3, r4 ldrh r4, [r0] ldrh r5, [r0] - orr r4, r4, r5, lsl #16 + pack r4, r4, r5 ldrh r5, [r0] ldrh ip, [r0] - orr r5, r5, ip, lsl #16 + pack r5, r5, ip ldrh ip, [r0] ldrh lr, [r0] - orr ip, ip, lr, lsl #16 + pack ip, ip, lr stmia r1!, {r3 - r5, ip} @@ -68,11 +75,11 @@ ENTRY(__raw_readsw) ldrh r3, [r0] ldrh r4, [r0] - orr r3, r3, r4, lsl #16 + pack r3, r3, r4 ldrh r4, [r0] ldrh ip, [r0] - orr r4, r4, ip, lsl #16 + pack r4, r4, ip stmia r1!, {r3, r4} @@ -81,7 +88,7 @@ ENTRY(__raw_readsw) ldrh r3, [r0] ldrh ip, [r0] - orr r3, r3, ip, lsl #16 + pack r3, r3, ip str r3, [r1], #4 --- linux-2.6.0-test6/arch/arm/lib/io-writesw-armv4.S 2003-06-14 12:18:34.000000000 -0700 +++ 25/arch/arm/lib/io-writesw-armv4.S 2003-10-05 00:33:23.000000000 -0700 @@ -9,7 +9,18 @@ */ #include #include -#include + + .macro outword, rd +#ifndef __ARMEB__ + strh \rd, [r0] + mov \rd, \rd, lsr #16 + strh \rd, [r0] +#else + mov lr, \rd, lsr #16 + strh lr, [r0] + strh \rd, [r0] +#endif + .endm .outsw_bad_alignment: adr r0, .outsw_bad_align_msg @@ -40,20 +51,10 @@ ENTRY(__raw_writesw) bmi .no_outsw_8 .outsw_8_lp: ldmia r1!, {r3, r4, r5, ip} - - strh r3, [r0] - mov r3, r3, lsr #16 - strh r3, [r0] - strh r4, [r0] - mov r4, r4, lsr #16 - strh r4, [r0] - strh r5, [r0] - mov r5, r5, lsr #16 - strh r5, [r0] - strh ip, [r0] - mov ip, ip, lsr #16 - strh ip, [r0] - + outword r3 + outword r4 + outword r5 + outword ip subs r2, r2, #8 bpl .outsw_8_lp @@ -64,20 +65,14 @@ ENTRY(__raw_writesw) beq .no_outsw_4 ldmia r1!, {r3, ip} - strh r3, [r0] - mov r3, r3, lsr #16 - strh r3, [r0] - strh ip, [r0] - mov ip, ip, lsr #16 - strh ip, [r0] + outword r3 + outword ip .no_outsw_4: tst r2, #2 beq .no_outsw_2 ldr r3, [r1], #4 - strh r3, [r0] - mov r3, r3, lsr #16 - strh r3, [r0] + outword r3 .no_outsw_2: tst r2, 
#1 ldrneh r3, [r1] --- linux-2.6.0-test6/arch/arm/lib/lib1funcs.S 2003-09-27 18:57:43.000000000 -0700 +++ 25/arch/arm/lib/lib1funcs.S 2003-10-05 00:33:23.000000000 -0700 @@ -1,7 +1,12 @@ -@ libgcc1 routines for ARM cpu. -@ Division routines, written by Richard Earnshaw, (rearnsha@armltd.co.uk) +/* + * linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines + * + * Author: Nicolas Pitre + * - contributed to gcc-3.4 on Sep 30, 2003 + * - adapted for the Linux kernel on Oct 2, 2003 + */ -/* Copyright (C) 1995, 1996, 1998 Free Software Foundation, Inc. +/* Copyright 1995, 1996, 1998, 1999, 2000, 2003 Free Software Foundation, Inc. This file is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the @@ -10,11 +15,12 @@ later version. In addition to the permissions in the GNU General Public License, the Free Software Foundation gives you unlimited permission to link the -compiled version of this file with other programs, and to distribute -those programs without any restriction coming from the use of this -file. (The General Public License restrictions do apply in other -respects; for example, they cover modification of the file, and -distribution when not linked into another program.) +compiled version of this file into combinations with other programs, +and to distribute those combinations without any restriction coming +from the use of this file. (The General Public License restrictions +do apply in other respects; for example, they cover modification of +the file, and distribution when not linked into a combine +executable.) This file is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -26,286 +32,283 @@ along with this program; see the file CO the Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ -/* As a special exception, if you link this library with other files, - some of which are compiled with GCC, to produce an executable, - this library does not by itself cause the resulting executable - to be covered by the GNU General Public License. - This exception does not however invalidate any other reasons why - the executable file might be covered by the GNU General Public License. - */ -/* This code is derived from gcc 2.95.3 - * 29/07/01 Adapted for linux - * 27/03/03 Ian Molton Clean up CONFIG_CPU - */ #include #include -#include -#define RET mov -#define RETc(x) mov##x -#define RETCOND - -dividend .req r0 -divisor .req r1 -result .req r2 -overdone .req r2 -curbit .req r3 + +.macro ARM_DIV_BODY dividend, divisor, result, curbit + +#if __LINUX_ARM_ARCH__ >= 5 + + clz \curbit, \divisor + clz \result, \dividend + sub \result, \curbit, \result + mov \curbit, #1 + mov \divisor, \divisor, lsl \result + mov \curbit, \curbit, lsl \result + mov \result, #0 -ENTRY(__udivsi3) - cmp divisor, #0 - beq Ldiv0 - mov curbit, #1 - mov result, #0 - cmp dividend, divisor - bcc Lgot_result_udivsi3 -1: +#else + + @ Initially shift the divisor left 3 bits if possible, + @ set curbit accordingly. This allows for curbit to be located + @ at the left end of each 4 bit nibbles in the division loop + @ to save one loop in most cases. + tst \divisor, #0xe0000000 + moveq \divisor, \divisor, lsl #3 + moveq \curbit, #8 + movne \curbit, #1 + @ Unless the divisor is very big, shift it up in multiples of @ four bits, since this is the amount of unwinding in the main @ division loop. 
Continue shifting until the divisor is @ larger than the dividend. - cmp divisor, #0x10000000 - cmpcc divisor, dividend - movcc divisor, divisor, lsl #4 - movcc curbit, curbit, lsl #4 - bcc 1b +1: cmp \divisor, #0x10000000 + cmplo \divisor, \dividend + movlo \divisor, \divisor, lsl #4 + movlo \curbit, \curbit, lsl #4 + blo 1b -2: @ For very big divisors, we must shift it a bit at a time, or @ we will be in danger of overflowing. - cmp divisor, #0x80000000 - cmpcc divisor, dividend - movcc divisor, divisor, lsl #1 - movcc curbit, curbit, lsl #1 - bcc 2b - -3: - @ Test for possible subtractions, and note which bits - @ are done in the result. On the final pass, this may subtract - @ too much from the dividend, but the result will be ok, since the - @ "bit" will have been shifted out at the bottom. - cmp dividend, divisor - subcs dividend, dividend, divisor - orrcs result, result, curbit - cmp dividend, divisor, lsr #1 - subcs dividend, dividend, divisor, lsr #1 - orrcs result, result, curbit, lsr #1 - cmp dividend, divisor, lsr #2 - subcs dividend, dividend, divisor, lsr #2 - orrcs result, result, curbit, lsr #2 - cmp dividend, divisor, lsr #3 - subcs dividend, dividend, divisor, lsr #3 - orrcs result, result, curbit, lsr #3 - cmp dividend, #0 @ Early termination? - movnes curbit, curbit, lsr #4 @ No, any more bits to do? - movne divisor, divisor, lsr #4 - bne 3b -Lgot_result_udivsi3: - mov r0, result - RET pc, lr +1: cmp \divisor, #0x80000000 + cmplo \divisor, \dividend + movlo \divisor, \divisor, lsl #1 + movlo \curbit, \curbit, lsl #1 + blo 1b -Ldiv0: - str lr, [sp, #-4]! - bl __div0 - mov r0, #0 @ about as wrong as it could be - ldmia sp!, {pc}RETCOND + mov \result, #0 -/* __umodsi3 ----------------------- */ +#endif + + @ Division loop +1: cmp \dividend, \divisor + subhs \dividend, \dividend, \divisor + orrhs \result, \result, \curbit + cmp \dividend, \divisor, lsr #1 + subhs \dividend, \dividend, \divisor, lsr #1 + orrhs \result, \result, \curbit, lsr #1 + cmp \dividend, \divisor, lsr #2 + subhs \dividend, \dividend, \divisor, lsr #2 + orrhs \result, \result, \curbit, lsr #2 + cmp \dividend, \divisor, lsr #3 + subhs \dividend, \dividend, \divisor, lsr #3 + orrhs \result, \result, \curbit, lsr #3 + cmp \dividend, #0 @ Early termination? + movnes \curbit, \curbit, lsr #4 @ No, any more bits to do? + movne \divisor, \divisor, lsr #4 + bne 1b + +.endm + + +.macro ARM_DIV2_ORDER divisor, order + +#if __LINUX_ARM_ARCH__ >= 5 + + clz \order, \divisor + rsb \order, \order, #31 + +#else + + cmp \divisor, #(1 << 16) + movhs \divisor, \divisor, lsr #16 + movhs \order, #16 + movlo \order, #0 + + cmp \divisor, #(1 << 8) + movhs \divisor, \divisor, lsr #8 + addhs \order, \order, #8 + + cmp \divisor, #(1 << 4) + movhs \divisor, \divisor, lsr #4 + addhs \order, \order, #4 + + cmp \divisor, #(1 << 2) + addhi \order, \order, #3 + addls \order, \order, \divisor, lsr #1 + +#endif + +.endm + + +.macro ARM_MOD_BODY dividend, divisor, order, spare + +#if __LINUX_ARM_ARCH__ >= 5 + + clz \order, \divisor + clz \spare, \dividend + sub \order, \order, \spare + mov \divisor, \divisor, lsl \order + +#else + + mov \order, #0 -ENTRY(__umodsi3) - cmp divisor, #0 - beq Ldiv0 - mov curbit, #1 - cmp dividend, divisor - RETc(cc) pc, lr -1: @ Unless the divisor is very big, shift it up in multiples of @ four bits, since this is the amount of unwinding in the main @ division loop. Continue shifting until the divisor is @ larger than the dividend. 
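Both the deleted gcc-2.95 routines and the new macros implement restoring division: align the divisor under the dividend, then perform four trial subtractions per iteration. What changes is the setup cost - on ARMv5 a pair of clz instructions replaces the shift-up loops entirely, and power-of-two divisors short-circuit through ARM_DIV2_ORDER to a single shift. A C model of both paths (udiv and div2_order are hypothetical names; GCC's __builtin_clz stands in for the clz instruction):

    #include <stdio.h>

    /* ARM_DIV2_ORDER, pre-ARMv5 path: binary-search log2 of a
       power-of-two value in steps of 16, 8, 4, then the last bits. */
    static unsigned int div2_order(unsigned int d)
    {
        unsigned int order = 0;

        if (d >= 1u << 16) { d >>= 16; order += 16; }
        if (d >= 1u << 8)  { d >>= 8;  order += 8;  }
        if (d >= 1u << 4)  { d >>= 4;  order += 4;  }
        if (d > 1u << 2)                /* d == 8 */
            order += 3;
        else                            /* d == 1, 2 or 4 */
            order += d >> 1;
        return order;
    }

    /* ARM_DIV_BODY, ARMv5 flavour: clz-align the divisor, then do
       four trial subtractions per loop, like the cmp/subhs/orrhs
       ladder. */
    static unsigned int udiv(unsigned int dividend, unsigned int divisor)
    {
        unsigned int result = 0, curbit = 1;
        int shift, i;

        if (divisor == 0 || dividend == 0)
            return 0;                   /* real code branches to Ldiv0 */
        if ((divisor & (divisor - 1)) == 0)     /* power of 2 */
            return dividend >> div2_order(divisor);

        shift = __builtin_clz(divisor) - __builtin_clz(dividend);
        if (shift > 0) {                /* align top bits */
            divisor <<= shift;
            curbit <<= shift;
        }
        do {
            for (i = 0; i < 4 && curbit; i++) {
                if (dividend >= divisor) {
                    dividend -= divisor;
                    result |= curbit;
                }
                divisor >>= 1;
                curbit >>= 1;
            }
        } while (dividend && curbit);
        return result;
    }

    int main(void)
    {
        printf("%u %u\n", udiv(1000000, 7), udiv(1000000, 64));
        return 0;                       /* prints: 142857 15625 */
    }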
-	cmp	divisor, #0x10000000
-	cmpcc	divisor, dividend
-	movcc	divisor, divisor, lsl #4
-	movcc	curbit, curbit, lsl #4
-	bcc	1b
+1:	cmp	\divisor, #0x10000000
+	cmplo	\divisor, \dividend
+	movlo	\divisor, \divisor, lsl #4
+	addlo	\order, \order, #4
+	blo	1b
 
-2:
 	@ For very big divisors, we must shift it a bit at a time, or
 	@ we will be in danger of overflowing.
-	cmp	divisor, #0x80000000
-	cmpcc	divisor, dividend
-	movcc	divisor, divisor, lsl #1
-	movcc	curbit, curbit, lsl #1
-	bcc	2b
-
-3:
-	@ Test for possible subtractions.  On the final pass, this may
-	@ subtract too much from the dividend, so keep track of which
-	@ subtractions are done, we can fix them up afterwards...
-	mov	overdone, #0
-	cmp	dividend, divisor
-	subcs	dividend, dividend, divisor
-	cmp	dividend, divisor, lsr #1
-	subcs	dividend, dividend, divisor, lsr #1
-	orrcs	overdone, overdone, curbit, ror #1
-	cmp	dividend, divisor, lsr #2
-	subcs	dividend, dividend, divisor, lsr #2
-	orrcs	overdone, overdone, curbit, ror #2
-	cmp	dividend, divisor, lsr #3
-	subcs	dividend, dividend, divisor, lsr #3
-	orrcs	overdone, overdone, curbit, ror #3
-	mov	ip, curbit
-	cmp	dividend, #0		@ Early termination?
-	movnes	curbit, curbit, lsr #4	@ No, any more bits to do?
-	movne	divisor, divisor, lsr #4
-	bne	3b
-
-	@ Any subtractions that we should not have done will be recorded in
-	@ the top three bits of "overdone".  Exactly which were not needed
-	@ are governed by the position of the bit, stored in ip.
-	@ If we terminated early, because dividend became zero,
-	@ then none of the below will match, since the bit in ip will not be
-	@ in the bottom nibble.
-	ands	overdone, overdone, #0xe0000000
-	RETc(eq)	pc, lr		@ No fixups needed
-	tst	overdone, ip, ror #3
-	addne	dividend, dividend, divisor, lsr #3
-	tst	overdone, ip, ror #2
-	addne	dividend, dividend, divisor, lsr #2
-	tst	overdone, ip, ror #1
-	addne	dividend, dividend, divisor, lsr #1
-	RET	pc, lr
+1:	cmp	\divisor, #0x80000000
+	cmplo	\divisor, \dividend
+	movlo	\divisor, \divisor, lsl #1
+	addlo	\order, \order, #1
+	blo	1b
+
+#endif
+
+	@ Perform all needed subtractions to keep only the remainder.
+	@ Do comparisons in batch of 4 first.
+	subs	\order, \order, #3		@ yes, 3 is intended here
+	blt	2f
+
+1:	cmp	\dividend, \divisor
+	subhs	\dividend, \dividend, \divisor
+	cmp	\dividend, \divisor, lsr #1
+	subhs	\dividend, \dividend, \divisor, lsr #1
+	cmp	\dividend, \divisor, lsr #2
+	subhs	\dividend, \dividend, \divisor, lsr #2
+	cmp	\dividend, \divisor, lsr #3
+	subhs	\dividend, \dividend, \divisor, lsr #3
+	cmp	\dividend, #1
+	mov	\divisor, \divisor, lsr #4
+	subges	\order, \order, #4
+	bge	1b
+
+	tst	\order, #3
+	teqne	\dividend, #0
+	beq	5f
+
+	@ Either 1, 2 or 3 comparison/subtractions are left.
+2: cmn \order, #2 + blt 4f + beq 3f + cmp \dividend, \divisor + subhs \dividend, \dividend, \divisor + mov \divisor, \divisor, lsr #1 +3: cmp \dividend, \divisor + subhs \dividend, \dividend, \divisor + mov \divisor, \divisor, lsr #1 +4: cmp \dividend, \divisor + subhs \dividend, \dividend, \divisor +5: +.endm + + +ENTRY(__udivsi3) + + subs r2, r1, #1 + moveq pc, lr + bcc Ldiv0 + cmp r0, r1 + bls 11f + tst r1, r2 + beq 12f + + ARM_DIV_BODY r0, r1, r2, r3 + + mov r0, r2 + mov pc, lr + +11: moveq r0, #1 + movne r0, #0 + mov pc, lr + +12: ARM_DIV2_ORDER r1, r2 + + mov r0, r0, lsr r2 + mov pc, lr + + +ENTRY(__umodsi3) + + subs r2, r1, #1 @ compare divisor with 1 + bcc Ldiv0 + cmpne r0, r1 @ compare dividend with divisor + moveq r0, #0 + tsthi r1, r2 @ see if divisor is power of 2 + andeq r0, r0, r2 + movls pc, lr + + ARM_MOD_BODY r0, r1, r2, r3 + + mov pc, lr + ENTRY(__divsi3) - eor ip, dividend, divisor @ Save the sign of the result. - mov curbit, #1 - mov result, #0 - cmp divisor, #0 - rsbmi divisor, divisor, #0 @ Loops below use unsigned. + + cmp r1, #0 + eor ip, r0, r1 @ save the sign of the result. beq Ldiv0 - cmp dividend, #0 - rsbmi dividend, dividend, #0 - cmp dividend, divisor - bcc Lgot_result_divsi3 + rsbmi r1, r1, #0 @ loops below use unsigned. + subs r2, r1, #1 @ division by 1 or -1 ? + beq 10f + movs r3, r0 + rsbmi r3, r0, #0 @ positive dividend value + cmp r3, r1 + bls 11f + tst r1, r2 @ divisor is power of 2 ? + beq 12f -1: - @ Unless the divisor is very big, shift it up in multiples of - @ four bits, since this is the amount of unwinding in the main - @ division loop. Continue shifting until the divisor is - @ larger than the dividend. - cmp divisor, #0x10000000 - cmpcc divisor, dividend - movcc divisor, divisor, lsl #4 - movcc curbit, curbit, lsl #4 - bcc 1b + ARM_DIV_BODY r3, r1, r0, r2 -2: - @ For very big divisors, we must shift it a bit at a time, or - @ we will be in danger of overflowing. - cmp divisor, #0x80000000 - cmpcc divisor, dividend - movcc divisor, divisor, lsl #1 - movcc curbit, curbit, lsl #1 - bcc 2b - -3: - @ Test for possible subtractions, and note which bits - @ are done in the result. On the final pass, this may subtract - @ too much from the dividend, but the result will be ok, since the - @ "bit" will have been shifted out at the bottom. - cmp dividend, divisor - subcs dividend, dividend, divisor - orrcs result, result, curbit - cmp dividend, divisor, lsr #1 - subcs dividend, dividend, divisor, lsr #1 - orrcs result, result, curbit, lsr #1 - cmp dividend, divisor, lsr #2 - subcs dividend, dividend, divisor, lsr #2 - orrcs result, result, curbit, lsr #2 - cmp dividend, divisor, lsr #3 - subcs dividend, dividend, divisor, lsr #3 - orrcs result, result, curbit, lsr #3 - cmp dividend, #0 @ Early termination? - movnes curbit, curbit, lsr #4 @ No, any more bits to do? - movne divisor, divisor, lsr #4 - bne 3b -Lgot_result_divsi3: - mov r0, result cmp ip, #0 rsbmi r0, r0, #0 - RET pc, lr + mov pc, lr + +10: teq ip, r0 @ same sign ? + rsbmi r0, r0, #0 + mov pc, lr + +11: movlo r0, #0 + moveq r0, ip, asr #31 + orreq r0, r0, #1 + mov pc, lr + +12: ARM_DIV2_ORDER r1, r2 + + cmp ip, #0 + mov r0, r3, lsr r2 + rsbmi r0, r0, #0 + mov pc, lr + ENTRY(__modsi3) - mov curbit, #1 - cmp divisor, #0 - rsbmi divisor, divisor, #0 @ Loops below use unsigned. + + cmp r1, #0 beq Ldiv0 - @ Need to save the sign of the dividend, unfortunately, we need - @ ip later on; this is faster than pushing lr and using that. - str dividend, [sp, #-4]! 
- cmp dividend, #0 - rsbmi dividend, dividend, #0 - cmp dividend, divisor - bcc Lgot_result_modsi3 + rsbmi r1, r1, #0 @ loops below use unsigned. + movs ip, r0 @ preserve sign of dividend + rsbmi r0, r0, #0 @ if negative make positive + subs r2, r1, #1 @ compare divisor with 1 + cmpne r0, r1 @ compare dividend with divisor + moveq r0, #0 + tsthi r1, r2 @ see if divisor is power of 2 + andeq r0, r0, r2 + bls 10f + + ARM_MOD_BODY r0, r1, r2, r3 + +10: cmp ip, #0 + rsbmi r0, r0, #0 + mov pc, lr + + +Ldiv0: + + str lr, [sp, #-4]! + bl __div0 + mov r0, #0 @ About as wrong as it could be. + ldr pc, [sp], #4 -1: - @ Unless the divisor is very big, shift it up in multiples of - @ four bits, since this is the amount of unwinding in the main - @ division loop. Continue shifting until the divisor is - @ larger than the dividend. - cmp divisor, #0x10000000 - cmpcc divisor, dividend - movcc divisor, divisor, lsl #4 - movcc curbit, curbit, lsl #4 - bcc 1b -2: - @ For very big divisors, we must shift it a bit at a time, or - @ we will be in danger of overflowing. - cmp divisor, #0x80000000 - cmpcc divisor, dividend - movcc divisor, divisor, lsl #1 - movcc curbit, curbit, lsl #1 - bcc 2b - -3: - @ Test for possible subtractions. On the final pass, this may - @ subtract too much from the dividend, so keep track of which - @ subtractions are done, we can fix them up afterwards... - mov overdone, #0 - cmp dividend, divisor - subcs dividend, dividend, divisor - cmp dividend, divisor, lsr #1 - subcs dividend, dividend, divisor, lsr #1 - orrcs overdone, overdone, curbit, ror #1 - cmp dividend, divisor, lsr #2 - subcs dividend, dividend, divisor, lsr #2 - orrcs overdone, overdone, curbit, ror #2 - cmp dividend, divisor, lsr #3 - subcs dividend, dividend, divisor, lsr #3 - orrcs overdone, overdone, curbit, ror #3 - mov ip, curbit - cmp dividend, #0 @ Early termination? - movnes curbit, curbit, lsr #4 @ No, any more bits to do? - movne divisor, divisor, lsr #4 - bne 3b - - @ Any subtractions that we should not have done will be recorded in - @ the top three bits of "overdone". Exactly which were not needed - @ are governed by the position of the bit, stored in ip. - @ If we terminated early, because dividend became zero, - @ then none of the below will match, since the bit in ip will not be - @ in the bottom nibble. - ands overdone, overdone, #0xe0000000 - beq Lgot_result_modsi3 - tst overdone, ip, ror #3 - addne dividend, dividend, divisor, lsr #3 - tst overdone, ip, ror #2 - addne dividend, dividend, divisor, lsr #2 - tst overdone, ip, ror #1 - addne dividend, dividend, divisor, lsr #1 -Lgot_result_modsi3: - ldr ip, [sp], #4 - cmp ip, #0 - rsbmi dividend, dividend, #0 - RET pc, lr --- linux-2.6.0-test6/arch/arm/mach-integrator/core.c 2003-09-27 18:57:43.000000000 -0700 +++ 25/arch/arm/mach-integrator/core.c 2003-10-05 00:33:23.000000000 -0700 @@ -1,134 +1,59 @@ /* - * linux/arch/arm/mach-integrator/arch.c + * linux/arch/arm/mach-integrator/core.c * - * Copyright (C) 2000 Deep Blue Solutions Ltd + * Copyright (C) 2000-2003 Deep Blue Solutions Ltd * * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * it under the terms of the GNU General Public License version 2, as + * published by the Free Software Foundation. */ #include #include #include -#include #include -#include -#include #include -#include #include -#include -#include #include -#include - -#include - -#include -#include -#include - -/* - * All IO addresses are mapped onto VA 0xFFFx.xxxx, where x.xxxx - * is the (PA >> 12). - * - * Setup a VA for the Integrator interrupt controller (for header #0, - * just for now). - */ -#define VA_IC_BASE IO_ADDRESS(INTEGRATOR_IC_BASE) -#define VA_SC_BASE IO_ADDRESS(INTEGRATOR_SC_BASE) -#define VA_CMIC_BASE IO_ADDRESS(INTEGRATOR_HDR_BASE) + INTEGRATOR_HDR_IC_OFFSET - -/* - * Logical Physical - * e8000000 40000000 PCI memory PHYS_PCI_MEM_BASE (max 512M) - * ec000000 61000000 PCI config space PHYS_PCI_CONFIG_BASE (max 16M) - * ed000000 62000000 PCI V3 regs PHYS_PCI_V3_BASE (max 64k) - * ee000000 60000000 PCI IO PHYS_PCI_IO_BASE (max 16M) - * ef000000 Cache flush - * f1000000 10000000 Core module registers - * f1100000 11000000 System controller registers - * f1200000 12000000 EBI registers - * f1300000 13000000 Counter/Timer - * f1400000 14000000 Interrupt controller - * f1500000 15000000 RTC - * f1600000 16000000 UART 0 - * f1700000 17000000 UART 1 - * f1a00000 1a000000 Debug LEDs - * f1b00000 1b000000 GPIO - */ -static struct map_desc integrator_io_desc[] __initdata = { - { IO_ADDRESS(INTEGRATOR_HDR_BASE), INTEGRATOR_HDR_BASE, SZ_4K, MT_DEVICE }, - { IO_ADDRESS(INTEGRATOR_SC_BASE), INTEGRATOR_SC_BASE, SZ_4K, MT_DEVICE }, - { IO_ADDRESS(INTEGRATOR_EBI_BASE), INTEGRATOR_EBI_BASE, SZ_4K, MT_DEVICE }, - { IO_ADDRESS(INTEGRATOR_CT_BASE), INTEGRATOR_CT_BASE, SZ_4K, MT_DEVICE }, - { IO_ADDRESS(INTEGRATOR_IC_BASE), INTEGRATOR_IC_BASE, SZ_4K, MT_DEVICE }, - { IO_ADDRESS(INTEGRATOR_RTC_BASE), INTEGRATOR_RTC_BASE, SZ_4K, MT_DEVICE }, - { IO_ADDRESS(INTEGRATOR_UART0_BASE), INTEGRATOR_UART0_BASE, SZ_4K, MT_DEVICE }, - { IO_ADDRESS(INTEGRATOR_UART1_BASE), INTEGRATOR_UART1_BASE, SZ_4K, MT_DEVICE }, - { IO_ADDRESS(INTEGRATOR_DBG_BASE), INTEGRATOR_DBG_BASE, SZ_4K, MT_DEVICE }, - { IO_ADDRESS(INTEGRATOR_GPIO_BASE), INTEGRATOR_GPIO_BASE, SZ_4K, MT_DEVICE }, - { PCI_MEMORY_VADDR, PHYS_PCI_MEM_BASE, SZ_16M, MT_DEVICE }, - { PCI_CONFIG_VADDR, PHYS_PCI_CONFIG_BASE, SZ_16M, MT_DEVICE }, - { PCI_V3_VADDR, PHYS_PCI_V3_BASE, SZ_64K, MT_DEVICE }, - { PCI_IO_VADDR, PHYS_PCI_IO_BASE, SZ_64K, MT_DEVICE } +static struct amba_device rtc_device = { + .dev = { + .bus_id = "mb:15", + }, + .res = { + .start = INTEGRATOR_RTC_BASE, + .end = INTEGRATOR_RTC_BASE + SZ_4K - 1, + .flags = IORESOURCE_MEM, + }, + .irq = IRQ_RTCINT, + .periphid = 0x00041030, }; -static void __init integrator_map_io(void) -{ - iotable_init(integrator_io_desc, ARRAY_SIZE(integrator_io_desc)); -} - -#define ALLPCI ( (1 << IRQ_PCIINT0) | (1 << IRQ_PCIINT1) | (1 << IRQ_PCIINT2) | (1 << IRQ_PCIINT3) ) - -static void sc_mask_irq(unsigned int irq) -{ - writel(1 << irq, VA_IC_BASE + IRQ_ENABLE_CLEAR); -} - -static void sc_unmask_irq(unsigned int irq) -{ - writel(1 << irq, VA_IC_BASE + IRQ_ENABLE_SET); -} - -static struct irqchip sc_chip = { - .ack = sc_mask_irq, - .mask = sc_mask_irq, - .unmask = sc_unmask_irq, +static struct amba_device uart0_device = { + .dev = { + .bus_id = "mb:16", + }, + 
.res = { + .start = INTEGRATOR_UART0_BASE, + .end = INTEGRATOR_UART0_BASE + SZ_4K - 1, + .flags = IORESOURCE_MEM, + }, + .irq = IRQ_UARTINT0, + .periphid = 0x0041010, }; -static void __init integrator_init_irq(void) -{ - unsigned int i; - - /* Disable all interrupts initially. */ - /* Do the core module ones */ - writel(-1, VA_CMIC_BASE + IRQ_ENABLE_CLEAR); - - /* do the header card stuff next */ - writel(-1, VA_IC_BASE + IRQ_ENABLE_CLEAR); - writel(-1, VA_IC_BASE + FIQ_ENABLE_CLEAR); - - for (i = 0; i < NR_IRQS; i++) { - if (((1 << i) && INTEGRATOR_SC_VALID_INT) != 0) { - set_irq_chip(i, &sc_chip); - set_irq_handler(i, do_level_IRQ); - set_irq_flags(i, IRQF_VALID | IRQF_PROBE); - } - } -} +static struct amba_device uart1_device = { + .dev = { + .bus_id = "mb:17", + }, + .res = { + .start = INTEGRATOR_UART1_BASE, + .end = INTEGRATOR_UART1_BASE + SZ_4K - 1, + .flags = IORESOURCE_MEM, + }, + .irq = IRQ_UARTINT1, + .periphid = 0x0041010, +}; static struct amba_device kmi0_device = { .dev = { @@ -136,7 +61,7 @@ static struct amba_device kmi0_device = }, .res = { .start = KMI0_BASE, - .end = KMI0_BASE + KMI_SIZE - 1, + .end = KMI0_BASE + SZ_4K - 1, .flags = IORESOURCE_MEM, }, .irq = IRQ_KMIINT0, @@ -149,7 +74,7 @@ static struct amba_device kmi1_device = }, .res = { .start = KMI1_BASE, - .end = KMI1_BASE + KMI_SIZE - 1, + .end = KMI1_BASE + SZ_4K - 1, .flags = IORESOURCE_MEM, }, .irq = IRQ_KMIINT1, @@ -157,52 +82,23 @@ static struct amba_device kmi1_device = }; static struct amba_device *amba_devs[] __initdata = { + &rtc_device, + &uart0_device, + &uart1_device, &kmi0_device, &kmi1_device, }; -static int __init register_devices(void) +static int __init integrator_init(void) { - unsigned long sc_dec; int i; for (i = 0; i < ARRAY_SIZE(amba_devs); i++) { struct amba_device *d = amba_devs[i]; - amba_device_register(d, &iomem_resource); } - sc_dec = readl(VA_SC_BASE + INTEGRATOR_SC_DEC_OFFSET); - for (i = 0; i < 4; i++) { - struct lm_device *lmdev; - - if ((sc_dec & (16 << i)) == 0) - continue; - - lmdev = kmalloc(sizeof(struct lm_device), GFP_KERNEL); - if (!lmdev) - continue; - - memset(lmdev, 0, sizeof(struct lm_device)); - - lmdev->resource.start = 0xc0000000 + 0x10000000 * i; - lmdev->resource.end = lmdev->resource.start + 0x0fffffff; - lmdev->resource.flags = IORESOURCE_MEM; - lmdev->irq = IRQ_EXPINT0 + i; - lmdev->id = i; - - lm_device_register(lmdev); - } - return 0; } -arch_initcall(register_devices); - -MACHINE_START(INTEGRATOR, "ARM-Integrator") - MAINTAINER("ARM Ltd/Deep Blue Solutions Ltd") - BOOT_MEM(0x00000000, 0x16000000, 0xf1600000) - BOOT_PARAMS(0x00000100) - MAPIO(integrator_map_io) - INITIRQ(integrator_init_irq) -MACHINE_END +arch_initcall(integrator_init); --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25/arch/arm/mach-integrator/integrator_ap.c 2003-10-05 00:33:23.000000000 -0700 @@ -0,0 +1,294 @@ +/* + * linux/arch/arm/mach-integrator/integrator_ap.c + * + * Copyright (C) 2000-2003 Deep Blue Solutions Ltd + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include + + +/* + * All IO addresses are mapped onto VA 0xFFFx.xxxx, where x.xxxx + * is the (PA >> 12). + * + * Setup a VA for the Integrator interrupt controller (for header #0, + * just for now). + */ +#define VA_IC_BASE IO_ADDRESS(INTEGRATOR_IC_BASE) +#define VA_SC_BASE IO_ADDRESS(INTEGRATOR_SC_BASE) +#define VA_EBI_BASE IO_ADDRESS(INTEGRATOR_EBI_BASE) +#define VA_CMIC_BASE IO_ADDRESS(INTEGRATOR_HDR_BASE) + INTEGRATOR_HDR_IC_OFFSET + +/* + * Logical Physical + * e8000000 40000000 PCI memory PHYS_PCI_MEM_BASE (max 512M) + * ec000000 61000000 PCI config space PHYS_PCI_CONFIG_BASE (max 16M) + * ed000000 62000000 PCI V3 regs PHYS_PCI_V3_BASE (max 64k) + * ee000000 60000000 PCI IO PHYS_PCI_IO_BASE (max 16M) + * ef000000 Cache flush + * f1000000 10000000 Core module registers + * f1100000 11000000 System controller registers + * f1200000 12000000 EBI registers + * f1300000 13000000 Counter/Timer + * f1400000 14000000 Interrupt controller + * f1500000 15000000 RTC + * f1600000 16000000 UART 0 + * f1700000 17000000 UART 1 + * f1a00000 1a000000 Debug LEDs + * f1b00000 1b000000 GPIO + */ + +static struct map_desc ap_io_desc[] __initdata = { + { IO_ADDRESS(INTEGRATOR_HDR_BASE), INTEGRATOR_HDR_BASE, SZ_4K, MT_DEVICE }, + { IO_ADDRESS(INTEGRATOR_SC_BASE), INTEGRATOR_SC_BASE, SZ_4K, MT_DEVICE }, + { IO_ADDRESS(INTEGRATOR_EBI_BASE), INTEGRATOR_EBI_BASE, SZ_4K, MT_DEVICE }, + { IO_ADDRESS(INTEGRATOR_CT_BASE), INTEGRATOR_CT_BASE, SZ_4K, MT_DEVICE }, + { IO_ADDRESS(INTEGRATOR_IC_BASE), INTEGRATOR_IC_BASE, SZ_4K, MT_DEVICE }, + { IO_ADDRESS(INTEGRATOR_RTC_BASE), INTEGRATOR_RTC_BASE, SZ_4K, MT_DEVICE }, + { IO_ADDRESS(INTEGRATOR_UART0_BASE), INTEGRATOR_UART0_BASE, SZ_4K, MT_DEVICE }, + { IO_ADDRESS(INTEGRATOR_UART1_BASE), INTEGRATOR_UART1_BASE, SZ_4K, MT_DEVICE }, + { IO_ADDRESS(INTEGRATOR_DBG_BASE), INTEGRATOR_DBG_BASE, SZ_4K, MT_DEVICE }, + { IO_ADDRESS(INTEGRATOR_GPIO_BASE), INTEGRATOR_GPIO_BASE, SZ_4K, MT_DEVICE }, + { PCI_MEMORY_VADDR, PHYS_PCI_MEM_BASE, SZ_16M, MT_DEVICE }, + { PCI_CONFIG_VADDR, PHYS_PCI_CONFIG_BASE, SZ_16M, MT_DEVICE }, + { PCI_V3_VADDR, PHYS_PCI_V3_BASE, SZ_64K, MT_DEVICE }, + { PCI_IO_VADDR, PHYS_PCI_IO_BASE, SZ_64K, MT_DEVICE } +}; + +static void __init ap_map_io(void) +{ + iotable_init(ap_io_desc, ARRAY_SIZE(ap_io_desc)); +} + +#define INTEGRATOR_SC_VALID_INT 0x003fffff + +static void sc_mask_irq(unsigned int irq) +{ + writel(1 << irq, VA_IC_BASE + IRQ_ENABLE_CLEAR); +} + +static void sc_unmask_irq(unsigned int irq) +{ + writel(1 << irq, VA_IC_BASE + IRQ_ENABLE_SET); +} + +static struct irqchip sc_chip = { + .ack = sc_mask_irq, + .mask = sc_mask_irq, + .unmask = sc_unmask_irq, +}; + +static void __init ap_init_irq(void) +{ + unsigned int i; + + /* Disable all interrupts initially. 
*/ + /* Do the core module ones */ + writel(-1, VA_CMIC_BASE + IRQ_ENABLE_CLEAR); + + /* do the header card stuff next */ + writel(-1, VA_IC_BASE + IRQ_ENABLE_CLEAR); + writel(-1, VA_IC_BASE + FIQ_ENABLE_CLEAR); + + for (i = 0; i < NR_IRQS; i++) { + if (((1 << i) && INTEGRATOR_SC_VALID_INT) != 0) { + set_irq_chip(i, &sc_chip); + set_irq_handler(i, do_level_IRQ); + set_irq_flags(i, IRQF_VALID | IRQF_PROBE); + } + } +} + +#ifdef CONFIG_PM +static unsigned long ic_irq_enable; + +static int irq_suspend(struct sys_device *dev, u32 state) +{ + ic_irq_enable = readl(VA_IC_BASE + IRQ_ENABLE); + return 0; +} + +static int irq_resume(struct sys_device *dev) +{ + /* disable all irq sources */ + writel(-1, VA_CMIC_BASE + IRQ_ENABLE_CLEAR); + writel(-1, VA_IC_BASE + IRQ_ENABLE_CLEAR); + writel(-1, VA_IC_BASE + FIQ_ENABLE_CLEAR); + + writel(ic_irq_enable, VA_IC_BASE + IRQ_ENABLE_SET); + return 0; +} +#else +#define irq_suspend NULL +#define irq_resume NULL +#endif + +static struct sysdev_class irq_class = { + set_kset_name("irq"), + .suspend = irq_suspend, + .resume = irq_resume, +}; + +static struct sys_device irq_device = { + .id = 0, + .cls = &irq_class, +}; + +static int __init irq_init_sysfs(void) +{ + int ret = sysdev_class_register(&irq_class); + if (ret == 0) + ret = sys_device_register(&irq_device); + return ret; +} + +device_initcall(irq_init_sysfs); + +/* + * Flash handling. + */ +#define SC_CTRLC (VA_SC_BASE + INTEGRATOR_SC_CTRLC_OFFSET) +#define SC_CTRLS (VA_SC_BASE + INTEGRATOR_SC_CTRLS_OFFSET) +#define EBI_CSR1 (VA_EBI_BASE + INTEGRATOR_EBI_CSR1_OFFSET) +#define EBI_LOCK (VA_EBI_BASE + INTEGRATOR_EBI_LOCK_OFFSET) + +static int ap_flash_init(void) +{ + u32 tmp; + + writel(INTEGRATOR_SC_CTRL_nFLVPPEN | INTEGRATOR_SC_CTRL_nFLWP, SC_CTRLC); + + tmp = readl(EBI_CSR1) | INTEGRATOR_EBI_WRITE_ENABLE; + writel(tmp, EBI_CSR1); + + if (!(readl(EBI_CSR1) & INTEGRATOR_EBI_WRITE_ENABLE)) { + writel(0xa05f, EBI_LOCK); + writel(tmp, EBI_CSR1); + writel(0, EBI_LOCK); + } + return 0; +} + +static void ap_flash_exit(void) +{ + u32 tmp; + + writel(INTEGRATOR_SC_CTRL_nFLVPPEN | INTEGRATOR_SC_CTRL_nFLWP, SC_CTRLC); + + tmp = readl(EBI_CSR1) & ~INTEGRATOR_EBI_WRITE_ENABLE; + writel(tmp, EBI_CSR1); + + if (readl(EBI_CSR1) & INTEGRATOR_EBI_WRITE_ENABLE) { + writel(0xa05f, EBI_LOCK); + writel(tmp, EBI_CSR1); + writel(0, EBI_LOCK); + } +} + +static void ap_flash_set_vpp(int on) +{ + unsigned long reg = on ? 
SC_CTRLS : SC_CTRLC; + + writel(INTEGRATOR_SC_CTRL_nFLVPPEN, reg); +} + +static struct flash_platform_data ap_flash_data = { + .map_name = "cfi_probe", + .width = 4, + .init = ap_flash_init, + .exit = ap_flash_exit, + .set_vpp = ap_flash_set_vpp, +}; + +static struct resource cfi_flash_resource = { + .start = INTEGRATOR_FLASH_BASE, + .end = INTEGRATOR_FLASH_BASE + INTEGRATOR_FLASH_SIZE - 1, + .flags = IORESOURCE_MEM, +}; + +static struct platform_device cfi_flash_device = { + .name = "armflash", + .id = 0, + .dev = { + .platform_data = &ap_flash_data, + }, + .num_resources = 1, + .resource = &cfi_flash_resource, +}; + +static int __init ap_init(void) +{ + unsigned long sc_dec; + int i; + + platform_add_device(&cfi_flash_device); + + sc_dec = readl(VA_SC_BASE + INTEGRATOR_SC_DEC_OFFSET); + for (i = 0; i < 4; i++) { + struct lm_device *lmdev; + + if ((sc_dec & (16 << i)) == 0) + continue; + + lmdev = kmalloc(sizeof(struct lm_device), GFP_KERNEL); + if (!lmdev) + continue; + + memset(lmdev, 0, sizeof(struct lm_device)); + + lmdev->resource.start = 0xc0000000 + 0x10000000 * i; + lmdev->resource.end = lmdev->resource.start + 0x0fffffff; + lmdev->resource.flags = IORESOURCE_MEM; + lmdev->irq = IRQ_AP_EXPINT0 + i; + lmdev->id = i; + + lm_device_register(lmdev); + } + + return 0; +} + +arch_initcall(ap_init); + +MACHINE_START(INTEGRATOR, "ARM-Integrator") + MAINTAINER("ARM Ltd/Deep Blue Solutions Ltd") + BOOT_MEM(0x00000000, 0x16000000, 0xf1600000) + BOOT_PARAMS(0x00000100) + MAPIO(ap_map_io) + INITIRQ(ap_init_irq) +MACHINE_END --- linux-2.6.0-test6/arch/arm/mach-integrator/Kconfig 2003-09-27 18:57:43.000000000 -0700 +++ 25/arch/arm/mach-integrator/Kconfig 2003-10-05 00:33:23.000000000 -0700 @@ -1,8 +1,15 @@ menu "Integrator Options" depends on ARCH_INTEGRATOR +config ARCH_INTEGRATOR_AP + bool "Support Integrator/AP and Integrator/PP2 platforms" + help + Include support for the ARM(R) Integrator/AP and + Integrator/PP2 platforms. + config INTEGRATOR_IMPD1 tristate "Include support for Integrator/IM-PD1" + depends on ARCH_INTEGRATOR_AP help The IM-PD1 is an add-on logic module for the Integrator which allows ARM(R) Ltd PrimeCells to be developed and evaluated. --- linux-2.6.0-test6/arch/arm/mach-integrator/Makefile 2003-09-27 18:57:43.000000000 -0700 +++ 25/arch/arm/mach-integrator/Makefile 2003-10-05 00:33:23.000000000 -0700 @@ -4,9 +4,10 @@ # Object file lists. -obj-y := core.o lm.o time.o +obj-y := core.o lm.o time.o +obj-$(CONFIG_ARCH_INTEGRATOR_AP) += integrator_ap.o -obj-$(CONFIG_LEDS) += leds.o -obj-$(CONFIG_PCI) += pci_v3.o pci.o +obj-$(CONFIG_LEDS) += leds.o +obj-$(CONFIG_PCI) += pci_v3.o pci.o obj-$(CONFIG_CPU_FREQ_INTEGRATOR) += cpu.o obj-$(CONFIG_INTEGRATOR_IMPD1) += impd1.o --- linux-2.6.0-test6/arch/arm/mach-integrator/pci.c 2003-06-14 12:18:51.000000000 -0700 +++ 25/arch/arm/mach-integrator/pci.c 2003-10-05 00:33:23.000000000 -0700 @@ -96,7 +96,7 @@ static u8 __init integrator_swizzle(stru } static int irq_tab[4] __initdata = { - IRQ_PCIINT0, IRQ_PCIINT1, IRQ_PCIINT2, IRQ_PCIINT3 + IRQ_AP_PCIINT0, IRQ_AP_PCIINT1, IRQ_AP_PCIINT2, IRQ_AP_PCIINT3 }; /* --- linux-2.6.0-test6/arch/arm/mach-integrator/pci_v3.c 2003-06-14 12:18:28.000000000 -0700 +++ 25/arch/arm/mach-integrator/pci_v3.c 2003-10-05 00:33:23.000000000 -0700 @@ -575,7 +575,7 @@ void __init pci_v3_preinit(void) /* * Grab the PCI error interrupt. 
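One detail worth calling out in ap_flash_init()/ap_flash_exit() above: EBI_CSR1 is a lock-protected register, so the code writes the new value, reads it back, and only if the write did not stick repeats it bracketed by the 0xa05f key in EBI_LOCK. A generalized sketch of that try-then-unlock pattern (mmio_read/mmio_write are hypothetical stand-ins for readl/writel on the ioremapped bases):

    #include <stdint.h>

    #define EBI_UNLOCK_KEY 0xa05f       /* unlock value from the patch */

    /* Hypothetical MMIO accessors standing in for readl()/writel(). */
    extern uint32_t mmio_read(unsigned long addr);
    extern void mmio_write(uint32_t val, unsigned long addr);

    /* Set `mask` in a lock-protected register: try a plain write
       first; if the register ignored it, repeat the write with the
       lock held open, then re-lock. */
    static void locked_reg_set(unsigned long reg, unsigned long lock,
                               uint32_t mask)
    {
        uint32_t tmp = mmio_read(reg) | mask;

        mmio_write(tmp, reg);
        if (!(mmio_read(reg) & mask)) {
            mmio_write(EBI_UNLOCK_KEY, lock);   /* open the lock */
            mmio_write(tmp, reg);               /* now it sticks */
            mmio_write(0, lock);                /* re-lock */
        }
    }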
*/ - ret = request_irq(IRQ_V3INT, v3_irq, 0, "V3", NULL); + ret = request_irq(IRQ_AP_V3INT, v3_irq, 0, "V3", NULL); if (ret) printk(KERN_ERR "PCI: unable to grab PCI error " "interrupt: %d\n", ret); @@ -596,7 +596,7 @@ void __init pci_v3_postinit(void) v3_writeb(V3_LB_IMASK, 0x68); #if 0 - ret = request_irq(IRQ_LBUSTIMEOUT, lb_timeout, 0, "bus timeout", NULL); + ret = request_irq(IRQ_AP_LBUSTIMEOUT, lb_timeout, 0, "bus timeout", NULL); if (ret) printk(KERN_ERR "PCI: unable to grab local bus timeout " "interrupt: %d\n", ret); --- linux-2.6.0-test6/arch/arm/mach-pxa/irq.c 2003-06-14 12:18:33.000000000 -0700 +++ 25/arch/arm/mach-pxa/irq.c 2003-10-05 00:33:23.000000000 -0700 @@ -58,7 +58,19 @@ static int pxa_gpio_irq_type(unsigned in { int gpio, idx; - gpio = irq - ((irq >= IRQ_GPIO(2)) ? IRQ_GPIO(2) + 2 : IRQ_GPIO(0)); + gpio = IRQ_TO_GPIO(irq); + idx = gpio >> 5; + + if (type == IRQT_PROBE) { + /* Don't mess with enabled GPIOs using preconfigured edges or + GPIOs set to alternate function during probe */ + if ((GPIO_IRQ_rising_edge[idx] | GPIO_IRQ_falling_edge[idx]) & + GPIO_bit(gpio)) + return 0; + if (GAFR(gpio) & (0x3 << (((gpio) & 0xf)*2))) + return 0; + type = __IRQT_RISEDGE | __IRQT_FALEDGE; + } printk(KERN_DEBUG "IRQ%d (GPIO%d): ", irq, gpio); @@ -78,10 +90,8 @@ static int pxa_gpio_irq_type(unsigned in printk("edges\n"); - idx = gpio >> 5; GRER(gpio) = GPIO_IRQ_rising_edge[idx] & GPIO_IRQ_mask[idx]; GFER(gpio) = GPIO_IRQ_falling_edge[idx] & GPIO_IRQ_mask[idx]; - return 0; } --- linux-2.6.0-test6/arch/arm/mach-pxa/leds.c 2003-09-27 18:57:43.000000000 -0700 +++ 25/arch/arm/mach-pxa/leds.c 2003-10-05 00:33:23.000000000 -0700 @@ -7,6 +7,7 @@ * * Copyright (c) 2001 Jeff Sutherland, Accelent Systems Inc. */ +#include #include #include --- linux-2.6.0-test6/arch/arm/mach-pxa/lubbock.c 2003-08-22 19:23:40.000000000 -0700 +++ 25/arch/arm/mach-pxa/lubbock.c 2003-10-05 00:33:23.000000000 -0700 @@ -78,7 +78,7 @@ static void __init lubbock_init_irq(void pxa_init_irq(); /* setup extra lubbock irqs */ - for (irq = LUBBOCK_IRQ(0); irq <= LUBBOCK_IRQ(5); irq++) { + for (irq = LUBBOCK_IRQ(0); irq <= LUBBOCK_LAST_IRQ; irq++) { set_irq_chip(irq, &lubbock_irq_chip); set_irq_handler(irq, do_level_IRQ); set_irq_flags(irq, IRQF_VALID | IRQF_PROBE); @@ -124,6 +124,7 @@ static struct map_desc lubbock_io_desc[] { 0xf0000000, 0x08000000, 0x00100000, MT_DEVICE }, /* CPLD */ { 0xf1000000, 0x0c000000, 0x00100000, MT_DEVICE }, /* LAN91C96 IO */ { 0xf1100000, 0x0e000000, 0x00100000, MT_DEVICE }, /* LAN91C96 Attr */ + { 0xf4000000, 0x10000000, 0x00800000, MT_DEVICE }, /* SA1111 */ }; static void __init lubbock_map_io(void) --- linux-2.6.0-test6/arch/arm/mach-pxa/pm.c 2003-09-27 18:57:43.000000000 -0700 +++ 25/arch/arm/mach-pxa/pm.c 2003-10-05 00:33:23.000000000 -0700 @@ -11,13 +11,14 @@ * modify it under the terms of the GNU General Public License. */ #include +#include +#include #include #include #include #include #include -#include /* @@ -60,13 +61,16 @@ enum { SLEEP_SAVE_START = 0, }; -int pm_do_suspend(void) +static int pxa_pm_enter(u32 state) { unsigned long sleep_save[SLEEP_SAVE_SIZE]; unsigned long checksum = 0; unsigned long delta; int i; + if (state != PM_SUSPEND_MEM) + return -EINVAL; + /* preserve current time */ delta = xtime.tv_sec - RCNR; @@ -194,3 +198,37 @@ unsigned long sleep_phys_sp(void *sp) { return virt_to_phys(sp); } + +/* + * Called after processes are frozen, but before we shut down devices. 
+ */ +static int pxa_pm_prepare(u32 state) +{ + return 0; +} + +/* + * Called after devices are re-setup, but before processes are thawed. + */ +static int pxa_pm_finish(u32 state) +{ + return 0; +} + +/* + * Set to PM_DISK_FIRMWARE so we can quickly veto suspend-to-disk. + */ +static struct pm_ops pxa_pm_ops = { + .pm_disk_mode = PM_DISK_FIRMWARE, + .prepare = pxa_pm_prepare, + .enter = pxa_pm_enter, + .finish = pxa_pm_finish, +}; + +static int __init pxa_pm_init(void) +{ + pm_set_ops(&pxa_pm_ops); + return 0; +} + +late_initcall(pxa_pm_init); --- linux-2.6.0-test6/arch/arm/mach-sa1100/leds.c 2003-09-08 13:58:55.000000000 -0700 +++ 25/arch/arm/mach-sa1100/leds.c 2003-10-05 00:33:23.000000000 -0700 @@ -5,6 +5,7 @@ * * Copyright (C) 2001 Nicolas Pitre */ +#include #include #include --- linux-2.6.0-test6/arch/arm/mach-sa1100/pm.c 2003-09-08 13:58:55.000000000 -0700 +++ 25/arch/arm/mach-sa1100/pm.c 2003-10-05 00:33:23.000000000 -0700 @@ -22,6 +22,8 @@ * 2002-05-27: Nicolas Pitre Killed sleep.h and the kmalloced save array. * Storage is local on the stack now. */ +#include +#include #include #include @@ -54,11 +56,14 @@ enum { SLEEP_SAVE_SP = 0, }; -int pm_do_suspend(void) +static int sa11x0_pm_enter(u32 state) { unsigned long sleep_save[SLEEP_SAVE_SIZE]; unsigned long delta, gpio; + if (state != PM_SUSPEND_MEM) + return -EINVAL; + /* preserve current time */ delta = xtime.tv_sec - RCNR; gpio = GPLR; @@ -139,3 +144,37 @@ unsigned long sleep_phys_sp(void *sp) { return virt_to_phys(sp); } + +/* + * Called after processes are frozen, but before we shut down devices. + */ +static int sa11x0_pm_prepare(u32 state) +{ + return 0; +} + +/* + * Called after devices are re-setup, but before processes are thawed. + */ +static int sa11x0_pm_finish(u32 state) +{ + return 0; +} + +/* + * Set to PM_DISK_FIRMWARE so we can quickly veto suspend-to-disk. + */ +static struct pm_ops sa11x0_pm_ops = { + .pm_disk_mode = PM_DISK_FIRMWARE, + .prepare = sa11x0_pm_prepare, + .enter = sa11x0_pm_enter, + .finish = sa11x0_pm_finish, +}; + +static int __init sa11x0_pm_init(void) +{ + pm_set_ops(&sa11x0_pm_ops); + return 0; +} + +late_initcall(sa11x0_pm_init); --- linux-2.6.0-test6/arch/arm/Makefile 2003-09-27 18:57:43.000000000 -0700 +++ 25/arch/arm/Makefile 2003-10-05 00:33:23.000000000 -0700 @@ -182,7 +182,6 @@ define archhelp echo '* zImage - Compressed kernel image (arch/$(ARCH)/boot/zImage)' echo ' Image - Uncompressed kernel image (arch/$(ARCH)/boot/Image)' echo ' bootpImage - Combined zImage and initial RAM disk' - echo ' initrd - Create an initial image' echo ' install - Install uncompressed kernel' echo ' zinstall - Install compressed kernel' echo ' Install using (your) ~/bin/installkernel or' --- linux-2.6.0-test6/arch/arm/mm/discontig.c 2003-06-14 12:18:31.000000000 -0700 +++ 25/arch/arm/mm/discontig.c 2003-10-05 00:34:40.000000000 -0700 @@ -15,7 +15,7 @@ #include #include -#if NR_NODES != 4 +#if MAX_NUMNODES != 4 #error Fix Me Please #endif @@ -23,9 +23,9 @@ * Our node_data structure for discontiguous memory. 
*/ -static bootmem_data_t node_bootmem_data[NR_NODES]; +static bootmem_data_t node_bootmem_data[MAX_NUMNODES]; -pg_data_t discontig_node_data[NR_NODES] = { +pg_data_t discontig_node_data[MAX_NUMNODES] = { { .bdata = &node_bootmem_data[0] }, { .bdata = &node_bootmem_data[1] }, { .bdata = &node_bootmem_data[2] }, --- linux-2.6.0-test6/arch/arm/mm/init.c 2003-07-10 18:50:30.000000000 -0700 +++ 25/arch/arm/mm/init.c 2003-10-05 00:34:40.000000000 -0700 @@ -33,12 +33,6 @@ #include #include -#ifndef CONFIG_DISCONTIGMEM -#define NR_NODES 1 -#else -#define NR_NODES 4 -#endif - #ifdef CONFIG_CPU_32 #define TABLE_OFFSET (PTRS_PER_PTE) #else @@ -178,7 +172,7 @@ find_memend_and_nodes(struct meminfo *mi { unsigned int i, bootmem_pages = 0, memend_pfn = 0; - for (i = 0; i < NR_NODES; i++) { + for (i = 0; i < MAX_NUMNODES; i++) { np[i].start = -1U; np[i].end = 0; np[i].bootmap_pages = 0; @@ -207,7 +201,7 @@ find_memend_and_nodes(struct meminfo *mi * we have, we're in trouble. (maybe we ought to * limit, instead of bugging?) */ - if (numnodes > NR_NODES) + if (numnodes > MAX_NUMNODES) BUG(); } @@ -365,7 +359,7 @@ static inline void free_bootmem_node_ban */ void __init bootmem_init(struct meminfo *mi) { - struct node_info node_info[NR_NODES], *np = node_info; + struct node_info node_info[MAX_NUMNODES], *np = node_info; unsigned int bootmap_pages, bootmap_pfn, map_pg; int node, initrd_node; --- linux-2.6.0-test6/arch/arm/mm/ioremap.c 2003-06-14 12:18:24.000000000 -0700 +++ 25/arch/arm/mm/ioremap.c 2003-10-05 00:33:23.000000000 -0700 @@ -150,7 +150,7 @@ __ioremap(unsigned long phys_addr, size_ if (!area) return NULL; addr = area->addr; - if (remap_area_pages(VMALLOC_VMADDR(addr), phys_addr, size, flags)) { + if (remap_area_pages((unsigned long) addr, phys_addr, size, flags)) { vfree(addr); return NULL; } --- linux-2.6.0-test6/arch/cris/mm/ioremap.c 2003-07-10 18:50:30.000000000 -0700 +++ 25/arch/cris/mm/ioremap.c 2003-10-05 00:33:23.000000000 -0700 @@ -157,7 +157,7 @@ void * __ioremap(unsigned long phys_addr if (!area) return NULL; addr = area->addr; - if (remap_area_pages(VMALLOC_VMADDR(addr), phys_addr, size, flags)) { + if (remap_area_pages((unsigned long) addr, phys_addr, size, flags)) { vfree(addr); return NULL; } --- linux-2.6.0-test6/arch/h8300/README 2003-06-14 12:18:26.000000000 -0700 +++ 25/arch/h8300/README 2003-10-05 00:33:23.000000000 -0700 @@ -16,7 +16,7 @@ H8S is planning. 3.H8MAX Under development - see http://www.strawbelly-linux.com (Japanese Only) + see http://www.strawberry-linux.com (Japanese Only) * Toolchain Version gcc-3.1 or higher and patch --- linux-2.6.0-test6/arch/i386/boot/setup.S 2003-09-27 18:57:43.000000000 -0700 +++ 25/arch/i386/boot/setup.S 2003-10-05 00:36:48.000000000 -0700 @@ -162,7 +162,7 @@ cmd_line_ptr: .long 0 # (Header versio # can be located anywhere in # low memory 0x10000 or higher. -ramdisk_max: .long MAXMEM-1 # (Header version 0x0203 or later) +ramdisk_max: .long __MAXMEM-1 # (Header version 0x0203 or later) # The highest safe address for # the contents of an initrd --- linux-2.6.0-test6/arch/i386/Kconfig 2003-09-27 18:57:43.000000000 -0700 +++ 25/arch/i386/Kconfig 2003-10-05 00:36:48.000000000 -0700 @@ -397,6 +397,54 @@ config X86_OOSTORE depends on MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 default y +config X86_4G + bool "4 GB kernel-space and 4 GB user-space virtual memory support" + help + This option is only useful for systems that have more than 1 GB + of RAM. 
+ + The default kernel VM layout leaves 1 GB of virtual memory for + kernel-space mappings, and 3 GB of VM for user-space applications. + This option ups both the kernel-space VM and the user-space VM to + 4 GB. + + The cost of this option is additional TLB flushes done at + system-entry points that transition from user-mode into kernel-mode, + i.e. system calls, page faults, and IRQs that interrupt user-mode + code. There's also additional overhead to kernel operations that copy + memory to/from user-space. The overhead from this is hard to predict + and depends on the workload - it can be anything from no visible + overhead to a 20-30% overhead. A good rule of thumb is to expect a + runtime overhead of about 20%. + + The upside is the much increased kernel-space VM, which more than + quadruples the maximum amount of RAM supported. Kernels compiled with + this option boot on 64GB of RAM and still have more than 3.1 GB of + 'lowmem' left. Another bonus is that highmem IO bouncing decreases, + if used with drivers that still use bounce-buffers. + + There's also a 33% increase in user-space VM size - database + applications might see a boost from this. + + But the cost of the TLB flushes and the runtime overhead have to be + weighed against the bonuses offered by the larger VM spaces. The + dividing line depends on the actual workload - there might be 4 GB + systems that benefit from this option. Systems with less than 4 GB + of RAM will rarely see a benefit from this option - but it's not + out of the question; the exact circumstances have to be considered. + +config X86_SWITCH_PAGETABLES + def_bool X86_4G + +config X86_4G_VM_LAYOUT + def_bool X86_4G + +config X86_UACCESS_INDIRECT + def_bool X86_4G + +config X86_HIGH_ENTRY + def_bool X86_4G + config HPET_TIMER bool "HPET Timer Support" help @@ -793,7 +841,8 @@ config HAVE_DEC_LOCK # Summit needs it only when NUMA is on config BOOT_IOREMAP bool - depends on ((X86_SUMMIT || X86_GENERICARCH) && NUMA) + depends on X86_PC +# depends on (((X86_SUMMIT || X86_GENERICARCH) && NUMA)) || X86_GENERICARCH default y endmenu @@ -1030,6 +1079,25 @@ config PCI_DIRECT depends on PCI && ((PCI_GODIRECT || PCI_GOANY) || X86_VISWS) default y +config PCI_USE_VECTOR + bool "Vector-based interrupt indexing" + depends on X86_LOCAL_APIC + default n + help + This replaces the existing IRQ-based interrupt indexing scheme + with a vector-based indexing scheme. The advantages of vector-based + over IRQ-based indexing are listed below: + 1) Supports MSI implementation. + 2) Supports future IOxAPIC hotplug + + Note that this enables MSI (Message Signaled Interrupts) on all + MSI-capable device functions detected, provided users also install + the MSI patch. Message Signaled Interrupts enable an MSI-capable + hardware device to send an inbound Memory Write on its PCI bus + instead of asserting a signal on its IRQ pin. + + If you don't know what to do here, say N. + source "drivers/pci/Kconfig" config ISA @@ -1231,6 +1299,15 @@ config DEBUG_PAGEALLOC This results in a large slowdown, but helps to find certain types of memory corruptions. +config SPINLINE + bool "Spinlock inlining" + depends on DEBUG_KERNEL + help + This will change spinlocks from out of line to inline, making them + account cost to the callers in readprofile, rather than the lock + itself (as ".text.lock.filename"). This can be helpful for finding + the callers of locks.
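As a concrete illustration of the PCI_USE_VECTOR entry above: a minimal sketch of how a driver would pick up an MSI vector once the companion MSI patch is applied. pci_enable_msi() is assumed to come from that patch, and the example_* names are hypothetical placeholders, not part of this tree.

/* Hypothetical probe fragment: ask the device to signal interrupts via an
 * inbound Memory Write (MSI) instead of its INTx pin.  If MSI cannot be
 * enabled, dev->irq still holds the wired IRQ, so either way dev->irq is
 * the value handed to request_irq(). */
static irqreturn_t example_handler(int irq, void *dev_id, struct pt_regs *regs)
{
	return IRQ_HANDLED;
}

static int example_probe(struct pci_dev *dev)
{
	pci_enable_msi(dev);	/* assumed API from the MSI patch */
	return request_irq(dev->irq, example_handler, SA_SHIRQ,
			   "example", dev);
}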
+ config DEBUG_HIGHMEM bool "Highmem debugging" depends on DEBUG_KERNEL && HIGHMEM @@ -1247,20 +1324,208 @@ config DEBUG_INFO Say Y here only if you plan to use gdb to debug the kernel. If you don't debug the kernel, you can say N. +config LOCKMETER + bool "Kernel lock metering" + depends on SMP && !PREEMPT + help + Say Y to enable kernel lock metering, which adds overhead to SMP locks, + but allows you to see various statistics using the lockstat command. + config DEBUG_SPINLOCK_SLEEP bool "Sleep-inside-spinlock checking" help If you say Y here, various routines which may sleep will become very noisy if they are called with a spinlock held. +config KGDB + bool "Include kgdb kernel debugger" + depends on DEBUG_KERNEL + help + If you say Y here, the system will be compiled with the debug + option (-g) and a debugging stub will be included in the + kernel. This stub communicates with gdb on another (host) + computer via a serial port. The host computer should have + access to the kernel binary file (vmlinux) and a serial port + that is connected to the target machine. Gdb can be made to + configure the serial port or you can use stty and setserial to + do this. See the 'target' command in gdb. This option also + configures in the ability to request a breakpoint early in the + boot process. To request the breakpoint just include 'kgdb' + as a boot option when booting the target machine. The system + will then break as soon as it looks at the boot options. This + option also installs a breakpoint in panic and sends any + kernel faults to the debugger. For more information see the + Documentation/i386/kgdb.txt file. + +choice + depends on KGDB + prompt "Debug serial port BAUD" + default KGDB_115200BAUD + help + Gdb and the kernel stub need to agree on the baud rate to be + used. Some systems (x86 family at this writing) allow this to + be configured. + +config KGDB_9600BAUD + bool "9600" + +config KGDB_19200BAUD + bool "19200" + +config KGDB_38400BAUD + bool "38400" + +config KGDB_57600BAUD + bool "57600" + +config KGDB_115200BAUD + bool "115200" +endchoice + +config KGDB_PORT + hex "hex I/O port address of the debug serial port" + depends on KGDB + default 3f8 + help + Some systems (x86 family at this writing) allow the port + address to be configured. The number entered is assumed to be + hex, don't put 0x in front of it. The standard addresses are: + COM1 at 3f8, irq 4 and COM2 at 2f8, irq 3. Setserial /dev/ttySx + will tell you what you have. It is good to test the serial + connection with a live system before trying to debug. + +config KGDB_IRQ + int "IRQ of the debug serial port" + depends on KGDB + default 4 + help + This is the irq for the debug port. If everything is working + correctly and the kernel has interrupts enabled, a control-C + sent to the port should cause a break into the kernel debug + stub. + +config DEBUG_INFO + bool + depends on KGDB + default y + +config KGDB_MORE + bool "Add any additional compile options" + depends on KGDB + default n + help + Saying yes here turns on the ability to enter additional + compile options. + + +config KGDB_OPTIONS + depends on KGDB_MORE + string "Additional compile arguments" + default "-O1" + help + This option allows you to enter additional compile options for + the whole kernel compile. Each platform will have a default + that seems right for it. For example on PPC "-ggdb -O1", and + for i386 "-O1". Note that by configuring KGDB "-g" is already + turned on. In addition, on i386 platforms + "-fomit-frame-pointer" is deleted from the standard compile + options. +
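To make the KGDB entry above concrete: a hedged sketch of requesting a programmatic break from kernel code. breakpoint() is assumed to be the stub hook exported by the kgdb patch (see Documentation/i386/kgdb.txt for the exact spelling); the predicate argument is a placeholder.

/* Sketch: trap into the host-side gdb from a suspect kernel path.
 * Execution stops here under gdb, exactly as with the panic hook the
 * help text above describes. */
static void example_debug_hook(int something_went_wrong)
{
	if (something_went_wrong)
		breakpoint();	/* assumed kgdb-patch entry point */
}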
+config NO_KGDB_CPUS + int "Number of CPUs" + depends on KGDB && SMP + default NR_CPUS + help + + This option sets the number of cpus for kgdb ONLY. It is used + to prune some internal structures so they look "nice" when + displayed with gdb. This is to overcome possibly larger + numbers that may have been entered above. Enter the real + number to get nice clean kgdb_info displays. + +config KGDB_TS + bool "Enable kgdb time stamp macros?" + depends on KGDB + default n + help + Kgdb event macros allow you to instrument your code with calls + to the kgdb event recording function. The event log may be + examined with gdb at a break point. Turning on this + capability also allows you to choose how many events to + keep. Kgdb always keeps the latest events. + +choice + depends on KGDB_TS + prompt "Max number of time stamps to save?" + default KGDB_TS_128 + +config KGDB_TS_64 + bool "64" + +config KGDB_TS_128 + bool "128" + +config KGDB_TS_256 + bool "256" + +config KGDB_TS_512 + bool "512" + +config KGDB_TS_1024 + bool "1024" + +endchoice + +config STACK_OVERFLOW_TEST + bool "Turn on kernel stack overflow testing?" + depends on KGDB + default n + help + This option enables code in the front line interrupt handlers + to check for kernel stack overflow on interrupts and system + calls. This is part of the kgdb code on x86 systems. + +config KGDB_CONSOLE + bool "Enable serial console thru kgdb port" + depends on KGDB + default n + help + This option enables the command line "console=kgdb" option. + When the system is booted with this option in the command line + all kernel printk output is sent to gdb (as well as to other + consoles). For this to work gdb must be connected. For this + reason, this command line option will generate a breakpoint if + gdb has not yet connected. After the gdb continue command is + given all pent-up console output will be printed by gdb on the + host machine. Neither this option nor KGDB requires the + serial driver to be configured. + +config KGDB_SYSRQ + bool "Turn on SysRq 'G' command to do a break?" + depends on KGDB + default y + help + This option adds a SysRq command, SysRq G, which generates a + breakpoint into the KGDB stub. This will work if the keyboard + is alive and can interrupt the system. Because of constraints + on when the serial port interrupt can be enabled, this code may + allow you to interrupt the system before the serial-port + control-C is available. Just say yes here. + config FRAME_POINTER bool "Compile the kernel with frame pointers" + default KGDB help If you say Y here the resulting kernel image will be slightly larger and slower, but it will give very useful debugging information. If you don't debug the kernel, you can say N, but we may not be able to solve problems without frame pointers.
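A minimal sketch of why frame pointers give "very useful debugging information": with %ebp chains intact, a backtrace reduces to a linked-list walk. The struct and function here are invented for illustration and are not the kernel's real unwinder.

/* Each i386 stack frame built with frame pointers starts with the
 * caller's saved %ebp, followed by the return address pushed by call. */
struct frame {
	struct frame *next;	/* saved %ebp: the caller's frame */
	unsigned long ret;	/* return address into the caller */
};

static void show_trace_sketch(struct frame *fp)
{
	while (fp) {		/* the chain ends at a NULL %ebp */
		printk(" [<%08lx>]\n", fp->ret);
		fp = fp->next;
	}
}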
+config MAGIC_SYSRQ + bool + depends on KGDB_SYSRQ + default y + config X86_EXTRA_IRQS bool depends on X86_LOCAL_APIC || X86_VOYAGER @@ -1303,3 +1568,8 @@ config X86_TRAMPOLINE bool depends on SMP || X86_VISWS default y + +config PC + bool + depends on X86 && !EMBEDDED + default y --- linux-2.6.0-test6/arch/i386/kernel/acpi/boot.c 2003-09-08 13:58:55.000000000 -0700 +++ 25/arch/i386/kernel/acpi/boot.c 2003-10-05 00:36:22.000000000 -0700 @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -183,8 +184,7 @@ acpi_parse_lapic_nmi ( #endif /*CONFIG_X86_LOCAL_APIC*/ -#ifdef CONFIG_X86_IO_APIC - +#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_ACPI_INTERPRETER) static int __init acpi_parse_ioapic ( @@ -297,6 +297,10 @@ acpi_find_rsdp (void) { unsigned long rsdp_phys = 0; + if (efi.acpi20) + return __pa(efi.acpi20); + else if (efi.acpi) + return __pa(efi.acpi); /* * Scan memory looking for the RSDP signature. First search EBDA (low * memory) paragraphs and then search upper memory (E0000-FFFFF). @@ -368,7 +372,6 @@ acpi_boot_init (void) result = acpi_table_parse(ACPI_APIC, acpi_parse_madt); if (!result) { - printk(KERN_WARNING PREFIX "MADT not present\n"); return 0; } else if (result < 0) { @@ -416,7 +419,7 @@ acpi_boot_init (void) #endif /*CONFIG_X86_LOCAL_APIC*/ -#ifdef CONFIG_X86_IO_APIC +#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_ACPI_INTERPRETER) /* * I/O APIC @@ -472,7 +475,8 @@ acpi_boot_init (void) acpi_irq_model = ACPI_IRQ_MODEL_IOAPIC; acpi_ioapic = 1; -#endif /*CONFIG_X86_IO_APIC*/ + +#endif /* CONFIG_X86_IO_APIC && CONFIG_ACPI_INTERPRETER */ #ifdef CONFIG_X86_LOCAL_APIC if (acpi_lapic && acpi_ioapic) { @@ -480,6 +484,7 @@ acpi_boot_init (void) clustered_apic_check(); } #endif + #ifdef CONFIG_HPET_TIMER acpi_table_parse(ACPI_HPET, acpi_parse_hpet); #endif --- linux-2.6.0-test6/arch/i386/kernel/asm-offsets.c 2003-06-14 12:18:07.000000000 -0700 +++ 25/arch/i386/kernel/asm-offsets.c 2003-10-05 00:36:48.000000000 -0700 @@ -4,9 +4,11 @@ * to extract and format the required data. 
*/ +#include #include #include #include "sigframe.h" +#include #define DEFINE(sym, val) \ asm volatile("\n->" #sym " %0 " #val : : "i" (val)) @@ -28,4 +30,17 @@ void foo(void) DEFINE(RT_SIGFRAME_sigcontext, offsetof (struct rt_sigframe, uc.uc_mcontext)); + DEFINE(TI_task, offsetof (struct thread_info, task)); + DEFINE(TI_exec_domain, offsetof (struct thread_info, exec_domain)); + DEFINE(TI_flags, offsetof (struct thread_info, flags)); + DEFINE(TI_preempt_count, offsetof (struct thread_info, preempt_count)); + DEFINE(TI_addr_limit, offsetof (struct thread_info, addr_limit)); + DEFINE(TI_real_stack, offsetof (struct thread_info, real_stack)); + DEFINE(TI_virtual_stack, offsetof (struct thread_info, virtual_stack)); + DEFINE(TI_user_pgd, offsetof (struct thread_info, user_pgd)); + + DEFINE(FIX_ENTRY_TRAMPOLINE_0_addr, __fix_to_virt(FIX_ENTRY_TRAMPOLINE_0)); + DEFINE(FIX_VSYSCALL_addr, __fix_to_virt(FIX_VSYSCALL)); + DEFINE(PAGE_SIZE_asm, PAGE_SIZE); + DEFINE(task_thread_db7, offsetof (struct task_struct, thread.debugreg[7])); } --- linux-2.6.0-test6/arch/i386/kernel/cpu/common.c 2003-09-27 18:57:43.000000000 -0700 +++ 25/arch/i386/kernel/cpu/common.c 2003-10-05 00:36:48.000000000 -0700 @@ -510,16 +510,20 @@ void __init cpu_init (void) BUG(); enter_lazy_tlb(&init_mm, current); - load_esp0(t, thread->esp0); - set_tss_desc(cpu,t); + t->esp0 = thread->esp0; + set_tss_desc(cpu, t); cpu_gdt_table[cpu][GDT_ENTRY_TSS].b &= 0xfffffdff; load_TR_desc(); - load_LDT(&init_mm.context); + if (cpu) + load_LDT(&init_mm.context); /* Set up doublefault TSS pointer in the GDT */ __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss); cpu_gdt_table[cpu][GDT_ENTRY_DOUBLEFAULT_TSS].b &= 0xfffffdff; + if (cpu) + trap_init_virtual_GDT(); + /* Clear %fs and %gs. */ asm volatile ("xorl %eax, %eax; movl %eax, %fs; movl %eax, %gs"); --- linux-2.6.0-test6/arch/i386/kernel/cpu/cpufreq/acpi.c 2003-09-27 18:57:43.000000000 -0700 +++ 25/arch/i386/kernel/cpu/cpufreq/acpi.c 2003-10-05 00:33:23.000000000 -0700 @@ -231,7 +231,7 @@ acpi_processor_set_performance ( int state) { u16 port = 0; - u8 value = 0; + u16 value = 0; int i = 0; struct cpufreq_freqs cpufreq_freqs; @@ -282,9 +282,9 @@ acpi_processor_set_performance ( value = (u16) perf->states[state].control; ACPI_DEBUG_PRINT((ACPI_DB_INFO, - "Writing 0x%02x to port 0x%04x\n", value, port)); + "Writing 0x%04x to port 0x%04x\n", value, port)); - outb(value, port); + outw(value, port); /* * Then we read the 'status_register' and compare the value with the @@ -296,12 +296,12 @@ acpi_processor_set_performance ( port = perf->status_register; ACPI_DEBUG_PRINT((ACPI_DB_INFO, - "Looking for 0x%02x from port 0x%04x\n", - (u8) perf->states[state].status, port)); + "Looking for 0x%04x from port 0x%04x\n", + (u16) perf->states[state].status, port)); for (i=0; i<100; i++) { - value = inb(port); - if (value == (u8) perf->states[state].status) + value = inw(port); + if (value == (u16) perf->states[state].status) break; udelay(10); } @@ -309,7 +309,7 @@ acpi_processor_set_performance ( /* notify cpufreq */ cpufreq_notify_transition(&cpufreq_freqs, CPUFREQ_POSTCHANGE); - if (value != perf->states[state].status) { + if (value != (u16) perf->states[state].status) { unsigned int tmp = cpufreq_freqs.new; cpufreq_freqs.new = cpufreq_freqs.old; cpufreq_freqs.old = tmp; --- linux-2.6.0-test6/arch/i386/kernel/cpu/cpufreq/Kconfig 2003-09-27 18:57:43.000000000 -0700 +++ 25/arch/i386/kernel/cpu/cpufreq/Kconfig 2003-10-05 00:33:23.000000000 -0700 @@ -88,6 +88,16 @@ config X86_POWERNOW_K7 If in 
doubt, say N. +config X86_POWERNOW_K8 + tristate "AMD Opteron/Athlon64 PowerNow!" + depends on CPU_FREQ_TABLE + help + This adds the CPUFreq driver for mobile AMD Opteron/Athlon64 processors. + + For details, take a look at linux/Documentation/cpu-freq. + + If in doubt, say N. + config X86_GX_SUSPMOD tristate "Cyrix MediaGX/NatSemi Geode Suspend Modulation" depends on CPU_FREQ --- linux-2.6.0-test6/arch/i386/kernel/cpu/cpufreq/longhaul.c 2003-09-08 13:58:55.000000000 -0700 +++ 25/arch/i386/kernel/cpu/cpufreq/longhaul.c 2003-10-05 00:33:23.000000000 -0700 @@ -70,21 +70,6 @@ static unsigned int calc_speed (int mult } -static unsigned int longhaul_get_cpu_fsb (void) -{ - unsigned long lo, hi; - unsigned int eblcr_fsb_table[] = { 66, 133, 100, -1 }; - unsigned int invalue=0; - - if (fsb == 0) { - rdmsr (MSR_IA32_EBL_CR_POWERON, lo, hi); - invalue = (lo & (1<<18|1<<19)) >>18; - fsb = eblcr_fsb_table[invalue]; - } - return fsb; -} - - static int longhaul_get_cpu_mult (void) { unsigned long invalue=0,lo, hi; @@ -168,7 +153,7 @@ static void longhaul_setstate (unsigned break; /* - * Longhaul v3. (Ezra-T [C5M], Nehemiag [C5N]) + * Longhaul v3. (Ezra-T [C5M], Nehemiah [C5N]) * This can also do voltage scaling, but see above. * Ezra-T was alleged to do FSB scaling too, but it never worked in practice. */ @@ -193,6 +178,39 @@ static void longhaul_setstate (unsigned cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); } +/* + * Centaur decided to make life a little more tricky. + * Only longhaul v1 is allowed to read EBLCR BSEL[0:1]. + * Samuel2 and above have to try and guess what the FSB is. + * We do this by assuming we booted at maximum multiplier, and interpolate + * between that value multiplied by possible FSBs and cpu_mhz which + * was calculated at boot time. Really ugly, but no other way to do this. + */ +static int _guess (int guess, int maxmult) +{ + int target; + + target = ((maxmult/10)*guess); + if (maxmult%10 != 0) + target += (guess/2); + target &= ~0xf; + return target; +} + +static int guess_fsb(int maxmult) +{ + int speed = (cpu_khz/1000) & ~0xf; + int i; + int speeds[3] = { 66, 100, 133 }; + + for (i=0; i<3; i++) { + if (_guess(speeds[i],maxmult) == speed) + return speeds[i]; + } + return 0; +} + + static int __init longhaul_get_ranges (void) { @@ -203,8 +221,8 @@ static int __init longhaul_get_ranges (v -1,110,120,-1,135,115,125,105,130,150,160,140,-1,155,-1,145 }; unsigned int j, k = 0; union msr_longhaul longhaul; - - fsb = longhaul_get_cpu_fsb(); + unsigned long lo, hi; + unsigned int eblcr_fsb_table[] = { 66, 133, 100, -1 }; switch (longhaul_version) { case 1: @@ -212,6 +230,9 @@ static int __init longhaul_get_ranges (v Assume min=3.0x & max = whatever we booted at. */ minmult = 30; maxmult = longhaul_get_cpu_mult(); + rdmsr (MSR_IA32_EBL_CR_POWERON, lo, hi); + invalue = (lo & (1<<18|1<<19)) >>18; + fsb = eblcr_fsb_table[invalue]; break; case 2 ... 
3: @@ -222,14 +243,13 @@ static int __init longhaul_get_ranges (v invalue += 16; maxmult=multipliers[invalue]; -#if 0 invalue = longhaul.bits.MinMHzBR; - if (longhaul.bits.MinMHzBR4); - invalue += 16; - minmult = multipliers[invalue]; -#else - minmult = 30; /* as per spec */ -#endif + if (longhaul.bits.MinMHzBR4 == 1) + minmult = 30; + else + minmult = multipliers[invalue]; + + fsb = guess_fsb(maxmult); break; } --- linux-2.6.0-test6/arch/i386/kernel/cpu/cpufreq/Makefile 2003-09-27 18:57:43.000000000 -0700 +++ 25/arch/i386/kernel/cpu/cpufreq/Makefile 2003-10-05 00:33:23.000000000 -0700 @@ -1,5 +1,6 @@ obj-$(CONFIG_X86_POWERNOW_K6) += powernow-k6.o obj-$(CONFIG_X86_POWERNOW_K7) += powernow-k7.o +obj-$(CONFIG_X86_POWERNOW_K8) += powernow-k8.o obj-$(CONFIG_X86_LONGHAUL) += longhaul.o obj-$(CONFIG_X86_P4_CLOCKMOD) += p4-clockmod.o obj-$(CONFIG_ELAN_CPUFREQ) += elanfreq.o
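A worked trace of the guess_fsb() heuristic added to longhaul.c above, under assumed boot values (9.0x maximum multiplier, cpu_khz = 1196000); the numbers are illustrative, not measured.

/* speed = (1196000 / 1000) & ~0xf = 1184
 * _guess(66, 90)  = (9 * 66)  & ~0xf = 594  & ~0xf = 592   no match
 * _guess(100, 90) = (9 * 100) & ~0xf = 900  & ~0xf = 896   no match
 * _guess(133, 90) = (9 * 133) & ~0xf = 1197 & ~0xf = 1184  match -> FSB 133
 * (maxmult % 10 == 0 here, so the guess/2 rounding term is not added) */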
For desktops, + number_of_battery_p_states == maximum_number_of_pstates, +so this is not actually a restriction. +*/ + +static u32 batps; /* limit on the number of p states when on battery */ + /* - set by BIOS in the PSB/PST */ + +static struct cpufreq_driver cpufreq_amd64_driver = { + .verify = drv_verify, + .target = drv_target, + .init = drv_cpu_init, + .name = "cpufreq-amd64", + .owner = THIS_MODULE, +}; + +#define SEARCH_UP 1 +#define SEARCH_DOWN 0 + +/* Return a frequency in MHz, given an input fid */ +u32 +find_freq_from_fid(u32 fid) +{ + return 800 + (fid * 100); +} + +/* Return a fid matching an input frequency in MHz */ +u32 +find_fid_from_freq(u32 freq) +{ + return (freq - 800) / 100; +} + +/* Return the vco fid for an input fid */ +static u32 +convert_fid_to_vco_fid(u32 fid) +{ + if (fid < HI_FID_TABLE_BOTTOM) { + return 8 + (2 * fid); + } else { + return fid; + } +} + +/* Sort the fid/vid frequency table into ascending order by fid. The spec */ +/* implies that it will be sorted by BIOS, but, it only implies it, and I */ +/* prefer not to trust when I can check. */ +/* Yes, it is a simple bubble sort, but the PST is really small, so the */ +/* choice of algorithm is pretty irrelevant. */ +static inline void +sort_pst(struct pst_s *ppst, u32 numpstates) +{ + u32 i; + u8 tempfid; + u8 tempvid; + int swaps = 1; + + while (swaps) { + swaps = 0; + for (i = 0; i < (numpstates - 1); i++) { + if (ppst[i].fid > ppst[i + 1].fid) { + swaps = 1; + tempfid = ppst[i].fid; + tempvid = ppst[i].vid; + ppst[i].fid = ppst[i + 1].fid; + ppst[i].vid = ppst[i + 1].vid; + ppst[i + 1].fid = tempfid; + ppst[i + 1].vid = tempvid; + } + } + } + + return; +} + +/* Return 1 if the pending bit is set. Unless we are actually just told the */ +/* processor to transition a state, seeing this bit set is really bad news. */ +static inline int +pending_bit_stuck(void) +{ + u32 lo; + u32 hi; + + rdmsr(MSR_FIDVID_STATUS, lo, hi); + return lo & MSR_S_LO_CHANGE_PENDING ? 1 : 0; +} + +/* Update the global current fid / vid values from the status msr. Returns 1 */ +/* on error. 
+/* Update the global current fid / vid values from the status msr. Returns 1 */ +/* on error. */ +static int +query_current_values_with_pending_wait(void) +{ + u32 lo; + u32 hi; + u32 i = 0; + + lo = MSR_S_LO_CHANGE_PENDING; + while (lo & MSR_S_LO_CHANGE_PENDING) { + if (i++ > 0x1000000) { + printk(KERN_ERR PFX "detected change pending stuck\n"); + return 1; + } + rdmsr(MSR_FIDVID_STATUS, lo, hi); + } + + currvid = hi & MSR_S_HI_CURRENT_VID; + currfid = lo & MSR_S_LO_CURRENT_FID; + + return 0; +} + +/* the isochronous relief time */ +static inline void +count_off_irt(void) +{ + udelay((1 << irt) * 10); + return; +} + +/* the voltage stabilization time */ +static inline void +count_off_vst(void) +{ + udelay(vstable * VST_UNITS_20US); + return; +} + +/* write the new fid value along with the other control fields to the msr */ +static int +write_new_fid(u32 fid) +{ + u32 lo; + u32 savevid = currvid; + + if ((fid & INVALID_FID_MASK) || (currvid & INVALID_VID_MASK)) { + printk(KERN_ERR PFX "internal error - overflow on fid write\n"); + return 1; + } + + lo = fid | (currvid << MSR_C_LO_VID_SHIFT) | MSR_C_LO_INIT_FID_VID; + + dprintk(KERN_DEBUG PFX "writing fid %x, lo %x, hi %x\n", + fid, lo, plllock * PLL_LOCK_CONVERSION); + + wrmsr(MSR_FIDVID_CTL, lo, plllock * PLL_LOCK_CONVERSION); + + if (query_current_values_with_pending_wait()) + return 1; + + count_off_irt(); + + if (savevid != currvid) { + printk(KERN_ERR PFX + "vid changed on fid transition, save %x, currvid %x\n", + savevid, currvid); + return 1; + } + + if (fid != currfid) { + printk(KERN_ERR PFX + "fid transition failed, fid %x, currfid %x\n", + fid, currfid); + return 1; + } + + return 0; +} + +/* Write a new vid to the hardware */ +static int +write_new_vid(u32 vid) +{ + u32 lo; + u32 savefid = currfid; + + if ((currfid & INVALID_FID_MASK) || (vid & INVALID_VID_MASK)) { + printk(KERN_ERR PFX "internal error - overflow on vid write\n"); + return 1; + } + + lo = currfid | (vid << MSR_C_LO_VID_SHIFT) | MSR_C_LO_INIT_FID_VID; + + dprintk(KERN_DEBUG PFX "writing vid %x, lo %x, hi %x\n", + vid, lo, STOP_GRANT_5NS); + + wrmsr(MSR_FIDVID_CTL, lo, STOP_GRANT_5NS); + + if (query_current_values_with_pending_wait()) { + return 1; + } + + if (savefid != currfid) { + printk(KERN_ERR PFX + "fid changed on vid transition, save %x currfid %x\n", + savefid, currfid); + return 1; + } + + if (vid != currvid) { + printk(KERN_ERR PFX + "vid transition failed, vid %x, currvid %x\n", + vid, currvid); + return 1; + } + + return 0; +} + +/* Reduce the vid by at most 'step' toward 'reqvid'. */ +/* Decreasing vid codes represent increasing voltages : */ +/* vid of 0 is 1.550V, vid of 0x1e is 0.800V, vid of 0x1f is off. */ +static int +decrease_vid_code_by_step(u32 reqvid, u32 step) +{ + if ((currvid - reqvid) > step) + reqvid = currvid - step; + + if (write_new_vid(reqvid)) + return 1; + + count_off_vst(); + + return 0; +} + +/* Change the fid and vid, by the 3 phases. */ +static inline int +transition_fid_vid(u32 reqfid, u32 reqvid) +{ + if (core_voltage_pre_transition(reqvid)) + return 1; + + if (core_frequency_transition(reqfid)) + return 1; + + if (core_voltage_post_transition(reqvid)) + return 1; + + if (query_current_values_with_pending_wait()) + return 1; + + if ((reqfid != currfid) || (reqvid != currvid)) { + printk(KERN_ERR PFX "failed: req 0x%x 0x%x, curr 0x%x 0x%x\n", + reqfid, reqvid, currfid, currvid); + return 1; + } + + dprintk(KERN_INFO PFX + "transitioned: new fid 0x%x, vid 0x%x\n", currfid, currvid); + + return 0; +} + +/* Phase 1 - core voltage transition ... setup appropriate voltage for the */ +/* fid transition. 
*/ +static inline int +core_voltage_pre_transition(u32 reqvid) +{ + u32 rvosteps = rvo; + u32 savefid = currfid; + + dprintk(KERN_DEBUG PFX + "ph1: start, currfid 0x%x, currvid 0x%x, reqvid 0x%x, rvo %x\n", + currfid, currvid, reqvid, rvo); + + while (currvid > reqvid) { + dprintk(KERN_DEBUG PFX "ph1: curr 0x%x, requesting vid 0x%x\n", + currvid, reqvid); + if (decrease_vid_code_by_step(reqvid, vidmvs)) + return 1; + } + + while (rvosteps > 0) { + if (currvid == 0) { + rvosteps = 0; + } else { + dprintk(KERN_DEBUG PFX + "ph1: changing vid for rvo, requesting 0x%x\n", + currvid - 1); + if (decrease_vid_code_by_step(currvid - 1, 1)) + return 1; + rvosteps--; + } + } + + if (query_current_values_with_pending_wait()) + return 1; + + if (savefid != currfid) { + printk(KERN_ERR PFX "ph1 err, currfid changed 0x%x\n", currfid); + return 1; + } + + dprintk(KERN_DEBUG PFX "ph1 complete, currfid 0x%x, currvid 0x%x\n", + currfid, currvid); + + return 0; +} + +/* Phase 2 - core frequency transition */ +static inline int +core_frequency_transition(u32 reqfid) +{ + u32 vcoreqfid; + u32 vcocurrfid; + u32 vcofiddiff; + u32 savevid = currvid; + + if ((reqfid < HI_FID_TABLE_BOTTOM) && (currfid < HI_FID_TABLE_BOTTOM)) { + printk(KERN_ERR PFX "ph2 illegal lo-lo transition 0x%x 0x%x\n", + reqfid, currfid); + return 1; + } + + if (currfid == reqfid) { + printk(KERN_ERR PFX "ph2 null fid transition 0x%x\n", currfid); + return 0; + } + + dprintk(KERN_DEBUG PFX + "ph2 starting, currfid 0x%x, currvid 0x%x, reqfid 0x%x\n", + currfid, currvid, reqfid); + + vcoreqfid = convert_fid_to_vco_fid(reqfid); + vcocurrfid = convert_fid_to_vco_fid(currfid); + vcofiddiff = vcocurrfid > vcoreqfid ? vcocurrfid - vcoreqfid + : vcoreqfid - vcocurrfid; + + while (vcofiddiff > 2) { + if (reqfid > currfid) { + if (currfid > LO_FID_TABLE_TOP) { + if (write_new_fid(currfid + 2)) { + return 1; + } + } else { + if (write_new_fid + (2 + convert_fid_to_vco_fid(currfid))) { + return 1; + } + } + } else { + if (write_new_fid(currfid - 2)) + return 1; + } + + vcocurrfid = convert_fid_to_vco_fid(currfid); + vcofiddiff = vcocurrfid > vcoreqfid ? vcocurrfid - vcoreqfid + : vcoreqfid - vcocurrfid; + } + + if (write_new_fid(reqfid)) + return 1; + + if (query_current_values_with_pending_wait()) + return 1; + + if (currfid != reqfid) { + printk(KERN_ERR PFX + "ph2 mismatch, failed fid transition, curr %x, req %x\n", + currfid, reqfid); + return 1; + } + + if (savevid != currvid) { + printk(KERN_ERR PFX + "ph2 vid changed, save %x, curr %x\n", savevid, + currvid); + return 1; + } + + dprintk(KERN_DEBUG PFX "ph2 complete, currfid 0x%x, currvid 0x%x\n", + currfid, currvid); + + return 0; +} + +/* Phase 3 - core voltage transition flow ... jump to the final vid. 
*/ +static inline int +core_voltage_post_transition(u32 reqvid) +{ + u32 savefid = currfid; + u32 savereqvid = reqvid; + + dprintk(KERN_DEBUG PFX "ph3 starting, currfid 0x%x, currvid 0x%x\n", + currfid, currvid); + + if (reqvid != currvid) { + if (write_new_vid(reqvid)) + return 1; + + if (savefid != currfid) { + printk(KERN_ERR PFX + "ph3: bad fid change, save %x, curr %x\n", + savefid, currfid); + return 1; + } + + if (currvid != reqvid) { + printk(KERN_ERR PFX + "ph3: failed vid transition, req %x, curr %x\n", + reqvid, currvid); + return 1; + } + } + + if (query_current_values_with_pending_wait()) + return 1; + + if (savereqvid != currvid) { + dprintk(KERN_ERR PFX "ph3 failed, currvid 0x%x\n", currvid); + return 1; + } + + if (savefid != currfid) { + dprintk(KERN_ERR PFX "ph3 failed, currfid changed 0x%x\n", + currfid); + return 1; + } + + dprintk(KERN_DEBUG PFX "ph3 complete, currfid 0x%x, currvid 0x%x\n", + currfid, currvid); + + return 0; +} + +static inline int +check_supported_cpu(void) +{ + struct cpuinfo_x86 *c = cpu_data; + u32 eax, ebx, ecx, edx; + + if (num_online_cpus() != 1) { + printk(KERN_INFO PFX "multiprocessor systems not supported\n"); + return 0; + } + + if (c->x86_vendor != X86_VENDOR_AMD) { + printk(KERN_INFO PFX "Not an AMD processor\n"); + return 0; + } + + eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE); + if ((eax & CPUID_XFAM_MOD) == ATHLON64_XFAM_MOD) { + dprintk(KERN_DEBUG PFX "AMD Athlon 64 Processor found\n"); + if ((eax & CPUID_F1_STEP) < ATHLON64_REV_C0) { + printk(KERN_INFO PFX "Revision C0 or better " + "AMD Athlon 64 processor required\n"); + return 0; + } + } else if ((eax & CPUID_XFAM_MOD) == OPTERON_XFAM_MOD) { + dprintk(KERN_DEBUG PFX "AMD Opteron Processor found\n"); + } else { + printk(KERN_INFO PFX + "AMD Athlon 64 or AMD Opteron processor required\n"); + return 0; + } + + eax = cpuid_eax(CPUID_GET_MAX_CAPABILITIES); + if (eax < CPUID_FREQ_VOLT_CAPABILITIES) { + printk(KERN_INFO PFX + "No frequency change capabilities detected\n"); + return 0; + } + + cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx); + if ((edx & P_STATE_TRANSITION_CAPABLE) != P_STATE_TRANSITION_CAPABLE) { + printk(KERN_INFO PFX "Power state transitions not supported\n"); + return 0; + } + + printk(KERN_INFO PFX "Found AMD Athlon 64 / Opteron processor " + "supporting p-state transitions\n"); + + return 1; +} + +/* Find and validate the PSB/PST table in BIOS. */ +static inline int +find_psb_table(void) +{ + struct psb_s *psb; + struct pst_s *pst; + unsigned i, j; + u32 lastfid; + u32 mvs; + u8 maxvid; + + for (i = 0xc0000; i < 0xffff0; i += 0x10) { + /* Scan BIOS looking for the signature. */ + /* It cannot be at ffff0 - it is too big. 
*/ + + psb = phys_to_virt(i); + if (memcmp(psb, PSB_ID_STRING, PSB_ID_STRING_LEN) != 0) + continue; + + dprintk(KERN_DEBUG PFX "found PSB header at 0x%p\n", psb); + + dprintk(KERN_DEBUG PFX "table vers: 0x%x\n", psb->tableversion); + if (psb->tableversion != PSB_VERSION_1_4) { + printk(KERN_INFO BFX "PSB table is not v1.4\n"); + return -ENODEV; + } + + dprintk(KERN_DEBUG PFX "flags: 0x%x\n", psb->flags1); + if (psb->flags1) { + printk(KERN_ERR BFX "unknown flags\n"); + return -ENODEV; + } + + vstable = psb->voltagestabilizationtime; + printk(KERN_INFO PFX "voltage stable time: %d (units 20us)\n", + vstable); + + dprintk(KERN_DEBUG PFX "flags2: 0x%x\n", psb->flags2); + rvo = psb->flags2 & 3; + irt = ((psb->flags2) >> 2) & 3; + mvs = ((psb->flags2) >> 4) & 3; + vidmvs = 1 << mvs; + batps = ((psb->flags2) >> 6) & 3; + printk(KERN_INFO PFX "p states on battery: %d ", batps); + switch (batps) { + case 0: + printk("- all available\n"); + break; + case 1: + printk("- only the minimum\n"); + break; + case 2: + printk("- only the 2 lowest\n"); + break; + case 3: + printk("- only the 3 lowest\n"); + break; + } + printk(KERN_INFO PFX "ramp voltage offset: %d\n", rvo); + printk(KERN_INFO PFX "isochronous relief time: %d\n", irt); + printk(KERN_INFO PFX "maximum voltage step: %d\n", mvs); + + dprintk(KERN_DEBUG PFX "numpst: 0x%x\n", psb->numpst); + if (psb->numpst != 1) { + printk(KERN_ERR BFX "numpst must be 1\n"); + return -ENODEV; + } + + dprintk(KERN_DEBUG PFX "cpuid: 0x%x\n", psb->cpuid); + + plllock = psb->plllocktime; + printk(KERN_INFO PFX "pll lock time: 0x%x\n", plllock); + + maxvid = psb->maxvid; + printk(KERN_INFO PFX "maxfid: 0x%x\n", psb->maxfid); + printk(KERN_INFO PFX "maxvid: 0x%x\n", maxvid); + + numps = psb->numpstates; + printk(KERN_INFO PFX "numpstates: 0x%x\n", numps); + if (numps < 2) { + printk(KERN_ERR BFX "no p states to transition\n"); + return -ENODEV; + } + + if (batps == 0) { + batps = numps; + } else if (batps > numps) { + printk(KERN_ERR BFX "batterypstates > numpstates\n"); + batps = numps; + } else { + printk(KERN_ERR PFX + "Restricting operation to %d p-states\n", batps); + printk(KERN_ERR PFX + "Check for an updated driver to access all " + "%d p-states\n", numps); + } + + if ((numps <= 1) || (batps <= 1)) { + printk(KERN_ERR PFX "only 1 p-state to transition\n"); + return -ENODEV; + } + + ppst = kmalloc(sizeof (struct pst_s) * numps, GFP_KERNEL); + if (!ppst) { + printk(KERN_ERR PFX "ppst memory alloc failure\n"); + return -ENOMEM; + } + + pst = (struct pst_s *) (psb + 1); + for (j = 0; j < numps; j++) { + ppst[j].fid = pst[j].fid; + ppst[j].vid = pst[j].vid; + printk(KERN_INFO PFX + " %d : fid 0x%x, vid 0x%x\n", j, + ppst[j].fid, ppst[j].vid); + } + sort_pst(ppst, numps); + + lastfid = ppst[0].fid; + if (lastfid > LO_FID_TABLE_TOP) + printk(KERN_INFO BFX "first fid not in lo freq tbl\n"); + + if ((lastfid > MAX_FID) || (lastfid & 1) || (ppst[0].vid > LEAST_VID)) { + printk(KERN_ERR BFX "first fid/vid bad (0x%x - 0x%x)\n", + lastfid, ppst[0].vid); + kfree(ppst); + return -ENODEV; + } + + for (j = 1; j < numps; j++) { + if ((lastfid >= ppst[j].fid) + || (ppst[j].fid & 1) + || (ppst[j].fid < HI_FID_TABLE_BOTTOM) + || (ppst[j].fid > MAX_FID) + || (ppst[j].vid > LEAST_VID)) { + printk(KERN_ERR BFX + "invalid fid/vid in pst(%x %x)\n", + ppst[j].fid, ppst[j].vid); + kfree(ppst); + return -ENODEV; + } + lastfid = ppst[j].fid; + } + + for (j = 0; j < numps; j++) { + if (ppst[j].vid < rvo) { /* vid+rvo >= 0 */ + printk(KERN_ERR BFX + "0 vid exceeded with pstate %d\n", j); + 
return -ENODEV; + } + if (ppst[j].vid < maxvid+rvo) { /* vid+rvo >= maxvid */ + printk(KERN_ERR BFX + "maxvid exceeded with pstate %d\n", j); + return -ENODEV; + } + } + + if (query_current_values_with_pending_wait()) { + kfree(ppst); + return -EIO; + } + + printk(KERN_INFO PFX "currfid 0x%x, currvid 0x%x\n", + currfid, currvid); + + for (j = 0; j < numps; j++) + if ((ppst[j].fid==currfid) && (ppst[j].vid==currvid)) + return (0); + + printk(KERN_ERR BFX "currfid/vid do not match PST, ignoring\n"); + return 0; + } + + printk(KERN_ERR BFX "no PSB\n"); + return -ENODEV; +} + +/* Converts a frequency (that might not necessarily be a multiple of 200) */ +/* to a fid. */ +u32 +find_closest_fid(u32 freq, int searchup) +{ + if (searchup == SEARCH_UP) + freq += MIN_FREQ_RESOLUTION - 1; + + freq = (freq / MIN_FREQ_RESOLUTION) * MIN_FREQ_RESOLUTION; + + if (freq < MIN_FREQ) + freq = MIN_FREQ; + else if (freq > MAX_FREQ) + freq = MAX_FREQ; + + return find_fid_from_freq(freq); +} + +static int +find_match(u32 * ptargfreq, u32 * pmin, u32 * pmax, int searchup, u32 * pfid, + u32 * pvid) +{ + u32 availpstates = batps; + u32 targfid = find_closest_fid(*ptargfreq, searchup); + u32 minfid = find_closest_fid(*pmin, SEARCH_DOWN); + u32 maxfid = find_closest_fid(*pmax, SEARCH_UP); + u32 minidx = 0; + u32 maxidx = availpstates - 1; + u32 targidx = 0xffffffff; + int i; + + dprintk(KERN_DEBUG PFX "find match: freq %d MHz, min %d, max %d\n", + *ptargfreq, *pmin, *pmax); + + /* Restrict values to the frequency choices in the PST */ + if (minfid < ppst[0].fid) + minfid = ppst[0].fid; + if (maxfid > ppst[maxidx].fid) + maxfid = ppst[maxidx].fid; + + /* Find appropriate PST index for the minimum fid */ + for (i = 0; i < (int) availpstates; i++) { + if (minfid >= ppst[i].fid) + minidx = i; + } + + /* Find appropriate PST index for the maximum fid */ + for (i = availpstates - 1; i >= 0; i--) { + if (maxfid <= ppst[i].fid) + maxidx = i; + } + + if (minidx > maxidx) + maxidx = minidx; + + /* Frequency ids are now constrained by limits matching PST entries */ + minfid = ppst[minidx].fid; + maxfid = ppst[maxidx].fid; + + /* Limit the target frequency to these limits */ + if (targfid < minfid) + targfid = minfid; + else if (targfid > maxfid) + targfid = maxfid; + + /* Find the best target index into the PST, constrained by the range */ + if (searchup == SEARCH_UP) { + for (i = maxidx; i >= (int) minidx; i--) { + if (targfid <= ppst[i].fid) + targidx = i; + } + } else { + for (i = minidx; i <= (int) maxidx; i++) { + if (targfid >= ppst[i].fid) + targidx = i; + } + } + + if (targidx == 0xffffffff) { + printk(KERN_ERR PFX "could not find target\n"); + return 1; + } + + *pmin = find_freq_from_fid(minfid); + *pmax = find_freq_from_fid(maxfid); + *ptargfreq = find_freq_from_fid(ppst[targidx].fid); + + if (pfid) + *pfid = ppst[targidx].fid; + if (pvid) + *pvid = ppst[targidx].vid; + + return 0; +} + +/* Take a frequency, and issue the fid/vid transition command */ +static inline int +transition_frequency(u32 * preq, u32 * pmin, u32 * pmax, u32 searchup) +{ + u32 fid; + u32 vid; + int res; + struct cpufreq_freqs freqs; + + if (find_match(preq, pmin, pmax, searchup, &fid, &vid)) + return 1; + + dprintk(KERN_DEBUG PFX "table matched fid 0x%x, giving vid 0x%x\n", + fid, vid); + + if (query_current_values_with_pending_wait()) + return 1; + + if ((currvid == vid) && (currfid == fid)) { + dprintk(KERN_DEBUG PFX + "target matches current values (fid 0x%x, vid 0x%x)\n", + fid, vid); + return 0; + } + + if ((fid < HI_FID_TABLE_BOTTOM) && 
(currfid < HI_FID_TABLE_BOTTOM)) { + printk(KERN_ERR PFX + "ignoring illegal change in lo freq table-%x to %x\n", + currfid, fid); + return 1; + } + + dprintk(KERN_DEBUG PFX "changing to fid 0x%x, vid 0x%x\n", fid, vid); + + freqs.cpu = 0; /* only true because SMP not supported */ + + freqs.old = find_freq_from_fid(currfid); + freqs.new = find_freq_from_fid(fid); + cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + + res = transition_fid_vid(fid, vid); + + freqs.new = find_freq_from_fid(currfid); + cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + + return res; +} + +/* Driver entry point to switch to the target frequency */ +static int +drv_target(struct cpufreq_policy *pol, unsigned targfreq, unsigned relation) +{ + u32 checkfid = currfid; + u32 checkvid = currvid; + u32 reqfreq = targfreq / 1000; + u32 minfreq = pol->min / 1000; + u32 maxfreq = pol->max / 1000; + + if (ppst == 0) { + printk(KERN_ERR PFX "targ: ppst 0\n"); + return -ENODEV; + } + + if (pending_bit_stuck()) { + printk(KERN_ERR PFX "drv targ fail: change pending bit set\n"); + return -EIO; + } + + dprintk(KERN_DEBUG PFX "targ: %d kHz, min %d, max %d, relation %d\n", + targfreq, pol->min, pol->max, relation); + + if (query_current_values_with_pending_wait()) + return -EIO; + + dprintk(KERN_DEBUG PFX "targ: curr fid 0x%x, vid 0x%x\n", + currfid, currvid); + + if ((checkvid != currvid) || (checkfid != currfid)) { + printk(KERN_ERR PFX + "error - out of sync, fid 0x%x 0x%x, vid 0x%x 0x%x\n", + checkfid, currfid, checkvid, currvid); + } + + if (transition_frequency(&reqfreq, &minfreq, &maxfreq, + relation == + CPUFREQ_RELATION_H ? SEARCH_UP : SEARCH_DOWN)) + { + printk(KERN_ERR PFX "transition frequency failed\n"); + return 1; + } + + pol->cur = 1000 * find_freq_from_fid(currfid); + + return 0; +} + +/* Driver entry point to verify the policy and range of frequencies */ +static int +drv_verify(struct cpufreq_policy *pol) +{ + u32 min = pol->min / 1000; + u32 max = pol->max / 1000; + u32 targ = min; + int res; + + if (ppst == 0) { + printk(KERN_ERR PFX "verify - ppst 0\n"); + return -ENODEV; + } + + if (pending_bit_stuck()) { + printk(KERN_ERR PFX "failing verify, change pending bit set\n"); + return -EIO; + } + + dprintk(KERN_DEBUG PFX + "ver: cpu%d, min %d, max %d, cur %d, pol %d\n", pol->cpu, + pol->min, pol->max, pol->cur, pol->policy); + + if (pol->cpu != 0) { + printk(KERN_ERR PFX "verify - cpu not 0\n"); + return -ENODEV; + } + + res = find_match(&targ, &min, &max, + pol->policy == CPUFREQ_POLICY_POWERSAVE ? + SEARCH_DOWN : SEARCH_UP, 0, 0); + if (!res) { + pol->min = min * 1000; + pol->max = max * 1000; + } + return res; +} + +/* per CPU init entry point to the driver */ +static int __init +drv_cpu_init(struct cpufreq_policy *pol) +{ + if (pol->cpu != 0) { + printk(KERN_ERR PFX "init not cpu 0\n"); + return -ENODEV; + } + + pol->policy = CPUFREQ_POLICY_PERFORMANCE; /* boot as fast as we can */ + + /* Take a crude guess here. 
*/ + pol->cpuinfo.transition_latency = ((rvo + 8) * vstable * VST_UNITS_20US) + + (3 * (1 << irt) * 10); + + if (query_current_values_with_pending_wait()) + return -EIO; + + pol->cur = 1000 * find_freq_from_fid(currfid); + dprintk(KERN_DEBUG PFX "policy current frequency %d kHz\n", pol->cur); + + /* min/max the cpu is capable of */ + pol->cpuinfo.min_freq = 1000 * find_freq_from_fid(ppst[0].fid); + pol->cpuinfo.max_freq = 1000 * find_freq_from_fid(ppst[numps-1].fid); + pol->min = 1000 * find_freq_from_fid(ppst[0].fid); + pol->max = 1000 * find_freq_from_fid(ppst[batps - 1].fid); + + printk(KERN_INFO PFX "cpu_init done, current fid 0x%x, vid 0x%x\n", + currfid, currvid); + + return 0; +} + +/* driver entry point for init */ +static int __init +drv_init(void) +{ + int rc; + + printk(KERN_INFO PFX VERSION "\n"); + + if (check_supported_cpu() == 0) + return -ENODEV; + + rc = find_psb_table(); + if (rc) + return rc; + + if (pending_bit_stuck()) { + printk(KERN_ERR PFX "drv_init fail, change pending bit set\n"); + kfree(ppst); + return -EIO; + } + + return cpufreq_register_driver(&cpufreq_amd64_driver); +} + +/* driver entry point for term */ +static void __exit +drv_exit(void) +{ + dprintk(KERN_INFO PFX "drv_exit\n"); + + cpufreq_unregister_driver(&cpufreq_amd64_driver); + kfree(ppst); +} + +MODULE_AUTHOR("Paul Devriendt <paul.devriendt@amd.com>"); +MODULE_DESCRIPTION("AMD Athlon 64 and Opteron processor frequency driver."); +MODULE_LICENSE("GPL"); + +module_init(drv_init); +module_exit(drv_exit);
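Before the header file: a small sketch of the rdmsr protocol it documents, mirroring what query_current_values_with_pending_wait() does above. The function name here is invented for illustration; the MSR number and field masks are the real constants defined in powernow-k8.h below.

/* Decode the FID/VID status MSR: the low word carries the current fid and
 * the change-pending flag, the high word carries the current vid. */
static void decode_fidvid_status_sketch(void)
{
	u32 lo, hi;

	rdmsr(MSR_FIDVID_STATUS, lo, hi);
	printk(KERN_DEBUG "pending %u, fid 0x%x, vid 0x%x\n",
	       (lo & MSR_S_LO_CHANGE_PENDING) ? 1 : 0,
	       lo & MSR_S_LO_CURRENT_FID,
	       hi & MSR_S_HI_CURRENT_VID);
}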
--- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25/arch/i386/kernel/cpu/cpufreq/powernow-k8.h 2003-10-05 00:33:23.000000000 -0700 @@ -0,0 +1,126 @@ +/* + * (c) 2003 Advanced Micro Devices, Inc. + * Your use of this code is subject to the terms and conditions of the + * GNU general public license version 2. See "../../../COPYING" or + * http://www.gnu.org/licenses/gpl.html + */ + +/* processor's cpuid instruction support */ +#define CPUID_PROCESSOR_SIGNATURE 1 /* function 1 */ +#define CPUID_F1_FAM 0x00000f00 /* family mask */ +#define CPUID_F1_XFAM 0x0ff00000 /* extended family mask */ +#define CPUID_F1_MOD 0x000000f0 /* model mask */ +#define CPUID_F1_STEP 0x0000000f /* stepping level mask */ +#define CPUID_XFAM_MOD 0x0ff00ff0 /* xtended fam, fam + model */ +#define ATHLON64_XFAM_MOD 0x00000f40 /* xtended fam, fam + model */ +#define OPTERON_XFAM_MOD 0x00000f50 /* xtended fam, fam + model */ +#define ATHLON64_REV_C0 8 +#define CPUID_GET_MAX_CAPABILITIES 0x80000000 +#define CPUID_FREQ_VOLT_CAPABILITIES 0x80000007 +#define P_STATE_TRANSITION_CAPABLE 6 + +/* Model Specific Registers for p-state transitions. MSRs are 64-bit. For */ +/* writes (wrmsr - opcode 0f 30), the register number is placed in ecx, and */ +/* the value to write is placed in edx:eax. For reads (rdmsr - opcode 0f 32), */ +/* the register number is placed in ecx, and the data is returned in edx:eax. */ + +#define MSR_FIDVID_CTL 0xc0010041 +#define MSR_FIDVID_STATUS 0xc0010042 + +/* Field definitions within the FID VID Low Control MSR : */ +#define MSR_C_LO_INIT_FID_VID 0x00010000 +#define MSR_C_LO_NEW_VID 0x00001f00 +#define MSR_C_LO_NEW_FID 0x0000002f +#define MSR_C_LO_VID_SHIFT 8 + +/* Field definitions within the FID VID High Control MSR : */ +#define MSR_C_HI_STP_GNT_TO 0x000fffff + +/* Field definitions within the FID VID Low Status MSR : */ +#define MSR_S_LO_CHANGE_PENDING 0x80000000 /* cleared when completed */ +#define MSR_S_LO_MAX_RAMP_VID 0x1f000000 +#define MSR_S_LO_MAX_FID 0x003f0000 +#define MSR_S_LO_START_FID 0x00003f00 +#define MSR_S_LO_CURRENT_FID 0x0000003f + +/* Field definitions within the FID VID High Status MSR : */ +#define MSR_S_HI_MAX_WORKING_VID 0x001f0000 +#define MSR_S_HI_START_VID 0x00001f00 +#define MSR_S_HI_CURRENT_VID 0x0000001f + +/* fids (frequency identifiers) are arranged in 2 tables - lo and hi */ +#define LO_FID_TABLE_TOP 6 +#define HI_FID_TABLE_BOTTOM 8 + +#define LO_VCOFREQ_TABLE_TOP 1400 /* corresponding vco frequency values */ +#define HI_VCOFREQ_TABLE_BOTTOM 1600 + +#define MIN_FREQ_RESOLUTION 200 /* fids jump by 2 matching freq jumps by 200 */ + +#define MAX_FID 0x2a /* Spec only gives FID values as far as 5 GHz */ + +#define LEAST_VID 0x1e /* Lowest (numerically highest) useful vid value */ + +#define MIN_FREQ 800 /* Min and max freqs, per spec */ +#define MAX_FREQ 5000 + +#define INVALID_FID_MASK 0xffffffc1 /* not a valid fid if these bits are set */ + +#define INVALID_VID_MASK 0xffffffe0 /* not a valid vid if these bits are set */ + +#define STOP_GRANT_5NS 1 /* min poss memory access latency for voltage change */ + +#define PLL_LOCK_CONVERSION (1000/5) /* ms to ns, then divide by clock period */ + +#define MAXIMUM_VID_STEPS 1 /* Current cpus only allow a single step of 25mV */ + +#define VST_UNITS_20US 20 /* Voltage Stabilization Time is in units of 20us */ + +/* +Version 1.4 of the PSB table. This table is constructed by the BIOS to +tell the OS's power management driver which VIDs and FIDs are +supported by this particular processor. This information is obtained from +the data sheets for each processor model by the system vendor and +incorporated into the BIOS. +If the data in the PSB / PST is wrong, then this driver will program the +wrong values into hardware, which is very likely to lead to a crash. +*/ + +#define PSB_ID_STRING "AMDK7PNOW!" +#define PSB_ID_STRING_LEN 10 + +#define PSB_VERSION_1_4 0x14 + +struct psb_s { + u8 signature[10]; + u8 tableversion; + u8 flags1; + u16 voltagestabilizationtime; + u8 flags2; + u8 numpst; + u32 cpuid; + u8 plllocktime; + u8 maxfid; + u8 maxvid; + u8 numpstates; +}; + +/* Pairs of fid/vid values are appended to the version 1.4 PSB table. */ +struct pst_s { + u8 fid; + u8 vid; +}; + +#ifdef DEBUG +#define dprintk(msg...) printk(msg) +#else +#define dprintk(msg...) 
do { } while(0) +#endif + +static inline int core_voltage_pre_transition(u32 reqvid); +static inline int core_voltage_post_transition(u32 reqvid); +static inline int core_frequency_transition(u32 reqfid); +static int drv_verify(struct cpufreq_policy *pol); +static int drv_target(struct cpufreq_policy *pol, unsigned targfreq, + unsigned relation); +static int __init drv_cpu_init(struct cpufreq_policy *pol); --- linux-2.6.0-test6/arch/i386/kernel/cpu/intel.c 2003-09-27 18:57:43.000000000 -0700 +++ 25/arch/i386/kernel/cpu/intel.c 2003-10-05 00:36:48.000000000 -0700 @@ -8,11 +8,10 @@ #include #include #include +#include #include "cpu.h" -extern int trap_init_f00f_bug(void); - #ifdef CONFIG_X86_INTEL_USERCOPY /* * Alignment at which movsl is preferred for bulk memory copies. @@ -157,7 +156,7 @@ static void __init init_intel(struct cpu c->f00f_bug = 1; if ( !f00f_workaround_enabled ) { - trap_init_f00f_bug(); + trap_init_virtual_IDT(); printk(KERN_NOTICE "Intel Pentium with F0 0F bug - workaround enabled.\n"); f00f_workaround_enabled = 1; } @@ -238,12 +237,9 @@ static void __init init_intel(struct cpu } /* SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until model 3 mask 3 */ - if ( c->x86 == 6) { - unsigned model_mask = (c->x86_model << 8) + c->x86_mask; - if (model_mask < 0x0303) - clear_bit(X86_FEATURE_SEP, c->x86_capability); - } - + if ((c->x86<<8 | c->x86_model<<4 | c->x86_mask) < 0x633) + clear_bit(X86_FEATURE_SEP, c->x86_capability); + /* Names for the Pentium II/Celeron processors detectable only by also checking the cache size. Dixon is NOT a Celeron. */ --- linux-2.6.0-test6/arch/i386/kernel/cpu/mcheck/k7.c 2003-08-08 22:55:10.000000000 -0700 +++ 25/arch/i386/kernel/cpu/mcheck/k7.c 2003-10-05 00:33:23.000000000 -0700 @@ -17,7 +17,7 @@ #include "mce.h" /* Machine Check Handler For AMD Athlon/Duron */ -static void k7_machine_check(struct pt_regs * regs, long error_code) +static asmlinkage void k7_machine_check(struct pt_regs * regs, long error_code) { int recover=1; u32 alow, ahigh, high, low; @@ -31,7 +31,7 @@ static void k7_machine_check(struct pt_r printk (KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n", smp_processor_id(), mcgsth, mcgstl); - for (i=0; i"), - MATCH(DMI_BIOS_VERSION, "ASUS A7V ACPI BIOS Revision 1011"), NO_MATCH }}, - { force_acpi_ht, "ABIT i440BX-W83977", { MATCH(DMI_BOARD_VENDOR, "ABIT "), MATCH(DMI_BOARD_NAME, "i440BX-W83977 (BP6)"), @@ -978,7 +973,10 @@ static __initdata struct dmi_blacklist d { disable_acpi_pci, "ASUS A7V", { MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC"), MATCH(DMI_BOARD_NAME, ""), - MATCH(DMI_BIOS_VERSION, "ASUS A7V ACPI BIOS Revision 1007"), NO_MATCH }}, + /* newer BIOS, Revision 1011, does work */ + MATCH(DMI_BIOS_VERSION, "ASUS A7V ACPI BIOS Revision 1007"), + NO_MATCH }}, + #endif { NULL, } --- linux-2.6.0-test6/arch/i386/kernel/doublefault.c 2003-08-22 19:23:40.000000000 -0700 +++ 25/arch/i386/kernel/doublefault.c 2003-10-05 00:36:48.000000000 -0700 @@ -7,12 +7,13 @@ #include #include #include +#include #define DOUBLEFAULT_STACKSIZE (1024) static unsigned long doublefault_stack[DOUBLEFAULT_STACKSIZE]; #define STACK_START (unsigned long)(doublefault_stack+DOUBLEFAULT_STACKSIZE) -#define ptr_ok(x) ((x) > 0xc0000000 && (x) < 0xc1000000) +#define ptr_ok(x) (((x) > __PAGE_OFFSET && (x) < (__PAGE_OFFSET + 0x01000000)) || ((x) >= FIXADDR_START)) static void doublefault_fn(void) { @@ -38,8 +39,8 @@ static void doublefault_fn(void) printk("eax = %08lx, ebx = %08lx, ecx = %08lx, edx = %08lx\n", t->eax, t->ebx, t->ecx, t->edx); 
- printk("esi = %08lx, edi = %08lx\n", - t->esi, t->edi); + printk("esi = %08lx, edi = %08lx, ebp = %08lx\n", + t->esi, t->edi, t->ebp); } } --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25/arch/i386/kernel/efi.c 2003-10-05 00:36:25.000000000 -0700 @@ -0,0 +1,611 @@ +/* + * Extensible Firmware Interface + * + * Based on Extensible Firmware Interface Specification version 1.0 + * + * Copyright (C) 1999 VA Linux Systems + * Copyright (C) 1999 Walt Drummond + * Copyright (C) 1999-2002 Hewlett-Packard Co. + * David Mosberger-Tang + * Stephane Eranian + * + * Not all EFI Runtime Services are implemented yet, as EFI only + * supports physical mode addressing on SoftSDV. This is to be fixed + * in a future version. --drummond 1999-07-20 + * + * Implemented EFI runtime services and virtual mode calls. --davidm + * + * Goutham Rao: + * Skip non-WB memory and ignore empty memory ranges. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#define EFI_DEBUG 0 +#define PFX "EFI: " + +extern efi_status_t asmlinkage efi_call_phys(void *, ...); + +struct efi efi; +struct efi efi_phys __initdata; +struct efi_memory_map memmap __initdata; + +/* + * We require an early boot_ioremap mapping mechanism initially + */ +extern void * boot_ioremap(unsigned long, unsigned long); + +/* + * efi_dir is allocated here, but the directory isn't created + * here, as proc_mkdir() doesn't work this early in the bootup + * process. Therefore, each module, like efivars, must test for + * if (!efi_dir) efi_dir = proc_mkdir("efi", NULL); + * prior to creating their own entries under /proc/efi. + */ +#ifdef CONFIG_PROC_FS +struct proc_dir_entry *efi_dir; +#endif + + +/* + * To call an EFI runtime service in physical addressing mode, we need + * prelog/epilog code around the invocation to disable interrupts, to + * claim the EFI runtime service handler exclusively, and to duplicate + * the memory mappings into the low address space (0 - 3G). + */ + +static unsigned long efi_rt_eflags; +static spinlock_t efi_rt_lock = SPIN_LOCK_UNLOCKED; +static pgd_t efi_bak_pg_dir_pointer[2]; + +static void efi_call_phys_prelog(void) +{ + unsigned long cr4; + unsigned long temp; + + spin_lock(&efi_rt_lock); + local_irq_save(efi_rt_eflags); + + /* + * If I don't have PSE, I should just duplicate two entries in page + * directory. If I have PSE, I just need to duplicate one entry in + * page directory. + */ + __asm__ __volatile__("movl %%cr4, %0":"=r"(cr4)); + + if (cr4 & X86_CR4_PSE) { + efi_bak_pg_dir_pointer[0].pgd = + swapper_pg_dir[pgd_index(0)].pgd; + swapper_pg_dir[0].pgd = + swapper_pg_dir[pgd_index(PAGE_OFFSET)].pgd; + } else { + efi_bak_pg_dir_pointer[0].pgd = + swapper_pg_dir[pgd_index(0)].pgd; + efi_bak_pg_dir_pointer[1].pgd = + swapper_pg_dir[pgd_index(0x400000)].pgd; + swapper_pg_dir[pgd_index(0)].pgd = + swapper_pg_dir[pgd_index(PAGE_OFFSET)].pgd; + temp = PAGE_OFFSET + 0x400000; + swapper_pg_dir[pgd_index(0x400000)].pgd = + swapper_pg_dir[pgd_index(temp)].pgd; + } + + /* + * Flush the TLB so the low memory mappings just installed take effect. 
+ */ + local_flush_tlb(); + + cpu_gdt_descr[0].address = __pa(cpu_gdt_descr[0].address); + __asm__ __volatile__("lgdt %0":"=m" + (*(struct Xgt_desc_struct *) __pa(&cpu_gdt_descr[0]))); +} + +static void efi_call_phys_epilog(void) +{ + unsigned long cr4; + + cpu_gdt_descr[0].address = + (unsigned long) __va(cpu_gdt_descr[0].address); + __asm__ __volatile__("lgdt %0":"=m"(cpu_gdt_descr)); + __asm__ __volatile__("movl %%cr4, %0":"=r"(cr4)); + + if (cr4 & X86_CR4_PSE) { + swapper_pg_dir[pgd_index(0)].pgd = + efi_bak_pg_dir_pointer[0].pgd; + } else { + swapper_pg_dir[pgd_index(0)].pgd = + efi_bak_pg_dir_pointer[0].pgd; + swapper_pg_dir[pgd_index(0x400000)].pgd = + efi_bak_pg_dir_pointer[1].pgd; + } + + /* + * After the lock is released, the original page table is restored. + */ + local_flush_tlb(); + + local_irq_restore(efi_rt_eflags); + spin_unlock(&efi_rt_lock); +} + +static efi_status_t +phys_efi_set_virtual_address_map(unsigned long memory_map_size, + unsigned long descriptor_size, + u32 descriptor_version, + efi_memory_desc_t *virtual_map) +{ + efi_status_t status; + + efi_call_phys_prelog(); + status = efi_call_phys(efi_phys.set_virtual_address_map, + memory_map_size, descriptor_size, + descriptor_version, virtual_map); + efi_call_phys_epilog(); + return status; +} + +efi_status_t +phys_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc) +{ + efi_status_t status; + + efi_call_phys_prelog(); + status = efi_call_phys(efi_phys.get_time, tm, tc); + efi_call_phys_epilog(); + return status; +} + +void efi_gettimeofday(struct timespec *tv) +{ + efi_time_t tm; + + memset(tv, 0, sizeof(*tv)); + if ((*efi.get_time) (&tm, 0) != EFI_SUCCESS) + return; + + tv->tv_sec = mktime(tm.year, tm.month, tm.day, tm.hour, tm.minute, + tm.second); + tv->tv_nsec = tm.nanosecond; +} + +int is_available_memory(efi_memory_desc_t * md) +{ + if (!(md->attribute & EFI_MEMORY_WB)) + return 0; + + switch (md->type) { + case EFI_LOADER_CODE: + case EFI_LOADER_DATA: + case EFI_BOOT_SERVICES_CODE: + case EFI_BOOT_SERVICES_DATA: + case EFI_CONVENTIONAL_MEMORY: + return 1; + } + return 0; +} + +/* + * We need to map the EFI memory map again after paging_init(). + */ +void __init efi_map_memmap(void) +{ + memmap.map = NULL; + + memmap.map = (efi_memory_desc_t *) + bt_ioremap((unsigned long) memmap.phys_map, + (memmap.nr_map * sizeof(efi_memory_desc_t))); + + if (memmap.map == NULL) + printk(KERN_ERR PFX "Could not remap the EFI memmap!\n"); +} + +void __init print_efi_memmap(void) +{ + efi_memory_desc_t *md; + int i; + + for (i = 0; i < memmap.nr_map; i++) { + md = &memmap.map[i]; + printk(KERN_INFO "mem%02u: type=%u, attr=0x%llx, " + "range=[0x%016llx-0x%016llx) (%lluMB)\n", + i, md->type, md->attribute, md->phys_addr, + md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT), + (md->num_pages >> (20 - EFI_PAGE_SHIFT))); + } +} + +/* + * Walks the EFI memory map and calls CALLBACK once for each EFI + * memory descriptor that has memory that is available for kernel use. 
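+ * As an illustrative sketch only (the callback name and the running
+ * total are hypothetical, not part of this file), a caller that wants
+ * to count the usable pages could do:
+ *
+ *	static int count_pages(unsigned long start, unsigned long end,
+ *			       void *arg)
+ *	{
+ *		unsigned long *total = arg;
+ *
+ *		*total += (end - start) >> PAGE_SHIFT;
+ *		return 0;	// a return value >= 0 means: keep walking
+ *	}
+ *
+ *	unsigned long total = 0;
+ *	efi_memmap_walk(count_pages, &total);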
+ */ +void efi_memmap_walk(efi_freemem_callback_t callback, void *arg) +{ + int prev_valid = 0; + struct range { + unsigned long start; + unsigned long end; + } prev, curr; + efi_memory_desc_t *md; + unsigned long start, end; + int i; + + for (i = 0; i < memmap.nr_map; i++) { + md = &memmap.map[i]; + + if ((md->num_pages == 0) || (!is_available_memory(md))) + continue; + + curr.start = md->phys_addr; + curr.end = curr.start + (md->num_pages << EFI_PAGE_SHIFT); + + if (!prev_valid) { + prev = curr; + prev_valid = 1; + } else { + if (curr.start < prev.start) + printk(KERN_INFO PFX "Unordered memory map\n"); + if (prev.end == curr.start) + prev.end = curr.end; + else { + start = + (unsigned long) (PAGE_ALIGN(prev.start)); + end = (unsigned long) (prev.end & PAGE_MASK); + if ((end > start) + && (*callback) (start, end, arg) < 0) + return; + prev = curr; + } + } + } + if (prev_valid) { + start = (unsigned long) PAGE_ALIGN(prev.start); + end = (unsigned long) (prev.end & PAGE_MASK); + if (end > start) + (*callback) (start, end, arg); + } +} + +void __init efi_init(void) +{ + efi_config_table_t *config_tables; + efi_runtime_services_t *runtime; + efi_char16_t *c16; + char vendor[100] = "unknown"; + unsigned long num_config_tables; + int i = 0; + + memset(&efi, 0, sizeof(efi) ); + memset(&efi_phys, 0, sizeof(efi_phys)); + + efi_phys.systab = EFI_SYSTAB; + memmap.phys_map = EFI_MEMMAP; + memmap.nr_map = EFI_MEMMAP_SIZE/EFI_MEMDESC_SIZE; + memmap.desc_version = EFI_MEMDESC_VERSION; + + efi.systab = (efi_system_table_t *) + boot_ioremap((unsigned long) efi_phys.systab, + sizeof(efi_system_table_t)); + /* + * Verify the EFI Table + */ + if (efi.systab == NULL) + printk(KERN_ERR PFX "Woah! Couldn't map the EFI system table.\n"); + if (efi.systab->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE) + printk(KERN_ERR PFX "Woah! EFI system table signature incorrect\n"); + if ((efi.systab->hdr.revision ^ EFI_SYSTEM_TABLE_REVISION) >> 16 != 0) + printk(KERN_ERR PFX + "Warning: EFI system table major version mismatch: " + "got %d.%02d, expected %d.%02d\n", + efi.systab->hdr.revision >> 16, + efi.systab->hdr.revision & 0xffff, + EFI_SYSTEM_TABLE_REVISION >> 16, + EFI_SYSTEM_TABLE_REVISION & 0xffff); + /* + * Grab some details from the system table + */ + num_config_tables = efi.systab->nr_tables; + config_tables = (efi_config_table_t *)efi.systab->tables; + runtime = efi.systab->runtime; + + /* + * Show what we know for posterity + */ + c16 = (efi_char16_t *) boot_ioremap(efi.systab->fw_vendor, 2); + if (c16) { + for (i = 0; i < sizeof(vendor) && *c16; ++i) + vendor[i] = *c16++; + vendor[i] = '\0'; + } else + printk(KERN_ERR PFX "Could not map the firmware vendor!\n"); + + printk(KERN_INFO PFX "EFI v%u.%.02u by %s \n", + efi.systab->hdr.revision >> 16, + efi.systab->hdr.revision & 0xffff, vendor); + + /* + * Let's see what config tables the firmware passed to us. 
+ */ + config_tables = (efi_config_table_t *) + boot_ioremap((unsigned long) config_tables, + num_config_tables * sizeof(efi_config_table_t)); + + if (config_tables == NULL) + printk(KERN_ERR PFX "Could not map EFI Configuration Table!\n"); + + for (i = 0; i < num_config_tables; i++) { + if (efi_guidcmp(config_tables[i].guid, MPS_TABLE_GUID) == 0) { + efi.mps = (void *)config_tables[i].table; + printk(KERN_INFO " MPS=0x%lx ", config_tables[i].table); + } else + if (efi_guidcmp(config_tables[i].guid, ACPI_20_TABLE_GUID) == 0) { + efi.acpi20 = __va(config_tables[i].table); + printk(KERN_INFO " ACPI 2.0=0x%lx ", config_tables[i].table); + } else + if (efi_guidcmp(config_tables[i].guid, ACPI_TABLE_GUID) == 0) { + efi.acpi = __va(config_tables[i].table); + printk(KERN_INFO " ACPI=0x%lx ", config_tables[i].table); + } else + if (efi_guidcmp(config_tables[i].guid, SMBIOS_TABLE_GUID) == 0) { + efi.smbios = (void *) config_tables[i].table; + printk(KERN_INFO " SMBIOS=0x%lx ", config_tables[i].table); + } else + if (efi_guidcmp(config_tables[i].guid, HCDP_TABLE_GUID) == 0) { + efi.hcdp = (void *)config_tables[i].table; + printk(KERN_INFO " HCDP=0x%lx ", config_tables[i].table); + } else + if (efi_guidcmp(config_tables[i].guid, UGA_IO_PROTOCOL_GUID) == 0) { + efi.uga = (void *)config_tables[i].table; + printk(KERN_INFO " UGA=0x%lx ", config_tables[i].table); + } + } + printk("\n"); + + /* + * Check out the runtime services table. We need to map + * the runtime services table so that we can grab the physical + * address of several of the EFI runtime functions, needed to + * set the firmware into virtual mode. + */ + + runtime = (efi_runtime_services_t *) boot_ioremap((unsigned long) + runtime, + sizeof(efi_runtime_services_t)); + if (runtime != NULL) { + /* + * We will only need *early* access to the following + * two EFI runtime services before set_virtual_address_map + * is invoked. + */ + efi_phys.get_time = (efi_get_time_t *) runtime->get_time; + efi_phys.set_virtual_address_map = + (efi_set_virtual_address_map_t *) + runtime->set_virtual_address_map; + } else + printk(KERN_ERR PFX "Could not map the runtime service table!\n"); + + /* Map the EFI memory map for use until paging_init() */ + + memmap.map = (efi_memory_desc_t *) + boot_ioremap((unsigned long) EFI_MEMMAP, EFI_MEMMAP_SIZE); + + if (memmap.map == NULL) + printk(KERN_ERR PFX "Could not map the EFI memory map!\n"); + + if (EFI_MEMDESC_SIZE != sizeof(efi_memory_desc_t)) { + printk(KERN_WARNING PFX "Warning! Kernel-defined memdesc doesn't " + "match the one from EFI!\n"); + } +#if EFI_DEBUG + print_efi_memmap(); +#endif +} + +/* + * This function will switch the EFI runtime services to virtual mode. + * Essentially, look through the EFI memmap and map every region that + * has the runtime attribute bit set in its memory descriptor and update + * that memory descriptor with the virtual address obtained from ioremap(). + * This enables the runtime services to be called without having to + * thunk back into physical mode for every invocation. 
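+ * Once the switch has happened, a runtime service call is just an
+ * indirect call through the remapped table; schematically (an editor's
+ * example, not code from this file, and do_something_with() is a
+ * hypothetical consumer):
+ *
+ *	efi_time_t tm;
+ *
+ *	if ((*efi.get_time)(&tm, NULL) == EFI_SUCCESS)
+ *		do_something_with(&tm);
+ *
+ * exactly as efi_gettimeofday() above already does.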
+ */ + +void __init efi_enter_virtual_mode(void) +{ + efi_memory_desc_t *md; + efi_status_t status; + int i; + + efi.systab = NULL; + + for (i = 0; i < memmap.nr_map; i++) { + md = &memmap.map[i]; + + if (md->attribute & EFI_MEMORY_RUNTIME) { + md->virt_addr = + (unsigned long)ioremap(md->phys_addr, + md->num_pages << EFI_PAGE_SHIFT); + if (!(unsigned long)md->virt_addr) { + printk(KERN_ERR PFX "ioremap of 0x%lX failed\n", + (unsigned long)md->phys_addr); + } + + if (((unsigned long)md->phys_addr <= + (unsigned long)efi_phys.systab) && + ((unsigned long)efi_phys.systab < + md->phys_addr + + ((unsigned long)md->num_pages << + EFI_PAGE_SHIFT))) { + unsigned long addr; + + addr = md->virt_addr - md->phys_addr + + (unsigned long)efi_phys.systab; + efi.systab = (efi_system_table_t *)addr; + } + } + } + + if (!efi.systab) + BUG(); + + status = phys_efi_set_virtual_address_map( + sizeof(efi_memory_desc_t) * memmap.nr_map, + sizeof(efi_memory_desc_t), + memmap.desc_version, + memmap.phys_map); + + if (status != EFI_SUCCESS) { + printk (KERN_ALERT "You are screwed! " + "Unable to switch EFI into virtual mode " + "(status=%lx)\n", status); + panic("EFI call to SetVirtualAddressMap() failed!"); + } + + /* + * Now that EFI is in virtual mode, update the function + * pointers in the runtime service table to the new virtual addresses. + */ + + efi.get_time = (efi_get_time_t *) efi.systab->runtime->get_time; + efi.set_time = (efi_set_time_t *) efi.systab->runtime->set_time; + efi.get_wakeup_time = (efi_get_wakeup_time_t *) + efi.systab->runtime->get_wakeup_time; + efi.set_wakeup_time = (efi_set_wakeup_time_t *) + efi.systab->runtime->set_wakeup_time; + efi.get_variable = (efi_get_variable_t *) + efi.systab->runtime->get_variable; + efi.get_next_variable = (efi_get_next_variable_t *) + efi.systab->runtime->get_next_variable; + efi.set_variable = (efi_set_variable_t *) + efi.systab->runtime->set_variable; + efi.get_next_high_mono_count = (efi_get_next_high_mono_count_t *) + efi.systab->runtime->get_next_high_mono_count; + efi.reset_system = (efi_reset_system_t *) + efi.systab->runtime->reset_system; +} + +void __init +efi_initialize_iomem_resources(struct resource *code_resource, + struct resource *data_resource) +{ + struct resource *res; + efi_memory_desc_t *md; + int i; + + for (i = 0; i < memmap.nr_map; i++) { + md = &memmap.map[i]; + + if ((md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT)) > + 0x100000000ULL) + continue; + res = alloc_bootmem_low(sizeof(struct resource)); + switch (md->type) { + case EFI_RESERVED_TYPE: + res->name = "Reserved Memory"; + break; + case EFI_LOADER_CODE: + res->name = "Loader Code"; + break; + case EFI_LOADER_DATA: + res->name = "Loader Data"; + break; + case EFI_BOOT_SERVICES_DATA: + res->name = "BootServices Data"; + break; + case EFI_BOOT_SERVICES_CODE: + res->name = "BootServices Code"; + break; + case EFI_RUNTIME_SERVICES_CODE: + res->name = "Runtime Service Code"; + break; + case EFI_RUNTIME_SERVICES_DATA: + res->name = "Runtime Service Data"; + break; + case EFI_CONVENTIONAL_MEMORY: + res->name = "Conventional Memory"; + break; + case EFI_UNUSABLE_MEMORY: + res->name = "Unusable Memory"; + break; + case EFI_ACPI_RECLAIM_MEMORY: + res->name = "ACPI Reclaim"; + break; + case EFI_ACPI_MEMORY_NVS: + res->name = "ACPI NVS"; + break; + case EFI_MEMORY_MAPPED_IO: + res->name = "Memory Mapped IO"; + break; + case EFI_MEMORY_MAPPED_IO_PORT_SPACE: + res->name = "Memory Mapped IO Port Space"; + break; + default: + res->name = "Reserved"; + break; + } + res->start = 
md->phys_addr;
+		res->end = res->start + ((md->num_pages << EFI_PAGE_SHIFT) - 1);
+		res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+		if (request_resource(&iomem_resource, res) < 0)
+			printk(KERN_ERR PFX "Failed to allocate res %s : 0x%lx-0x%lx\n",
+				res->name, res->start, res->end);
+		/*
+		 * We don't know which region contains kernel data, so we try
+		 * each conventional-memory region in turn and let the
+		 * resource manager test it.
+		 */
+		if (md->type == EFI_CONVENTIONAL_MEMORY) {
+			request_resource(res, code_resource);
+			request_resource(res, data_resource);
+		}
+	}
+}
+
+/*
+ * Convenience functions to obtain memory types and attributes
+ */
+
+u32 efi_mem_type(unsigned long phys_addr)
+{
+	efi_memory_desc_t *md;
+	int i;
+
+	for (i = 0; i < memmap.nr_map; i++) {
+		md = &memmap.map[i];
+		if ((md->phys_addr <= phys_addr) && (phys_addr <
+		     (md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT))))
+			return md->type;
+	}
+	return 0;
+}
+
+u64 efi_mem_attributes(unsigned long phys_addr)
+{
+	efi_memory_desc_t *md;
+	int i;
+
+	for (i = 0; i < memmap.nr_map; i++) {
+		md = &memmap.map[i];
+		if ((md->phys_addr <= phys_addr) && (phys_addr <
+		     (md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT))))
+			return md->attribute;
+	}
+	return 0;
+}
--- /dev/null	2002-08-30 16:31:37.000000000 -0700
+++ 25/arch/i386/kernel/efi_stub.S	2003-10-05 00:36:22.000000000 -0700
@@ -0,0 +1,125 @@
+/*
+ * EFI call stub for IA32.
+ *
+ * This stub allows us to make EFI calls in physical mode with interrupts
+ * turned off.
+ */
+
+#include
+#include
+#include
+#include
+
+/*
+ * efi_call_phys(void *, ...) is a function with variable parameters.
+ * All callers of this function ensure that every parameter is 4 bytes wide.
+ */
+
+/*
+ * In the gcc calling convention, EBX, ESP, EBP, ESI and EDI are all
+ * callee-save. So we'd better save all of them at the beginning of this
+ * function and restore them at the end, no matter how many we actually use,
+ * because we cannot assume that the EFI runtime service functions comply
+ * with the gcc calling convention.
+ */
+
+.text
+.section .text, "a"
+ENTRY(efi_call_phys)
+	/*
+	 * 0. The function can only be called in the Linux kernel. So CS has
+	 * been set to 0x0010, DS and SS have been set to 0x0018. In EFI, I
+	 * found the values of these registers are the same. And, the
+	 * corresponding GDT entries are identical. So I will do nothing about
+	 * segment reg and GDT, but change the GDT base register in the
+	 * prolog and epilog.
+	 */
+
+	/*
+	 * 1. Now I am running with EIP = <physical address> + PAGE_OFFSET.
+	 * To make the switch from virtual mode to flat mode smooth, the
+	 * mapping of lower virtual memory has been created in the prolog
+	 * and is torn down in the epilog.
+	 */
+	movl $1f, %edx
+	subl $__PAGE_OFFSET, %edx
+	jmp *%edx
+1:
+
+	/*
+	 * 2. Now on the top of stack is the return
+	 * address in the caller of efi_call_phys(), then parameter 1,
+	 * parameter 2, ..., param n. To make things easy, we save the return
+	 * address of efi_call_phys in a global variable.
+	 */
+	popl %edx
+	movl %edx, saved_return_addr
+	/* get the function pointer into ECX */
+	popl %ecx
+	movl %ecx, efi_rt_function_ptr
+	movl $2f, %edx
+	subl $__PAGE_OFFSET, %edx
+	pushl %edx
+
+	/*
+	 * 3. Clear PG bit in %CR0.
+	 */
+	movl %cr0, %edx
+	andl $0x7fffffff, %edx
+	movl %edx, %cr0
+	jmp 1f
+1:
+
+	/*
+	 * 4. Adjust stack pointer.
+	 */
+	subl $__PAGE_OFFSET, %esp
+
+	/*
+	 * 5. Call the physical function.
+	 */
+	jmp *%ecx
+
+2:
+	/*
+	 * 6. After the EFI runtime service returns, control will return to
+	 * the following instruction. We'd better readjust the stack pointer
+	 * first.
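+	 * (This is the inverse of step 4: %esp still holds the flat-mode
+	 * physical address, and paging is about to come back on in step 7,
+	 * so adding __PAGE_OFFSET turns it back into the kernel's virtual
+	 * stack pointer.)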
+ */ + addl $__PAGE_OFFSET, %esp + + /* + * 7. Restore PG bit + */ + movl %cr0, %edx + orl $0x80000000, %edx + movl %edx, %cr0 + jmp 1f +1: + /* + * 8. Now restore the virtual mode from flat mode by + * adding EIP with PAGE_OFFSET. + */ + movl $1f, %edx + jmp *%edx +1: + + /* + * 9. Balance the stack. And because EAX contain the return value, + * we'd better not clobber it. + */ + leal efi_rt_function_ptr, %edx + movl (%edx), %ecx + pushl %ecx + + /* + * 10. Push the saved return address onto the stack and return. + */ + leal saved_return_addr, %edx + movl (%edx), %ecx + pushl %ecx + ret +.previous + +.data +saved_return_addr: + .long 0 +efi_rt_function_ptr: + .long 0 --- linux-2.6.0-test6/arch/i386/kernel/entry.S 2003-08-22 19:23:40.000000000 -0700 +++ 25/arch/i386/kernel/entry.S 2003-10-05 00:36:48.000000000 -0700 @@ -43,11 +43,25 @@ #include #include #include +#include #include #include +#include #include #include #include "irq_vectors.h" + /* We do not recover from a stack overflow, but at least + * we know it happened and should be able to track it down. + */ +#ifdef CONFIG_STACK_OVERFLOW_TEST +#define STACK_OVERFLOW_TEST \ + testl $7680,%esp; \ + jnz 10f; \ + call stack_overflow; \ +10: +#else +#define STACK_OVERFLOW_TEST +#endif EBX = 0x00 ECX = 0x04 @@ -85,7 +99,102 @@ TSS_ESP0_OFFSET = (4 - 0x200) #define resume_kernel restore_all #endif -#define SAVE_ALL \ +#ifdef CONFIG_X86_HIGH_ENTRY + +#ifdef CONFIG_X86_SWITCH_PAGETABLES + +#if defined(CONFIG_PREEMPT) && defined(CONFIG_SMP) +/* + * If task is preempted in __SWITCH_KERNELSPACE, and moved to another cpu, + * __switch_to repoints %esp to the appropriate virtual stack; but %ebp is + * left stale, so we must check whether to repeat the real stack calculation. + */ +#define repeat_if_esp_changed \ + xorl %esp, %ebp; \ + testl $0xffffe000, %ebp; \ + jnz 0b +#else +#define repeat_if_esp_changed +#endif + +/* clobbers ebx, edx and ebp */ + +#define __SWITCH_KERNELSPACE \ + cmpl $0xff000000, %esp; \ + jb 1f; \ + \ + /* \ + * switch pagetables and load the real stack, \ + * keep the stack offset: \ + */ \ + \ + movl $swapper_pg_dir-__PAGE_OFFSET, %edx; \ + \ + /* GET_THREAD_INFO(%ebp) intermixed */ \ +0: \ + movl %esp, %ebp; \ + movl %esp, %ebx; \ + andl $0xffffe000, %ebp; \ + andl $0x00001fff, %ebx; \ + orl TI_real_stack(%ebp), %ebx; \ + repeat_if_esp_changed; \ + \ + movl %edx, %cr3; \ + movl %ebx, %esp; \ +1: + +#endif + + +#define __SWITCH_USERSPACE \ + /* interrupted any of the user return paths? */ \ + \ + movl EIP(%esp), %eax; \ + \ + cmpl $int80_ret_start_marker, %eax; \ + jb 33f; /* nope - continue with sysexit check */\ + cmpl $int80_ret_end_marker, %eax; \ + jb 22f; /* yes - switch to virtual stack */ \ +33: \ + cmpl $sysexit_ret_start_marker, %eax; \ + jb 44f; /* nope - continue with user check */ \ + cmpl $sysexit_ret_end_marker, %eax; \ + jb 22f; /* yes - switch to virtual stack */ \ + /* return to userspace? */ \ +44: \ + movl EFLAGS(%esp),%ecx; \ + movb CS(%esp),%cl; \ + testl $(VM_MASK | 3),%ecx; \ + jz 2f; \ +22: \ + /* \ + * switch to the virtual stack, then switch to \ + * the userspace pagetables. 
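+	 * (The andl/orl pair below keeps only the low 13 bits of %esp,	\
+	 * i.e. the offset within the 8kB thread stack, and splices them	\
+	 * onto the per-thread virtual stack base from thread_info.)	\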
\ + */ \ + \ + GET_THREAD_INFO(%ebp); \ + movl TI_virtual_stack(%ebp), %edx; \ + movl TI_user_pgd(%ebp), %ecx; \ + \ + movl %esp, %ebx; \ + andl $0x1fff, %ebx; \ + orl %ebx, %edx; \ +int80_ret_start_marker: \ + movl %edx, %esp; \ + movl %ecx, %cr3; \ + \ + __RESTORE_ALL; \ +int80_ret_end_marker: \ +2: + +#else /* !CONFIG_X86_HIGH_ENTRY */ + +#define __SWITCH_KERNELSPACE +#define __SWITCH_USERSPACE + +#endif + +#define __SAVE_ALL \ cld; \ pushl %es; \ pushl %ds; \ @@ -100,7 +209,7 @@ TSS_ESP0_OFFSET = (4 - 0x200) movl %edx, %ds; \ movl %edx, %es; -#define RESTORE_INT_REGS \ +#define __RESTORE_INT_REGS \ popl %ebx; \ popl %ecx; \ popl %edx; \ @@ -109,29 +218,28 @@ TSS_ESP0_OFFSET = (4 - 0x200) popl %ebp; \ popl %eax -#define RESTORE_REGS \ - RESTORE_INT_REGS; \ -1: popl %ds; \ -2: popl %es; \ +#define __RESTORE_REGS \ + __RESTORE_INT_REGS; \ +111: popl %ds; \ +222: popl %es; \ .section .fixup,"ax"; \ -3: movl $0,(%esp); \ - jmp 1b; \ -4: movl $0,(%esp); \ - jmp 2b; \ +444: movl $0,(%esp); \ + jmp 111b; \ +555: movl $0,(%esp); \ + jmp 222b; \ .previous; \ .section __ex_table,"a";\ .align 4; \ - .long 1b,3b; \ - .long 2b,4b; \ + .long 111b,444b;\ + .long 222b,555b;\ .previous - -#define RESTORE_ALL \ - RESTORE_REGS \ +#define __RESTORE_ALL \ + __RESTORE_REGS \ addl $4, %esp; \ -1: iret; \ +333: iret; \ .section .fixup,"ax"; \ -2: sti; \ +666: sti; \ movl $(__USER_DS), %edx; \ movl %edx, %ds; \ movl %edx, %es; \ @@ -140,10 +248,19 @@ TSS_ESP0_OFFSET = (4 - 0x200) .previous; \ .section __ex_table,"a";\ .align 4; \ - .long 1b,2b; \ + .long 333b,666b;\ .previous +#define SAVE_ALL \ + __SAVE_ALL; \ + __SWITCH_KERNELSPACE; \ + STACK_OVERFLOW_TEST; + +#define RESTORE_ALL \ + __SWITCH_USERSPACE; \ + __RESTORE_ALL; +.section .entry.text,"ax" ENTRY(lcall7) pushfl # We get a different stack layout with call @@ -161,7 +278,7 @@ do_lcall: movl %edx,EIP(%ebp) # Now we move them to their "normal" places movl %ecx,CS(%ebp) # andl $-8192, %ebp # GET_THREAD_INFO - movl TI_EXEC_DOMAIN(%ebp), %edx # Get the execution domain + movl TI_exec_domain(%ebp), %edx # Get the execution domain call *4(%edx) # Call the lcall7 handler for the domain addl $4, %esp popl %eax @@ -206,7 +323,7 @@ ENTRY(resume_userspace) cli # make sure we don't miss an interrupt # setting need_resched or sigpending # between sampling and the iret - movl TI_FLAGS(%ebp), %ecx + movl TI_flags(%ebp), %ecx andl $_TIF_WORK_MASK, %ecx # is there any work to be done on # int/exception return? jne work_pending @@ -214,18 +331,18 @@ ENTRY(resume_userspace) #ifdef CONFIG_PREEMPT ENTRY(resume_kernel) - cmpl $0,TI_PRE_COUNT(%ebp) # non-zero preempt_count ? + cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ? jnz restore_all need_resched: - movl TI_FLAGS(%ebp), %ecx # need_resched set ? + movl TI_flags(%ebp), %ecx # need_resched set ? testb $_TIF_NEED_RESCHED, %cl jz restore_all testl $IF_MASK,EFLAGS(%esp) # interrupts off (exception path) ? jz restore_all - movl $PREEMPT_ACTIVE,TI_PRE_COUNT(%ebp) + movl $PREEMPT_ACTIVE,TI_preempt_count(%ebp) sti call schedule - movl $0,TI_PRE_COUNT(%ebp) + movl $0,TI_preempt_count(%ebp) cli jmp need_resched #endif @@ -244,37 +361,50 @@ sysenter_past_esp: pushl $(__USER_CS) pushl $SYSENTER_RETURN -/* - * Load the potential sixth argument from user stack. - * Careful about security. 
- */ - cmpl $__PAGE_OFFSET-3,%ebp - jae syscall_fault -1: movl (%ebp),%ebp -.section __ex_table,"a" - .align 4 - .long 1b,syscall_fault -.previous - pushl %eax SAVE_ALL GET_THREAD_INFO(%ebp) cmpl $(nr_syscalls), %eax jae syscall_badsys - testb $_TIF_SYSCALL_TRACE,TI_FLAGS(%ebp) + testb $_TIF_SYSCALL_TRACE,TI_flags(%ebp) jnz syscall_trace_entry call *sys_call_table(,%eax,4) movl %eax,EAX(%esp) cli - movl TI_FLAGS(%ebp), %ecx + movl TI_flags(%ebp), %ecx testw $_TIF_ALLWORK_MASK, %cx jne syscall_exit_work + +#ifdef CONFIG_X86_SWITCH_PAGETABLES + + GET_THREAD_INFO(%ebp) + movl TI_virtual_stack(%ebp), %edx + movl TI_user_pgd(%ebp), %ecx + movl %esp, %ebx + andl $0x1fff, %ebx + orl %ebx, %edx +sysexit_ret_start_marker: + movl %edx, %esp + movl %ecx, %cr3 +#endif + /* + * only ebx is not restored by the userspace sysenter vsyscall + * code, it assumes it to be callee-saved. + */ + movl EBX(%esp), %ebx + /* if something modifies registers it must also disable sysexit */ + movl EIP(%esp), %edx movl OLDESP(%esp), %ecx + sti sysexit +#ifdef CONFIG_X86_SWITCH_PAGETABLES +sysexit_ret_end_marker: + nop +#endif # system call handler stub @@ -285,7 +415,7 @@ ENTRY(system_call) cmpl $(nr_syscalls), %eax jae syscall_badsys # system call tracing in operation - testb $_TIF_SYSCALL_TRACE,TI_FLAGS(%ebp) + testb $_TIF_SYSCALL_TRACE,TI_flags(%ebp) jnz syscall_trace_entry syscall_call: call *sys_call_table(,%eax,4) @@ -294,10 +424,23 @@ syscall_exit: cli # make sure we don't miss an interrupt # setting need_resched or sigpending # between sampling and the iret - movl TI_FLAGS(%ebp), %ecx + movl TI_flags(%ebp), %ecx testw $_TIF_ALLWORK_MASK, %cx # current->work jne syscall_exit_work restore_all: +#ifdef CONFIG_TRAP_BAD_SYSCALL_EXITS + movl EFLAGS(%esp), %eax # mix EFLAGS and CS + movb CS(%esp), %al + testl $(VM_MASK | 3), %eax + jz resume_kernelX # returning to kernel or vm86-space + + cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ? + jz resume_kernelX + + int $3 + +resume_kernelX: +#endif RESTORE_ALL # perform work that needs to be done immediately before resumption @@ -310,7 +453,7 @@ work_resched: cli # make sure we don't miss an interrupt # setting need_resched or sigpending # between sampling and the iret - movl TI_FLAGS(%ebp), %ecx + movl TI_flags(%ebp), %ecx andl $_TIF_WORK_MASK, %ecx # is there any work to be done other # than syscall tracing? 
jz restore_all @@ -325,6 +468,22 @@ work_notifysig: # deal with pending s # vm86-space xorl %edx, %edx call do_notify_resume + +#if CONFIG_X86_HIGH_ENTRY + /* + * Reload db7 if necessary: + */ + movl TI_flags(%ebp), %ecx + testb $_TIF_DB7, %cl + jnz work_db7 + + jmp restore_all + +work_db7: + movl TI_task(%ebp), %edx; + movl task_thread_db7(%edx), %edx; + movl %edx, %db7; +#endif jmp restore_all ALIGN @@ -380,7 +539,7 @@ syscall_badsys: */ .data ENTRY(interrupt) -.text +.previous vector=0 ENTRY(irq_entries_start) @@ -390,7 +549,7 @@ ENTRY(irq_entries_start) jmp common_interrupt .data .long 1b -.text +.previous vector=vector+1 .endr @@ -431,12 +590,17 @@ error_code: movl ES(%esp), %edi # get the function address movl %eax, ORIG_EAX(%esp) movl %ecx, ES(%esp) - movl %esp, %edx pushl %esi # push the error code - pushl %edx # push the pt_regs pointer movl $(__USER_DS), %edx movl %edx, %ds movl %edx, %es + +/* clobbers edx, ebx and ebp */ + __SWITCH_KERNELSPACE + + leal 4(%esp), %edx # prepare pt_regs + pushl %edx # push pt_regs + call *%edi addl $8, %esp jmp ret_from_exception @@ -527,7 +691,7 @@ nmi_stack_correct: pushl %edx call do_nmi addl $8, %esp - RESTORE_ALL + jmp restore_all nmi_stack_fixup: FIX_STACK(12,nmi_stack_correct, 1) @@ -595,7 +759,7 @@ ENTRY(page_fault) #ifdef CONFIG_X86_MCE ENTRY(machine_check) pushl $0 - pushl $do_machine_check + pushl machine_check_vector jmp error_code #endif @@ -604,6 +768,8 @@ ENTRY(spurious_interrupt_bug) pushl $do_spurious_interrupt_bug jmp error_code +.previous + .data ENTRY(sys_call_table) .long sys_restart_syscall /* 0 - old "setup()" system call, used for restarting */ @@ -879,5 +1045,60 @@ ENTRY(sys_call_table) .long sys_tgkill /* 270 */ .long sys_utimes .long sys_fadvise64_64 + .long sys_ni_syscall /* sys_vserver */ nr_syscalls=(.-sys_call_table)/4 + + +# Here we do call frames. We cheat a bit as we only really need +# correct frames at locations we can actually look at from a +# debugger. Since the break instruction trap actually goes thru +# some of this code, we don't really need info on those areas, but +# only after the fact. I.e. if we can not step or break in a +# location or end up with a return address pointing at the +# location, we don't need a correct call frame for it. 
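+# (Editor's note: the block below is kept under "#if 0": it documents
+# the call-frame layout for a frame-aware debugger but is never
+# assembled as shipped.)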
+ +#if 0 + +#include +/* + * The register numbers as known by gdb + */ +#define _EAX 0 +#define _ECX 1 +#define _EDX 2 +#define _EBX 3 +#define _ESP 4 +#define _EBP 5 +#define _ESI 6 +#define _EDI 7 +#define _PC 8 +#define _EIP 8 +#define _PS 9 +#define _EFLAGS 9 +#define _CS 10 +#define _SS 11 +#define _DS 12 +#define _ES 13 +#define _FS 14 +#define _GS 15 + + CFI_preamble(c1,_PC,1,1) + CFA_define_reference(_ESP,OLDESP) + CFA_define_offset(_EIP,EIP) + CFA_define_offset(_EBX,EBX) + CFA_define_offset(_ECX,ECX) + CFA_define_offset(_EDX,EDX) + CFA_define_offset(_ESI,ESI) + CFA_define_offset(_EDI,EDI) + CFA_define_offset(_EBP,EBP) + CFA_define_offset(_EAX,EAX) + CFA_define_offset(_EFLAGS,EFLAGS) + CFA_define_offset(_CS,CS) + CFA_define_offset(_DS,DS) + CFA_define_offset(_ES,ES) + CFI_postamble(c1) + + FDE_preamble(c1,f1,ret_from_intr,(divide_error - ret_from_intr)) + FDE_postamble(f1) +#endif --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25/arch/i386/kernel/entry_trampoline.c 2003-10-05 00:36:48.000000000 -0700 @@ -0,0 +1,75 @@ +/* + * linux/arch/i386/kernel/entry_trampoline.c + * + * (C) Copyright 2003 Ingo Molnar + * + * This file contains the needed support code for 4GB userspace + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +extern char __entry_tramp_start, __entry_tramp_end, __start___entry_text; + +void __init init_entry_mappings(void) +{ +#ifdef CONFIG_X86_HIGH_ENTRY + void *tramp; + + /* + * We need a high IDT and GDT for the 4G/4G split: + */ + trap_init_virtual_IDT(); + + __set_fixmap(FIX_ENTRY_TRAMPOLINE_0, __pa((unsigned long)&__entry_tramp_start), PAGE_KERNEL); + __set_fixmap(FIX_ENTRY_TRAMPOLINE_1, __pa((unsigned long)&__entry_tramp_start) + PAGE_SIZE, PAGE_KERNEL); + tramp = (void *)fix_to_virt(FIX_ENTRY_TRAMPOLINE_0); + + printk("mapped 4G/4G trampoline to %p.\n", tramp); + BUG_ON((void *)&__start___entry_text != tramp); + /* + * Virtual kernel stack: + */ + BUG_ON(__kmap_atomic_vaddr(KM_VSTACK0) & 8191); + BUG_ON(sizeof(struct desc_struct)*NR_CPUS*GDT_ENTRIES > 2*PAGE_SIZE); + BUG_ON((unsigned int)&__entry_tramp_end - (unsigned int)&__entry_tramp_start > 2*PAGE_SIZE); + + /* + * set up the initial thread's virtual stack related + * fields: + */ + current->thread.stack_page0 = virt_to_page((char *)current->thread_info); + current->thread.stack_page1 = virt_to_page((char *)current->thread_info + PAGE_SIZE); + current->thread_info->virtual_stack = (void *)__kmap_atomic_vaddr(KM_VSTACK0); + + __kunmap_atomic_type(KM_VSTACK0); + __kunmap_atomic_type(KM_VSTACK1); + __kmap_atomic(current->thread.stack_page0, KM_VSTACK0); + __kmap_atomic(current->thread.stack_page1, KM_VSTACK1); + +#endif + printk("current: %p\n", current); + printk("current->thread_info: %p\n", current->thread_info); + current->thread_info->real_stack = (void *)current->thread_info; + current->thread_info->user_pgd = NULL; + current->thread.esp0 = (unsigned long)current->thread_info->real_stack + THREAD_SIZE; +} + + + +void __init entry_trampoline_setup(void) +{ + /* + * old IRQ entries set up by the boot code will still hang + * around - they are a sign of hw trouble anyway, now they'll + * produce a double fault message. 
+ */ + trap_init_virtual_GDT(); +} --- linux-2.6.0-test6/arch/i386/kernel/head.S 2003-08-22 19:23:40.000000000 -0700 +++ 25/arch/i386/kernel/head.S 2003-10-05 00:36:48.000000000 -0700 @@ -16,6 +16,7 @@ #include #include #include +#include #define OLD_CL_MAGIC_ADDR 0x90020 #define OLD_CL_MAGIC 0xA33F @@ -330,7 +331,7 @@ ENTRY(stack_start) /* This is the default interrupt "handler" :-) */ int_msg: - .asciz "Unknown interrupt\n" + .asciz "Unknown interrupt or fault at EIP %p %p %p\n" ALIGN ignore_int: cld @@ -342,9 +343,17 @@ ignore_int: movl $(__KERNEL_DS),%eax movl %eax,%ds movl %eax,%es + pushl 16(%esp) + pushl 24(%esp) + pushl 32(%esp) + pushl 40(%esp) pushl $int_msg call printk popl %eax + popl %eax + popl %eax + popl %eax + popl %eax popl %ds popl %es popl %edx @@ -377,23 +386,27 @@ cpu_gdt_descr: .fill NR_CPUS-1,8,0 # space for the other GDT descriptors /* - * This is initialized to create an identity-mapping at 0-8M (for bootup - * purposes) and another mapping of the 0-8M area at virtual address + * This is initialized to create an identity-mapping at 0-16M (for bootup + * purposes) and another mapping of the 0-16M area at virtual address * PAGE_OFFSET. */ .org 0x1000 ENTRY(swapper_pg_dir) .long 0x00102007 .long 0x00103007 - .fill BOOT_USER_PGD_PTRS-2,4,0 - /* default: 766 entries */ + .long 0x00104007 + .long 0x00105007 + .fill BOOT_USER_PGD_PTRS-4,4,0 + /* default: 764 entries */ .long 0x00102007 .long 0x00103007 - /* default: 254 entries */ - .fill BOOT_KERNEL_PGD_PTRS-2,4,0 + .long 0x00104007 + .long 0x00105007 + /* default: 252 entries */ + .fill BOOT_KERNEL_PGD_PTRS-4,4,0 /* - * The page tables are initialized to only 8MB here - the final page + * The page tables are initialized to only 16MB here - the final page * tables are set up later depending on memory size. */ .org 0x2000 @@ -402,15 +415,21 @@ ENTRY(pg0) .org 0x3000 ENTRY(pg1) +.org 0x4000 +ENTRY(pg2) + +.org 0x5000 +ENTRY(pg3) + /* * empty_zero_page must immediately follow the page tables ! (The * initialization loop counts until empty_zero_page) */ -.org 0x4000 +.org 0x6000 ENTRY(empty_zero_page) -.org 0x5000 +.org 0x7000 /* * Real beginning of normal "text" segment @@ -419,12 +438,12 @@ ENTRY(stext) ENTRY(_stext) /* - * This starts the data section. Note that the above is all - * in the text section because it has alignment requirements - * that we cannot fulfill any other way. + * This starts the data section. */ .data +.align PAGE_SIZE_asm + /* * The Global Descriptor Table contains 28 quadwords, per-CPU. */ @@ -439,7 +458,9 @@ ENTRY(boot_gdt_table) .quad 0x00cf9a000000ffff /* kernel 4GB code at 0x00000000 */ .quad 0x00cf92000000ffff /* kernel 4GB data at 0x00000000 */ #endif - .align L1_CACHE_BYTES + +.align PAGE_SIZE_asm + ENTRY(cpu_gdt_table) .quad 0x0000000000000000 /* NULL descriptor */ .quad 0x0000000000000000 /* 0x0b reserved */ --- linux-2.6.0-test6/arch/i386/kernel/i386_ksyms.c 2003-09-27 18:57:43.000000000 -0700 +++ 25/arch/i386/kernel/i386_ksyms.c 2003-10-05 00:36:48.000000000 -0700 @@ -98,7 +98,6 @@ EXPORT_SYMBOL_NOVERS(__down_failed_inter EXPORT_SYMBOL_NOVERS(__down_failed_trylock); EXPORT_SYMBOL_NOVERS(__up_wakeup); /* Networking helper routines. 
*/ -EXPORT_SYMBOL(csum_partial_copy_generic); /* Delay loops */ EXPORT_SYMBOL(__ndelay); EXPORT_SYMBOL(__udelay); @@ -112,13 +111,17 @@ EXPORT_SYMBOL_NOVERS(__get_user_4); EXPORT_SYMBOL(strpbrk); EXPORT_SYMBOL(strstr); +#if !defined(CONFIG_X86_UACCESS_INDIRECT) EXPORT_SYMBOL(strncpy_from_user); -EXPORT_SYMBOL(__strncpy_from_user); +EXPORT_SYMBOL(__direct_strncpy_from_user); EXPORT_SYMBOL(clear_user); EXPORT_SYMBOL(__clear_user); EXPORT_SYMBOL(__copy_from_user_ll); EXPORT_SYMBOL(__copy_to_user_ll); EXPORT_SYMBOL(strnlen_user); +#else /* CONFIG_X86_UACCESS_INDIRECT */ +EXPORT_SYMBOL(direct_csum_partial_copy_generic); +#endif EXPORT_SYMBOL(dma_alloc_coherent); EXPORT_SYMBOL(dma_free_coherent); --- linux-2.6.0-test6/arch/i386/kernel/i387.c 2003-06-14 12:18:51.000000000 -0700 +++ 25/arch/i386/kernel/i387.c 2003-10-05 00:36:48.000000000 -0700 @@ -219,6 +219,7 @@ void set_fpu_mxcsr( struct task_struct * static int convert_fxsr_to_user( struct _fpstate __user *buf, struct i387_fxsave_struct *fxsave ) { + struct _fpreg tmp[8]; /* 80 bytes scratch area */ unsigned long env[7]; struct _fpreg __user *to; struct _fpxreg *from; @@ -235,23 +236,25 @@ static int convert_fxsr_to_user( struct if ( __copy_to_user( buf, env, 7 * sizeof(unsigned long) ) ) return 1; - to = &buf->_st[0]; + to = tmp; from = (struct _fpxreg *) &fxsave->st_space[0]; for ( i = 0 ; i < 8 ; i++, to++, from++ ) { unsigned long *t = (unsigned long *)to; unsigned long *f = (unsigned long *)from; - if (__put_user(*f, t) || - __put_user(*(f + 1), t + 1) || - __put_user(from->exponent, &to->exponent)) - return 1; + *t = *f; + *(t + 1) = *(f+1); + to->exponent = from->exponent; } + if (copy_to_user(buf->_st, tmp, sizeof(struct _fpreg [8]))) + return 1; return 0; } static int convert_fxsr_from_user( struct i387_fxsave_struct *fxsave, struct _fpstate __user *buf ) { + struct _fpreg tmp[8]; /* 80 bytes scratch area */ unsigned long env[7]; struct _fpxreg *to; struct _fpreg __user *from; @@ -259,6 +262,8 @@ static int convert_fxsr_from_user( struc if ( __copy_from_user( env, buf, 7 * sizeof(long) ) ) return 1; + if (copy_from_user(tmp, buf->_st, sizeof(struct _fpreg [8]))) + return 1; fxsave->cwd = (unsigned short)(env[0] & 0xffff); fxsave->swd = (unsigned short)(env[1] & 0xffff); @@ -270,15 +275,14 @@ static int convert_fxsr_from_user( struc fxsave->fos = env[6]; to = (struct _fpxreg *) &fxsave->st_space[0]; - from = &buf->_st[0]; + from = tmp; for ( i = 0 ; i < 8 ; i++, to++, from++ ) { unsigned long *t = (unsigned long *)to; unsigned long *f = (unsigned long *)from; - if (__get_user(*t, f) || - __get_user(*(t + 1), f + 1) || - __get_user(to->exponent, &from->exponent)) - return 1; + *t = *f; + *(t + 1) = *(f + 1); + to->exponent = from->exponent; } return 0; } @@ -549,13 +553,3 @@ int dump_task_extended_fpu(struct task_s } return fpvalid; } - - -#ifdef CONFIG_SMP -void dump_smp_unlazy_fpu(void) -{ - unlazy_fpu(current); - return; -} -#endif - --- linux-2.6.0-test6/arch/i386/kernel/i8259.c 2003-06-14 12:18:34.000000000 -0700 +++ 25/arch/i386/kernel/i8259.c 2003-10-05 00:36:20.000000000 -0700 @@ -419,8 +419,10 @@ void __init init_IRQ(void) * us. 
(some of these will be overridden and become * 'special' SMP interrupts) */ - for (i = 0; i < NR_IRQS; i++) { + for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) { int vector = FIRST_EXTERNAL_VECTOR + i; + if (i >= NR_IRQS) + break; if (vector != SYSCALL_VECTOR) set_intr_gate(vector, interrupt[i]); } --- linux-2.6.0-test6/arch/i386/kernel/init_task.c 2003-06-14 12:18:35.000000000 -0700 +++ 25/arch/i386/kernel/init_task.c 2003-10-05 00:36:48.000000000 -0700 @@ -23,7 +23,7 @@ struct mm_struct init_mm = INIT_MM(init_ */ union thread_union init_thread_union __attribute__((__section__(".data.init_task"))) = - { INIT_THREAD_INFO(init_task) }; + { INIT_THREAD_INFO(init_task, init_thread_union) }; /* * Initial task structure. @@ -39,5 +39,5 @@ struct task_struct init_task = INIT_TASK * section. Since TSS's are completely CPU-local, we want them * on exact cacheline boundaries, to eliminate cacheline ping-pong. */ -struct tss_struct init_tss[NR_CPUS] __cacheline_aligned = { [0 ... NR_CPUS-1] = INIT_TSS }; +struct tss_struct init_tss[NR_CPUS] __attribute__((__section__(".data.tss"))) = { [0 ... NR_CPUS-1] = INIT_TSS }; --- linux-2.6.0-test6/arch/i386/kernel/io_apic.c 2003-09-27 18:57:43.000000000 -0700 +++ 25/arch/i386/kernel/io_apic.c 2003-10-05 00:36:27.000000000 -0700 @@ -76,6 +76,14 @@ static struct irq_pin_list { int apic, pin, next; } irq_2_pin[PIN_MAP_SIZE]; +#ifdef CONFIG_PCI_USE_VECTOR +int vector_irq[NR_IRQS] = { [0 ... NR_IRQS -1] = -1}; +#define vector_to_irq(vector) \ + (platform_legacy_irq(vector) ? vector : vector_irq[vector]) +#else +#define vector_to_irq(vector) (vector) +#endif + /* * The common case is 1:1 IRQ<->pin mappings. Sometimes there are * shared ISA-space IRQs, so we have to support them. We are super @@ -249,7 +257,7 @@ static void clear_IO_APIC (void) clear_IO_APIC_pin(apic, pin); } -static void set_ioapic_affinity(unsigned int irq, cpumask_t cpumask) +static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask) { unsigned long flags; int pin; @@ -288,7 +296,7 @@ static void set_ioapic_affinity(unsigned extern cpumask_t irq_affinity[NR_IRQS]; -static cpumask_t __cacheline_aligned pending_irq_balance_cpumask[NR_IRQS]; +cpumask_t __cacheline_aligned pending_irq_balance_cpumask[NR_IRQS]; #define IRQBALANCE_CHECK_ARCH -999 static int irqbalance_disabled = IRQBALANCE_CHECK_ARCH; @@ -670,13 +678,11 @@ static int __init irqbalance_disable(cha __setup("noirqbalance", irqbalance_disable); -static void set_ioapic_affinity(unsigned int irq, cpumask_t mask); - static inline void move_irq(int irq) { /* note - we hold the desc->lock */ if (unlikely(!cpus_empty(pending_irq_balance_cpumask[irq]))) { - set_ioapic_affinity(irq, pending_irq_balance_cpumask[irq]); + set_ioapic_affinity_irq(irq, pending_irq_balance_cpumask[irq]); cpus_clear(pending_irq_balance_cpumask[irq]); } } @@ -853,7 +859,7 @@ void __init setup_ioapic_dest(cpumask_t if (irq_entry == -1) continue; irq = pin_2_irq(irq_entry, ioapic, pin); - set_ioapic_affinity(irq, mask); + set_ioapic_affinity_irq(irq, mask); } } @@ -1138,12 +1144,14 @@ static inline int IO_APIC_irq_trigger(in return 0; } -int irq_vector[NR_IRQS] = { FIRST_DEVICE_VECTOR , 0 }; +u8 *irq_vector; +int nr_irqs; -static int __init assign_irq_vector(int irq) +#ifndef CONFIG_PCI_USE_VECTOR +int __init assign_irq_vector(int irq) { static int current_vector = FIRST_DEVICE_VECTOR, offset = 0; - BUG_ON(irq >= NR_IRQS); + BUG_ON(irq >= nr_irqs); if (IO_APIC_VECTOR(irq) > 0) return IO_APIC_VECTOR(irq); next: @@ -1157,11 +1165,36 @@ next: } 
IO_APIC_VECTOR(irq) = current_vector; + return current_vector; } +#endif -static struct hw_interrupt_type ioapic_level_irq_type; -static struct hw_interrupt_type ioapic_edge_irq_type; +static struct hw_interrupt_type ioapic_level_type; +static struct hw_interrupt_type ioapic_edge_type; + +#define IOAPIC_AUTO -1 +#define IOAPIC_EDGE 0 +#define IOAPIC_LEVEL 1 + +static inline void ioapic_register_intr(int irq, int vector, unsigned long trigger) +{ + if (use_pci_vector() && !platform_legacy_irq(irq)) { + if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || + trigger == IOAPIC_LEVEL) + irq_desc[vector].handler = &ioapic_level_type; + else + irq_desc[vector].handler = &ioapic_edge_type; + set_intr_gate(vector, interrupt[vector]); + } else { + if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || + trigger == IOAPIC_LEVEL) + irq_desc[irq].handler = &ioapic_level_type; + else + irq_desc[irq].handler = &ioapic_edge_type; + set_intr_gate(vector, interrupt[irq]); + } +} void __init setup_IO_APIC_irqs(void) { @@ -1219,13 +1252,7 @@ void __init setup_IO_APIC_irqs(void) if (IO_APIC_IRQ(irq)) { vector = assign_irq_vector(irq); entry.vector = vector; - - if (IO_APIC_irq_trigger(irq)) - irq_desc[irq].handler = &ioapic_level_irq_type; - else - irq_desc[irq].handler = &ioapic_edge_irq_type; - - set_intr_gate(vector, interrupt[irq]); + ioapic_register_intr(irq, vector, IOAPIC_AUTO); if (!apic && (irq < 16)) disable_8259A_irq(irq); @@ -1272,7 +1299,7 @@ void __init setup_ExtINT_IRQ0_pin(unsign * The timer IRQ doesn't have to know that behind the * scene we have a 8259A-master in AEOI mode ... */ - irq_desc[0].handler = &ioapic_edge_irq_type; + irq_desc[0].handler = &ioapic_edge_type; /* * Add it to the IO-APIC irq-routing table: @@ -1762,9 +1789,6 @@ static int __init timer_irq_works(void) * that was delayed but this is now handled in the device * independent code. */ -#define enable_edge_ioapic_irq unmask_IO_APIC_irq - -static void disable_edge_ioapic_irq (unsigned int irq) { /* nothing */ } /* * Starting up a edge-triggered IO-APIC interrupt is @@ -1775,7 +1799,6 @@ static void disable_edge_ioapic_irq (uns * This is not complete - we should be able to fake * an edge even if it isn't on the 8259A... */ - static unsigned int startup_edge_ioapic_irq(unsigned int irq) { int was_pending = 0; @@ -1793,8 +1816,6 @@ static unsigned int startup_edge_ioapic_ return was_pending; } -#define shutdown_edge_ioapic_irq disable_edge_ioapic_irq - /* * Once we have recorded IRQ_PENDING already, we can mask the * interrupt for real. This prevents IRQ storms from unhandled @@ -1809,9 +1830,6 @@ static void ack_edge_ioapic_irq(unsigned ack_APIC_irq(); } -static void end_edge_ioapic_irq (unsigned int i) { /* nothing */ } - - /* * Level triggered interrupts can just be masked, * and shutting down and starting up the interrupt @@ -1833,10 +1851,6 @@ static unsigned int startup_level_ioapic return 0; /* don't check for pending */ } -#define shutdown_level_ioapic_irq mask_IO_APIC_irq -#define enable_level_ioapic_irq unmask_IO_APIC_irq -#define disable_level_ioapic_irq mask_IO_APIC_irq - static void end_level_ioapic_irq (unsigned int irq) { unsigned long v; @@ -1863,6 +1877,7 @@ static void end_level_ioapic_irq (unsign * The idea is from Manfred Spraul. 
--macro */ i = IO_APIC_VECTOR(irq); + v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1)); ack_APIC_irq(); @@ -1897,7 +1912,57 @@ static void end_level_ioapic_irq (unsign } } -static void mask_and_ack_level_ioapic_irq (unsigned int irq) { /* nothing */ } +#ifdef CONFIG_PCI_USE_VECTOR +static unsigned int startup_edge_ioapic_vector(unsigned int vector) +{ + int irq = vector_to_irq(vector); + + return startup_edge_ioapic_irq(irq); +} + +static void ack_edge_ioapic_vector(unsigned int vector) +{ + int irq = vector_to_irq(vector); + + ack_edge_ioapic_irq(irq); +} + +static unsigned int startup_level_ioapic_vector (unsigned int vector) +{ + int irq = vector_to_irq(vector); + + return startup_level_ioapic_irq (irq); +} + +static void end_level_ioapic_vector (unsigned int vector) +{ + int irq = vector_to_irq(vector); + + end_level_ioapic_irq(irq); +} + +static void mask_IO_APIC_vector (unsigned int vector) +{ + int irq = vector_to_irq(vector); + + mask_IO_APIC_irq(irq); +} + +static void unmask_IO_APIC_vector (unsigned int vector) +{ + int irq = vector_to_irq(vector); + + unmask_IO_APIC_irq(irq); +} + +static void set_ioapic_affinity_vector (unsigned int vector, + unsigned long cpu_mask) +{ + int irq = vector_to_irq(vector); + + set_ioapic_affinity_irq(irq, cpu_mask); +} +#endif /* * Level and edge triggered IO-APIC interrupts need different handling, @@ -1907,26 +1972,25 @@ static void mask_and_ack_level_ioapic_ir * edge-triggered handler, without risking IRQ storms and other ugly * races. */ - -static struct hw_interrupt_type ioapic_edge_irq_type = { +static struct hw_interrupt_type ioapic_edge_type = { .typename = "IO-APIC-edge", - .startup = startup_edge_ioapic_irq, - .shutdown = shutdown_edge_ioapic_irq, - .enable = enable_edge_ioapic_irq, - .disable = disable_edge_ioapic_irq, - .ack = ack_edge_ioapic_irq, - .end = end_edge_ioapic_irq, + .startup = startup_edge_ioapic, + .shutdown = shutdown_edge_ioapic, + .enable = enable_edge_ioapic, + .disable = disable_edge_ioapic, + .ack = ack_edge_ioapic, + .end = end_edge_ioapic, .set_affinity = set_ioapic_affinity, }; -static struct hw_interrupt_type ioapic_level_irq_type = { +static struct hw_interrupt_type ioapic_level_type = { .typename = "IO-APIC-level", - .startup = startup_level_ioapic_irq, - .shutdown = shutdown_level_ioapic_irq, - .enable = enable_level_ioapic_irq, - .disable = disable_level_ioapic_irq, - .ack = mask_and_ack_level_ioapic_irq, - .end = end_level_ioapic_irq, + .startup = startup_level_ioapic, + .shutdown = shutdown_level_ioapic, + .enable = enable_level_ioapic, + .disable = disable_level_ioapic, + .ack = mask_and_ack_level_ioapic, + .end = end_level_ioapic, .set_affinity = set_ioapic_affinity, }; @@ -1946,7 +2010,13 @@ static inline void init_IO_APIC_traps(vo * 0x80, because int 0x80 is hm, kind of importantish. ;) */ for (irq = 0; irq < NR_IRQS ; irq++) { - if (IO_APIC_IRQ(irq) && !IO_APIC_VECTOR(irq)) { + int tmp = irq; + if (use_pci_vector()) { + if (!platform_legacy_irq(tmp)) + if ((tmp = vector_to_irq(tmp)) == -1) + continue; + } + if (IO_APIC_IRQ(tmp) && !IO_APIC_VECTOR(tmp)) { /* * Hmm.. 
We don't have an entry for this, * so default to an old-fashioned 8259 @@ -2378,10 +2448,12 @@ int io_apic_set_pci_routing (int ioapic, "IRQ %d Mode:%i Active:%i)\n", ioapic, mp_ioapics[ioapic].mpc_apicid, pin, entry.vector, irq, edge_level, active_high_low); + if (use_pci_vector() && !platform_legacy_irq(irq)) + irq = IO_APIC_VECTOR(irq); if (edge_level) { - irq_desc[irq].handler = &ioapic_level_irq_type; + irq_desc[irq].handler = &ioapic_level_type; } else { - irq_desc[irq].handler = &ioapic_edge_irq_type; + irq_desc[irq].handler = &ioapic_edge_type; } set_intr_gate(entry.vector, interrupt[irq]); --- linux-2.6.0-test6/arch/i386/kernel/irq.c 2003-08-22 19:23:40.000000000 -0700 +++ 25/arch/i386/kernel/irq.c 2003-10-05 00:33:50.000000000 -0700 @@ -44,6 +44,7 @@ #include #include #include +#include /* * Linux has a controller-independent x86 interrupt architecture. @@ -499,6 +500,17 @@ out: irq_exit(); +#ifdef CONFIG_KGDB + /* + * We need to do this after clearing out of all the interrupt + * machinery because kgdb will reenter the NIC driver and the IRQ + * system. synchronize_irq() (at least) will deadlock. + */ + if (kgdb_eth_need_breakpoint[smp_processor_id()]) { + kgdb_eth_need_breakpoint[smp_processor_id()] = 0; + BREAKPOINT; + } +#endif return 1; } --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25/arch/i386/kernel/kgdb_stub.c 2003-10-05 00:33:51.000000000 -0700 @@ -0,0 +1,2492 @@ +/* + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + */ + +/* + * Copyright (c) 2000 VERITAS Software Corporation. + * + */ +/**************************************************************************** + * Header: remcom.c,v 1.34 91/03/09 12:29:49 glenne Exp $ + * + * Module name: remcom.c $ + * Revision: 1.34 $ + * Date: 91/03/09 12:29:49 $ + * Contributor: Lake Stevens Instrument Division$ + * + * Description: low level support for gdb debugger. $ + * + * Considerations: only works on target hardware $ + * + * Written by: Glenn Engel $ + * Updated by: David Grothe + * Updated by: Robert Walsh + * Updated by: wangdi + * ModuleState: Experimental $ + * + * NOTES: See Below $ + * + * Modified for 386 by Jim Kingdon, Cygnus Support. + * Compatibility with 2.1.xx kernel by David Grothe + * + * Changes to allow auto initilization. All that is needed is that it + * be linked with the kernel and a break point (int 3) be executed. + * The header file defines BREAKPOINT to allow one to do + * this. It should also be possible, once the interrupt system is up, to + * call putDebugChar("+"). Once this is done, the remote debugger should + * get our attention by sending a ^C in a packet. George Anzinger + * + * Integrated into 2.2.5 kernel by Tigran Aivazian + * Added thread support, support for multiple processors, + * support for ia-32(x86) hardware debugging. + * Amit S. Kale ( akale@veritas.com ) + * + * Modified to support debugging over ethernet by Robert Walsh + * and wangdi , based on + * code by San Mehat. + * + * + * To enable debugger support, two things need to happen. 
One, a
+ * call to set_debug_traps() is necessary in order to allow any breakpoints
+ * or error conditions to be properly intercepted and reported to gdb.
+ * Two, a breakpoint needs to be generated to begin communication. This
+ * is most easily accomplished by a call to breakpoint(). Breakpoint()
+ * simulates a breakpoint by executing an int 3.
+ *
+ *************
+ *
+ * The following gdb commands are supported:
+ *
+ * command          function                               Return value
+ *
+ *    g             return the value of the CPU registers  hex data or ENN
+ *    G             set the value of the CPU registers     OK or ENN
+ *
+ *    mAA..AA,LLLL  Read LLLL bytes at address AA..AA      hex data or ENN
+ *    MAA..AA,LLLL: Write LLLL bytes at address AA..AA     OK or ENN
+ *
+ *    c             Resume at current address              SNN  (signal NN)
+ *    cAA..AA       Continue at address AA..AA             SNN
+ *
+ *    s             Step one instruction                   SNN
+ *    sAA..AA       Step one instruction from AA..AA       SNN
+ *
+ *    k             kill
+ *
+ *    ?             What was the last sigval ?             SNN  (signal NN)
+ *
+ * All commands and responses are sent with a packet which includes a
+ * checksum. A packet consists of
+ *
+ * $<packet info>#<checksum>
+ *
+ * where
+ * <packet info> :: <characters representing the command or response>
+ * <checksum>    :: <two hex digits computed as modulo 256 sum of <packet info>>
+ *
+ * When a packet is received, it is first acknowledged with either '+' or '-'.
+ * '+' indicates a successful transfer. '-' indicates a failed transfer.
+ *
+ * Example:
+ *
+ * Host:                  Reply:
+ * $m0,10#2a               +$00010203040506070809101112131415#42
+ *
+ ****************************************************************************/
+#define KGDB_VERSION "<20030915.1651.33>"
+#include
+#include
+#include	/* for strcpy */
+#include
+#include
+#include
+#include
+#include	/* for linux pt_regs struct */
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+/************************************************************************
+ *
+ * external low-level support routines
+ */
+typedef void (*Function) (void);	/* pointer to a function */
+
+/* Thread reference */
+typedef unsigned char threadref[8];
+
+extern int tty_putDebugChar(int);	/* write a single character */
+extern int tty_getDebugChar(void);	/* read and return a single char */
+extern void tty_flushDebugChar(void);	/* flush pending characters */
+extern int eth_putDebugChar(int);	/* write a single character */
+extern int eth_getDebugChar(void);	/* read and return a single char */
+extern void eth_flushDebugChar(void);	/* flush pending characters */
+extern void kgdb_eth_set_trapmode(int);
+extern void kgdb_eth_reply_arp(void);	/* send arp request */
+extern volatile int kgdb_eth_is_initializing;
+
+
+/************************************************************************/
+/* BUFMAX defines the maximum number of characters in inbound/outbound
+ * buffers; at least NUMREGBYTES*2 are needed for register packets.
+ * A longer buffer is needed to list all threads. */
+#define BUFMAX 400
+
+char *kgdb_version = KGDB_VERSION;
+
+/* debug > 0 prints ill-formed commands in valid packets & checksum errors */
+int debug_regs = 0;	/* set to non-zero to print registers */
+
+/* filled in by an external module */
+char *gdb_module_offsets;
+
+static const char hexchars[] = "0123456789abcdef";
+
+/* Number of bytes of registers. */
+#define NUMREGBYTES 64
+/*
+ * Note that this register image is in a different order than
+ * the register image that Linux produces at interrupt time.
+ *
+ * Linux's register image is defined by struct pt_regs in ptrace.h.
+ * Just why GDB uses a different order is a historical mystery.
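+ *
+ * Purely as an editor's illustration (this shuffle is hypothetical
+ * pseudo-helper code, not part of this file), translating one image
+ * into the other amounts to:
+ *
+ *	gdb_regs[_EAX] = regs->eax;	gdb_regs[_ECX] = regs->ecx;
+ *	gdb_regs[_EDX] = regs->edx;	gdb_regs[_EBX] = regs->ebx;
+ *	gdb_regs[_EBP] = regs->ebp;	gdb_regs[_ESI] = regs->esi;
+ *	gdb_regs[_EDI] = regs->edi;	gdb_regs[_PC]  = regs->eip;
+ *	gdb_regs[_PS]  = regs->eflags;
+ *	gdb_regs[_ESP] = (int)&regs->esp;	// kernel traps don't push %esp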
+ */ +enum regnames { _EAX, /* 0 */ + _ECX, /* 1 */ + _EDX, /* 2 */ + _EBX, /* 3 */ + _ESP, /* 4 */ + _EBP, /* 5 */ + _ESI, /* 6 */ + _EDI, /* 7 */ + _PC /* 8 also known as eip */ , + _PS /* 9 also known as eflags */ , + _CS, /* 10 */ + _SS, /* 11 */ + _DS, /* 12 */ + _ES, /* 13 */ + _FS, /* 14 */ + _GS /* 15 */ +}; + +/*************************** ASSEMBLY CODE MACROS *************************/ +/* + * Put the error code here just in case the user cares. + * Likewise, the vector number here (since GDB only gets the signal + * number through the usual means, and that's not very specific). + * The called_from is the return address so he can tell how we entered kgdb. + * This will allow him to seperate out the various possible entries. + */ +#define REMOTE_DEBUG 0 /* set != to turn on printing (also available in info) */ + +#define PID_MAX PID_MAX_DEFAULT + +#ifdef CONFIG_SMP +void smp_send_nmi_allbutself(void); +#define IF_SMP(x) x +#undef MAX_NO_CPUS +#ifndef CONFIG_NO_KGDB_CPUS +#define CONFIG_NO_KGDB_CPUS 2 +#endif +#if CONFIG_NO_KGDB_CPUS > NR_CPUS +#define MAX_NO_CPUS NR_CPUS +#else +#define MAX_NO_CPUS CONFIG_NO_KGDB_CPUS +#endif +#define hold_init hold_on_sstep: 1, +#define MAX_CPU_MASK (unsigned long)((1LL << MAX_NO_CPUS) - 1LL) +#define NUM_CPUS num_online_cpus() +#else +#define IF_SMP(x) +#define hold_init +#undef MAX_NO_CPUS +#define MAX_NO_CPUS 1 +#define NUM_CPUS 1 +#endif +#define NOCPU (struct task_struct *)0xbad1fbad +/* *INDENT-OFF* */ +struct kgdb_info { + int used_malloc; + void *called_from; + long long entry_tsc; + int errcode; + int vector; + int print_debug_info; +#ifdef CONFIG_SMP + int hold_on_sstep; + struct { + volatile struct task_struct *task; + int pid; + int hold; + struct pt_regs *regs; + } cpus_waiting[MAX_NO_CPUS]; +#endif +} kgdb_info = {hold_init print_debug_info:REMOTE_DEBUG, vector:-1}; + +/* *INDENT-ON* */ + +#define used_m kgdb_info.used_malloc +/* + * This is little area we set aside to contain the stack we + * need to build to allow gdb to call functions. We use one + * per cpu to avoid locking issues. We will do all this work + * with interrupts off so that should take care of the protection + * issues. + */ +#define LOOKASIDE_SIZE 200 /* should be more than enough */ +#define MALLOC_MAX 200 /* Max malloc size */ +struct { + unsigned int esp; + int array[LOOKASIDE_SIZE]; +} fn_call_lookaside[MAX_NO_CPUS]; + +static int trap_cpu; +static unsigned int OLD_esp; + +#define END_OF_LOOKASIDE &fn_call_lookaside[trap_cpu].array[LOOKASIDE_SIZE] +#define IF_BIT 0x200 +#define TF_BIT 0x100 + +#define MALLOC_ROUND 8-1 + +static char malloc_array[MALLOC_MAX]; +IF_SMP(static void to_gdb(const char *mess)); +void * +malloc(int size) +{ + + if (size <= (MALLOC_MAX - used_m)) { + int old_used = used_m; + used_m += ((size + MALLOC_ROUND) & (~MALLOC_ROUND)); + return &malloc_array[old_used]; + } else { + return NULL; + } +} + +/* + * I/O dispatch functions... + * Based upon kgdb_eth, either call the ethernet + * handler or the serial one.. + */ +void +putDebugChar(int c) +{ + if (kgdb_eth == -1) { + tty_putDebugChar(c); + } else { + eth_putDebugChar(c); + } +} + +int +getDebugChar(void) +{ + if (kgdb_eth == -1) { + return tty_getDebugChar(); + } else { + return eth_getDebugChar(); + } +} + +void +flushDebugChar(void) +{ + if (kgdb_eth == -1) { + tty_flushDebugChar(); + } else { + eth_flushDebugChar(); + } +} + +/* + * Gdb calls functions by pushing agruments, including a return address + * on the stack and the adjusting EIP to point to the function. 
The + * whole assumption in GDB is that we are on a different stack than the + * one the "user", i.e. the code that hit the breakpoint, is on. This, of + * course, is not true in the kernel. Thus various dodges are needed to + * do the call without directly messing with EIP (which we cannot change, + * as it is just a location and not a register). To adjust it would then + * require that we move everything below EIP up or down as needed. This + * will not work as we may well have stack-relative pointers on the stack + * (such as the pointer to regs, for example). + + * So here is what we do: + * We detect gdb attempting to store into the stack area and instead, store + * into the fn_call_lookaside.array at the same relative location as if it + * were the area ESP pointed at. We also trap ESP modifications + * and use these to adjust fn_call_lookaside.esp. On entry + * fn_call_lookaside.esp will be set to point at the last entry in + * fn_call_lookaside.array. This allows us to check if it has changed, and + * if so, on exit, we add the registers we will use to do the move and a + * trap/interrupt return exit sequence. We then adjust the eflags in the + * regs array (remember we now have a copy in the fn_call_lookaside.array) to + * kill the interrupt bit, AND we change EIP to point at our setup stub. + * As part of the register setup we preset the registers to point at the + * beginning and end of the fn_call_lookaside.array, so all the stub needs to + * do is move words from the array to the stack until ESP = the desired value, + * then do the iret. This will then transfer to the desired function with + * all the correct registers. Nifty huh? + */ +extern asmlinkage void fn_call_stub(void); +extern asmlinkage void fn_rtn_stub(void); +/* *INDENT-OFF* */ +__asm__("fn_rtn_stub:\n\t" + "movl %eax,%esp\n\t" + "fn_call_stub:\n\t" + "1:\n\t" + "addl $-4,%ebx\n\t" + "movl (%ebx), %eax\n\t" + "pushl %eax\n\t" + "cmpl %esp,%ecx\n\t" + "jne 1b\n\t" + "popl %eax\n\t" + "popl %ebx\n\t" + "popl %ecx\n\t" + "iret \n\t"); +/* *INDENT-ON* */ +#define gdb_i386vector kgdb_info.vector +#define gdb_i386errcode kgdb_info.errcode +#define waiting_cpus kgdb_info.cpus_waiting +#define remote_debug kgdb_info.print_debug_info +#define hold_cpu(cpu) kgdb_info.cpus_waiting[cpu].hold +/* gdb locks */ + +#ifdef CONFIG_SMP +static int in_kgdb_called; +static spinlock_t waitlocks[MAX_NO_CPUS] = + {[0 ... MAX_NO_CPUS - 1] = SPIN_LOCK_UNLOCKED }; +/* + * The following array has the thread pointer of each of the "other" + * cpus. We make it global so it can be seen by gdb. + */ +volatile int in_kgdb_entry_log[MAX_NO_CPUS]; +volatile struct pt_regs *in_kgdb_here_log[MAX_NO_CPUS]; +/* +static spinlock_t continuelocks[MAX_NO_CPUS]; +*/ +spinlock_t kgdb_spinlock = SPIN_LOCK_UNLOCKED; +/* waiters on our spinlock plus us */ +static atomic_t spinlock_waiters = ATOMIC_INIT(1); +static int spinlock_count = 0; +static int spinlock_cpu = 0; +/* + * Note we use nested spin locks to account for the case where a breakpoint + * is encountered when calling a function by user direction from + * kgdb. Also there is the memory exception recursion to account for. + * Well, yes, but this lets other cpus thru too. Let's add a + * cpu id to the lock. + */ +#define KGDB_SPIN_LOCK(x) if( spinlock_count == 0 || \ + spinlock_cpu != smp_processor_id()){\ + atomic_inc(&spinlock_waiters); \ + while (!
spin_trylock(x)) {\ + in_kgdb(&regs);\ + }\ + atomic_dec(&spinlock_waiters); \ + spinlock_count = 1; \ + spinlock_cpu = smp_processor_id(); \ + }else{ \ + spinlock_count++; \ + } +#define KGDB_SPIN_UNLOCK(x) if( --spinlock_count == 0) spin_unlock(x) +#else +unsigned kgdb_spinlock = 0; +#define KGDB_SPIN_LOCK(x) --*x +#define KGDB_SPIN_UNLOCK(x) ++*x +#endif + +int +hex(char ch) +{ + if ((ch >= 'a') && (ch <= 'f')) + return (ch - 'a' + 10); + if ((ch >= '0') && (ch <= '9')) + return (ch - '0'); + if ((ch >= 'A') && (ch <= 'F')) + return (ch - 'A' + 10); + return (-1); +} + +/* scan for the sequence $<data>#<checksum> */ +void +getpacket(char *buffer) +{ + unsigned char checksum; + unsigned char xmitcsum; + int i; + int count; + char ch; + + do { + /* wait around for the start character, ignore all other characters */ + while ((ch = (getDebugChar() & 0x7f)) != '$') ; + checksum = 0; + xmitcsum = -1; + + count = 0; + + /* now, read until a # or end of buffer is found */ + while (count < BUFMAX) { + ch = getDebugChar() & 0x7f; + if (ch == '#') + break; + checksum = checksum + ch; + buffer[count] = ch; + count = count + 1; + } + buffer[count] = 0; + + if (ch == '#') { + xmitcsum = hex(getDebugChar() & 0x7f) << 4; + xmitcsum += hex(getDebugChar() & 0x7f); + if ((remote_debug) && (checksum != xmitcsum)) { + printk + ("bad checksum. My count = 0x%x, sent=0x%x. buf=%s\n", + checksum, xmitcsum, buffer); + } + + if (checksum != xmitcsum) + putDebugChar('-'); /* failed checksum */ + else { + putDebugChar('+'); /* successful transfer */ + /* if a sequence char is present, reply the sequence ID */ + if (buffer[2] == ':') { + putDebugChar(buffer[0]); + putDebugChar(buffer[1]); + /* remove sequence chars from buffer */ + count = strlen(buffer); + for (i = 3; i <= count; i++) + buffer[i - 3] = buffer[i]; + } + } + } + } while (checksum != xmitcsum); + + if (remote_debug) + printk("R:%s\n", buffer); + flushDebugChar(); +} + +/* send the packet in buffer. */ + +void +putpacket(char *buffer) +{ + unsigned char checksum; + int count; + char ch; + + /* $<packet info>#<checksum>. */ + + if (kgdb_eth == -1) { + do { + if (remote_debug) + printk("T:%s\n", buffer); + putDebugChar('$'); + checksum = 0; + count = 0; + + while ((ch = buffer[count])) { + putDebugChar(ch); + checksum += ch; + count += 1; + } + + putDebugChar('#'); + putDebugChar(hexchars[checksum >> 4]); + putDebugChar(hexchars[checksum % 16]); + flushDebugChar(); + + } while ((getDebugChar() & 0x7f) != '+'); + } else { + /* + * For udp, we cannot transfer too many bytes at once.
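+ * (Aside, for the serial path just above: the two trailing hex digits encode the modulo-256 sum of the payload bytes, e.g. "m0,10" sums to 109+48+44+49+48 = 298, i.e. 0x2a after truncation to eight bits, matching the $m0,10#2a example in the header comment.)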
+ * We only transfer MAX_SEND_COUNT bytes at a time + */ + +#define MAX_SEND_COUNT 30 + + int send_count = 0, i = 0; + char send_buf[MAX_SEND_COUNT]; + + do { + if (remote_debug) + printk("T:%s\n", buffer); + putDebugChar('$'); + checksum = 0; + count = 0; + send_count = 0; + while ((ch = buffer[count])) { + if (send_count >= MAX_SEND_COUNT) { + for(i = 0; i < MAX_SEND_COUNT; i++) { + putDebugChar(send_buf[i]); + } + flushDebugChar(); + send_count = 0; + } else { + send_buf[send_count] = ch; + checksum += ch; + count++; + send_count++; + } + } + for(i = 0; i < send_count; i++) + putDebugChar(send_buf[i]); + putDebugChar('#'); + putDebugChar(hexchars[checksum >> 4]); + putDebugChar(hexchars[checksum % 16]); + flushDebugChar(); + } while ((getDebugChar() & 0x7f) != '+'); + } +} + +static char remcomInBuffer[BUFMAX]; +static char remcomOutBuffer[BUFMAX]; +static short error; + +void +debug_error(char *format, char *parm) +{ + if (remote_debug) + printk(format, parm); +} + +static void +print_regs(struct pt_regs *regs) +{ + printk("EAX=%08lx ", regs->eax); + printk("EBX=%08lx ", regs->ebx); + printk("ECX=%08lx ", regs->ecx); + printk("EDX=%08lx ", regs->edx); + printk("\n"); + printk("ESI=%08lx ", regs->esi); + printk("EDI=%08lx ", regs->edi); + printk("EBP=%08lx ", regs->ebp); + printk("ESP=%08lx ", (long) &regs->esp); + printk("\n"); + printk(" DS=%08x ", regs->xds); + printk(" ES=%08x ", regs->xes); + printk(" SS=%08x ", __KERNEL_DS); + printk(" FL=%08lx ", regs->eflags); + printk("\n"); + printk(" CS=%08x ", regs->xcs); + printk(" IP=%08lx ", regs->eip); +#if 0 + printk(" FS=%08x ", regs->fs); + printk(" GS=%08x ", regs->gs); +#endif + printk("\n"); + +} /* print_regs */ + +#define NEW_esp fn_call_lookaside[trap_cpu].esp + +static void +regs_to_gdb_regs(int *gdb_regs, struct pt_regs *regs) +{ + gdb_regs[_EAX] = regs->eax; + gdb_regs[_EBX] = regs->ebx; + gdb_regs[_ECX] = regs->ecx; + gdb_regs[_EDX] = regs->edx; + gdb_regs[_ESI] = regs->esi; + gdb_regs[_EDI] = regs->edi; + gdb_regs[_EBP] = regs->ebp; + gdb_regs[_DS] = regs->xds; + gdb_regs[_ES] = regs->xes; + gdb_regs[_PS] = regs->eflags; + gdb_regs[_CS] = regs->xcs; + gdb_regs[_PC] = regs->eip; + /* Note, as we are debugging the kernel, we will always + * trap in kernel code, this means no privilege change, + * and so the pt_regs structure is not completely valid. In a + * non-privilege-change trap, only EFLAGS, CS and EIP are put on the stack, + * SS and ESP are not stacked, this means that the last 2 elements of + * pt_regs are not valid (they would normally refer to the user stack) + * also, using regs+1 is no good because you end up with a value that is + * 2 longs (8) too high. This used to cause stepping over functions + * to fail, so my fix is to use the address of regs->esp, which + * should point at the end of the stack frame. Note I have ignored + * completely exceptions that cause an error code to be stacked, such + * as double fault. Stuart Hughes, Zentropix. + * original code: gdb_regs[_ESP] = (int) (regs + 1) ; + + * this is now done on entry and moved to OLD_esp (as well as NEW_esp).
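+ * (Illustrative arithmetic for the 32-bit frame: on a no-privilege-change trap the CPU pushes only EFLAGS, CS and EIP, so the valid frame really ends at &regs->esp; (regs + 1) overshoots by the two unstacked members, esp and xss, i.e. by the 8 bytes mentioned above.)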
+ */ + gdb_regs[_ESP] = NEW_esp; + gdb_regs[_SS] = __KERNEL_DS; + gdb_regs[_FS] = 0xFFFF; + gdb_regs[_GS] = 0xFFFF; +} /* regs_to_gdb_regs */ + +static void +gdb_regs_to_regs(int *gdb_regs, struct pt_regs *regs) +{ + regs->eax = gdb_regs[_EAX]; + regs->ebx = gdb_regs[_EBX]; + regs->ecx = gdb_regs[_ECX]; + regs->edx = gdb_regs[_EDX]; + regs->esi = gdb_regs[_ESI]; + regs->edi = gdb_regs[_EDI]; + regs->ebp = gdb_regs[_EBP]; + regs->xds = gdb_regs[_DS]; + regs->xes = gdb_regs[_ES]; + regs->eflags = gdb_regs[_PS]; + regs->xcs = gdb_regs[_CS]; + regs->eip = gdb_regs[_PC]; + NEW_esp = gdb_regs[_ESP]; /* keep the value */ +#if 0 /* can't change these */ + regs->esp = gdb_regs[_ESP]; + regs->xss = gdb_regs[_SS]; + regs->fs = gdb_regs[_FS]; + regs->gs = gdb_regs[_GS]; +#endif + +} /* gdb_regs_to_regs */ +extern void scheduling_functions_start_here(void); +extern void scheduling_functions_end_here(void); +#define first_sched ((unsigned long) scheduling_functions_start_here) +#define last_sched ((unsigned long) scheduling_functions_end_here) + +int thread_list = 0; + +void +get_gdb_regs(struct task_struct *p, struct pt_regs *regs, int *gdb_regs) +{ + unsigned long stack_page; + int count = 0; + IF_SMP(int i); + if (!p || p == current) { + regs_to_gdb_regs(gdb_regs, regs); + return; + } +#ifdef CONFIG_SMP + for (i = 0; i < MAX_NO_CPUS; i++) { + if (p == kgdb_info.cpus_waiting[i].task) { + regs_to_gdb_regs(gdb_regs, + kgdb_info.cpus_waiting[i].regs); + gdb_regs[_ESP] = + (int) &kgdb_info.cpus_waiting[i].regs->esp; + + return; + } + } +#endif + memset(gdb_regs, 0, NUMREGBYTES); + gdb_regs[_ESP] = p->thread.esp; + gdb_regs[_PC] = p->thread.eip; + gdb_regs[_EBP] = *(int *) gdb_regs[_ESP]; + gdb_regs[_EDI] = *(int *) (gdb_regs[_ESP] + 4); + gdb_regs[_ESI] = *(int *) (gdb_regs[_ESP] + 8); + +/* + * This code is to give a more informative notion of where a process + * is waiting. It is used only when the user asks for a thread info + * list. If he then switches to the thread, s/he will find the task + * is in schedule, but a back trace should show the same info we come + * up with. This code was shamelessly purloined from process.c. It was + * then enhanced to provide more registers than simply the program + * counter. + */ + + if (!thread_list) { + return; + } + + if (p->state == TASK_RUNNING) + return; + stack_page = (unsigned long) p->thread_info; + if (gdb_regs[_ESP] < stack_page || gdb_regs[_ESP] > 8188 + stack_page) + return; + /* include/asm-i386/system.h:switch_to() pushes ebp last. */ + do { + if (gdb_regs[_EBP] < stack_page || + gdb_regs[_EBP] > 8184 + stack_page) + return; + gdb_regs[_PC] = *(unsigned long *) (gdb_regs[_EBP] + 4); + gdb_regs[_ESP] = gdb_regs[_EBP] + 8; + gdb_regs[_EBP] = *(unsigned long *) gdb_regs[_EBP]; + if (gdb_regs[_PC] < first_sched || gdb_regs[_PC] >= last_sched) + return; + } while (count++ < 16); + return; +} + +/* Indicate to caller of mem2hex or hex2mem that there has been an + error. 
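+   The handshake, as used by mem2hex()/hex2mem() below: set mem_err_expected, touch memory through get_char()/set_char(), then test mem_err. The page-fault leg of kgdb_handle_exception() sets mem_err and points eax/edx at garbage_loc so the faulting access can be retried harmlessly.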
*/ +static volatile int mem_err = 0; +static volatile int mem_err_expected = 0; +static volatile int mem_err_cnt = 0; +static int garbage_loc = -1; + +int +get_char(char *addr) +{ + return *addr; +} + +void +set_char(char *addr, int val, int may_fault) +{ + /* + * This code traps references to the area mapped to the kernel + * stack as given by the regs and, instead, stores to the + * fn_call_lookaside[cpu].array + */ + if (may_fault && + (unsigned int) addr < OLD_esp && + ((unsigned int) addr > (OLD_esp - (unsigned int) LOOKASIDE_SIZE))) { + addr = (char *) END_OF_LOOKASIDE - ((char *) OLD_esp - addr); + } + *addr = val; +} + +/* convert the memory pointed to by mem into hex, placing result in buf */ +/* return a pointer to the last char put in buf (null) */ +/* If MAY_FAULT is non-zero, then we should set mem_err in response to + a fault; if zero treat a fault like any other fault in the stub. */ +char * +mem2hex(char *mem, char *buf, int count, int may_fault) +{ + int i; + unsigned char ch; + + if (may_fault) { + mem_err_expected = 1; + mem_err = 0; + } + for (i = 0; i < count; i++) { + /* printk("%lx = ", mem) ; */ + + ch = get_char(mem++); + + /* printk("%02x\n", ch & 0xFF) ; */ + if (may_fault && mem_err) { + if (remote_debug) + printk("Mem fault fetching from addr %lx\n", + (long) (mem - 1)); + *buf = 0; /* truncate buffer */ + return (buf); + } + *buf++ = hexchars[ch >> 4]; + *buf++ = hexchars[ch % 16]; + } + *buf = 0; + if (may_fault) + mem_err_expected = 0; + return (buf); +} + +/* convert the hex array pointed to by buf into binary to be placed in mem */ +/* return a pointer to the character AFTER the last byte written */ +/* NOTE: We use the may fault flag to also indicate if the write is to + * the registers (0) or "other" memory (!=0) + */ +char * +hex2mem(char *buf, char *mem, int count, int may_fault) +{ + int i; + unsigned char ch; + + if (may_fault) { + mem_err_expected = 1; + mem_err = 0; + } + for (i = 0; i < count; i++) { + ch = hex(*buf++) << 4; + ch = ch + hex(*buf++); + set_char(mem++, ch, may_fault); + + if (may_fault && mem_err) { + if (remote_debug) + printk("Mem fault storing to addr %lx\n", + (long) (mem - 1)); + return (mem); + } + } + if (may_fault) + mem_err_expected = 0; + return (mem); +} + +/**********************************************/ +/* WHILE WE FIND NICE HEX CHARS, BUILD AN INT */ +/* RETURN NUMBER OF CHARS PROCESSED */ +/**********************************************/ +int +hexToInt(char **ptr, int *intValue) +{ + int numChars = 0; + int hexValue; + + *intValue = 0; + + while (**ptr) { + hexValue = hex(**ptr); + if (hexValue >= 0) { + *intValue = (*intValue << 4) | hexValue; + numChars++; + } else + break; + + (*ptr)++; + } + + return (numChars); +} + +#define stubhex(h) hex(h) +#ifdef old_thread_list + +static int +stub_unpack_int(char *buff, int fieldlength) +{ + int nibble; + int retval = 0; + + while (fieldlength) { + nibble = stubhex(*buff++); + retval |= nibble; + fieldlength--; + if (fieldlength) + retval = retval << 4; + } + return retval; +} +#endif +static char * +pack_hex_byte(char *pkt, int byte) +{ + *pkt++ = hexchars[(byte >> 4) & 0xf]; + *pkt++ = hexchars[(byte & 0xf)]; + return pkt; +} + +#define BUF_THREAD_ID_SIZE 16 + +static char * +pack_threadid(char *pkt, threadref * id) +{ + char *limit; + unsigned char *altid; + + altid = (unsigned char *) id; + limit = pkt + BUF_THREAD_ID_SIZE; + while (pkt < limit) + pkt = pack_hex_byte(pkt, *altid++); + return pkt; +} + +#ifdef old_thread_list +static char * +unpack_byte(char *buf, int 
*value) +{ + *value = stub_unpack_int(buf, 2); + return buf + 2; +} + +static char * +unpack_threadid(char *inbuf, threadref * id) +{ + char *altref; + char *limit = inbuf + BUF_THREAD_ID_SIZE; + int x, y; + + altref = (char *) id; + + while (inbuf < limit) { + x = stubhex(*inbuf++); + y = stubhex(*inbuf++); + *altref++ = (x << 4) | y; + } + return inbuf; +} +#endif +void +int_to_threadref(threadref * id, int value) +{ + unsigned char *scan; + + scan = (unsigned char *) id; + { + int i = 4; + while (i--) + *scan++ = 0; + } + *scan++ = (value >> 24) & 0xff; + *scan++ = (value >> 16) & 0xff; + *scan++ = (value >> 8) & 0xff; + *scan++ = (value & 0xff); +} +int +int_to_hex_v(unsigned char * id, int value) +{ + unsigned char *start = id; + int shift; + int ch; + + for (shift = 28; shift >= 0; shift -= 4) { + if ((ch = (value >> shift) & 0xf) || (id != start)) { + *id = hexchars[ch]; + id++; + } + } + if (id == start) + *id++ = '0'; + return id - start; +} +#ifdef old_thread_list + +static int +threadref_to_int(threadref * ref) +{ + int i, value = 0; + unsigned char *scan; + + scan = (char *) ref; + scan += 4; + i = 4; + while (i-- > 0) + value = (value << 8) | ((*scan++) & 0xff); + return value; +} +#endif +static int +cmp_str(char *s1, char *s2, int count) +{ + while (count--) { + if (*s1++ != *s2++) + return 0; + } + return 1; +} + +#if 1 /* this is a hold over from 2.4 where O(1) was "sometimes" */ +extern struct task_struct *kgdb_get_idle(int cpu); +#define idle_task(cpu) kgdb_get_idle(cpu) +#else +#define idle_task(cpu) init_tasks[cpu] +#endif + +extern int kgdb_pid_init_done; + +struct task_struct * +getthread(int pid) +{ + struct task_struct *thread; + if (pid >= PID_MAX && pid <= (PID_MAX + MAX_NO_CPUS)) { + + return idle_task(pid - PID_MAX); + } else { + /* + * find_task_by_pid is relatively safe all the time + * Other pid functions require lock downs which imply + * that we may be interrupting them (as we get here + * in the middle of most any lock down). + * Still we don't want to call until the table exists! 
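+ * (Worked example of the numbering: pids from PID_MAX up name the per-cpu idle tasks, so with the default PID_MAX_DEFAULT of 0x8000, cpu 1's idle thread is presented to gdb as thread 0x8001.)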
+ */ + if (kgdb_pid_init_done){ + thread = find_task_by_pid(pid); + if (thread) { + return thread; + } + } + } + return NULL; +} +/* *INDENT-OFF* */ +struct hw_breakpoint { + unsigned enabled; + unsigned type; + unsigned len; + unsigned addr; +} breakinfo[4] = { {enabled:0}, + {enabled:0}, + {enabled:0}, + {enabled:0}}; +/* *INDENT-ON* */ +unsigned hw_breakpoint_status; +void +correct_hw_break(void) +{ + int breakno; + int correctit; + int breakbit; + unsigned dr7; + + asm volatile ("movl %%db7, %0\n":"=r" (dr7) + :); + /* *INDENT-OFF* */ + do { + unsigned addr0, addr1, addr2, addr3; + asm volatile ("movl %%db0, %0\n" + "movl %%db1, %1\n" + "movl %%db2, %2\n" + "movl %%db3, %3\n" + :"=r" (addr0), "=r"(addr1), + "=r"(addr2), "=r"(addr3) + :); + } while (0); + /* *INDENT-ON* */ + correctit = 0; + for (breakno = 0; breakno < 4; breakno++) { + breakbit = 2 << (breakno << 1); + if (!(dr7 & breakbit) && breakinfo[breakno].enabled) { + correctit = 1; + dr7 |= breakbit; + dr7 &= ~(0xf0000 << (breakno << 2)); + dr7 |= (((breakinfo[breakno].len << 2) | + breakinfo[breakno].type) << 16) << + (breakno << 2); + switch (breakno) { + case 0: + asm volatile ("movl %0, %%dr0\n"::"r" + (breakinfo[breakno].addr)); + break; + + case 1: + asm volatile ("movl %0, %%dr1\n"::"r" + (breakinfo[breakno].addr)); + break; + + case 2: + asm volatile ("movl %0, %%dr2\n"::"r" + (breakinfo[breakno].addr)); + break; + + case 3: + asm volatile ("movl %0, %%dr3\n"::"r" + (breakinfo[breakno].addr)); + break; + } + } else if ((dr7 & breakbit) && !breakinfo[breakno].enabled) { + correctit = 1; + dr7 &= ~breakbit; + dr7 &= ~(0xf0000 << (breakno << 2)); + } + } + if (correctit) { + asm volatile ("movl %0, %%db7\n"::"r" (dr7)); + } +} + +int +remove_hw_break(unsigned breakno) +{ + if (!breakinfo[breakno].enabled) { + return -1; + } + breakinfo[breakno].enabled = 0; + return 0; +} + +int +set_hw_break(unsigned breakno, unsigned type, unsigned len, unsigned addr) +{ + if (breakinfo[breakno].enabled) { + return -1; + } + breakinfo[breakno].enabled = 1; + breakinfo[breakno].type = type; + breakinfo[breakno].len = len; + breakinfo[breakno].addr = addr; + return 0; +} + +#ifdef CONFIG_SMP +static int in_kgdb_console = 0; + +int +in_kgdb(struct pt_regs *regs) +{ + unsigned flags; + int cpu = smp_processor_id(); + in_kgdb_called = 1; + if (!spin_is_locked(&kgdb_spinlock)) { + if (in_kgdb_here_log[cpu] || /* we are holding this cpu */ + in_kgdb_console) { /* or we are doing slow i/o */ + return 1; + } + return 0; + } + + /* As I see it the only reason not to let all cpus spin on + * the same spin_lock is to allow selected ones to proceed. + * This would be a good thing, so we leave it this way. + * Maybe someday.... Done! + + * in_kgdb() is called from an NMI so we don't pretend + * to have any resources, like printk() for example. + */ + + kgdb_local_irq_save(flags); /* only local here, to avoid hanging */ + /* + * log arrival of this cpu + * The NMI keeps on ticking. Protect against recurring more + * than once, and ignore the cpu that has the kgdb lock + */ + in_kgdb_entry_log[cpu]++; + in_kgdb_here_log[cpu] = regs; + if (cpu == spinlock_cpu || waiting_cpus[cpu].task) { + goto exit_in_kgdb; + } + /* + * For protection of the initialization of the spin locks by kgdb + * it locks the kgdb spinlock before it gets the wait locks set + * up. We wait here for the wait lock to be taken. If the + * kgdb lock goes away first??
Well, it could be a slow exit + * sequence where the wait lock is removed prior to the kgdb lock + * so if kgdb gets unlocked, we just exit. + */ + while (spin_is_locked(&kgdb_spinlock) && + !spin_is_locked(waitlocks + cpu)) ; + if (!spin_is_locked(&kgdb_spinlock)) { + goto exit_in_kgdb; + } + waiting_cpus[cpu].task = current; + waiting_cpus[cpu].pid = (current->pid) ? : (PID_MAX + cpu); + waiting_cpus[cpu].regs = regs; + + spin_unlock_wait(waitlocks + cpu); + /* + * log departure of this cpu + */ + waiting_cpus[cpu].task = 0; + waiting_cpus[cpu].pid = 0; + waiting_cpus[cpu].regs = 0; + correct_hw_break(); + exit_in_kgdb: + in_kgdb_here_log[cpu] = 0; + kgdb_local_irq_restore(flags); + return 1; + /* + spin_unlock(continuelocks + smp_processor_id()); + */ +} + +void +smp__in_kgdb(struct pt_regs regs) +{ + ack_APIC_irq(); + in_kgdb(&regs); +} +#else +int +in_kgdb(struct pt_regs *regs) +{ + return (kgdb_spinlock); +} +#endif + +void +printexceptioninfo(int exceptionNo, int errorcode, char *buffer) +{ + unsigned dr6; + int i; + switch (exceptionNo) { + case 1: /* debug exception */ + break; + case 3: /* breakpoint */ + sprintf(buffer, "Software breakpoint"); + return; + default: + sprintf(buffer, "Details not available"); + return; + } + asm volatile ("movl %%db6, %0\n":"=r" (dr6) + :); + if (dr6 & 0x4000) { + sprintf(buffer, "Single step"); + return; + } + for (i = 0; i < 4; ++i) { + if (dr6 & (1 << i)) { + sprintf(buffer, "Hardware breakpoint %d", i); + return; + } + } + sprintf(buffer, "Unknown trap"); + return; +} + +/* + * This function does all command processing for interfacing to gdb. + * + * NOTE: The INT nn instruction leaves the state of the interrupt + * enable flag UNCHANGED. That means that when this routine + * is entered via a breakpoint (INT 3) instruction from code + * that has interrupts enabled, then interrupts will STILL BE + * enabled when this routine is entered. The first thing that + * we do here is disable interrupts so as to prevent recursive + * entries and bothersome serial interrupts while we are + * trying to run the serial port in polled mode. + * + * For kernel version 2.1.xx the kgdb_cli() actually gets a spin lock so + * it is always necessary to do a restore_flags before returning + * so as to let go of that lock. + */ +int +kgdb_handle_exception(int exceptionVector, + int signo, int err_code, struct pt_regs *linux_regs) +{ + struct task_struct *usethread = NULL; + struct task_struct *thread_list_start = 0, *thread = NULL; + int addr, length; + unsigned long address; + int breakno, breaktype; + char *ptr; + int newPC; + threadref thref; + int threadid; + int thread_min = PID_MAX + MAX_NO_CPUS; +#ifdef old_thread_list + int maxthreads; +#endif + int nothreads; + unsigned long flags; + int gdb_regs[NUMREGBYTES / 4]; + int dr6; + IF_SMP(int entry_state = 0); /* 0, ok, 1, no nmi, 2 sync failed */ +#define NO_NMI 1 +#define NO_SYNC 2 +#define regs (*linux_regs) +#define NUMREGS NUMREGBYTES/4 + /* + * If the entry is not from the kernel then return to the Linux + * trap handler and let it process the interrupt normally. + */ + if ((linux_regs->eflags & VM_MASK) || (3 & linux_regs->xcs)) { + printk("ignoring non-kernel exception\n"); + print_regs(&regs); + return (0); + } + /* + * If we're using eth mode, set the 'mode' in the netdevice.
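+ * (Ordering note: the code below reads %cr2 first, while the page-fault address is still intact, and only then switches the netdevice trap mode.)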
+ */ + + __asm__("movl %%cr2,%0":"=r" (address)); + + if (kgdb_eth != -1) { + kgdb_eth_set_trapmode(1); + } + + kgdb_local_irq_save(flags); + + /* Get kgdb spinlock */ + + KGDB_SPIN_LOCK(&kgdb_spinlock); + rdtscll(kgdb_info.entry_tsc); + /* + * We depend on this spinlock and the NMI watchdog to control the + * other cpus. They will arrive at "in_kgdb()" as a result of the + * NMI and will wait there for the following spin locks to be + * released. + */ +#ifdef CONFIG_SMP + +#if 0 + if (cpu_callout_map & ~MAX_CPU_MASK) { + printk("kgdb : too many cpus, possibly not mapped" + " in contiguous space, change MAX_NO_CPUS" + " in kgdb_stub and make new kernel.\n" + " cpu_callout_map is %lx\n", cpu_callout_map); + goto exit_just_unlock; + } +#endif + if (spinlock_count == 1) { + int time, end_time, dum; + int i; + int cpu_logged_in[MAX_NO_CPUS] = {[0 ... MAX_NO_CPUS - 1] = (0) + }; + if (remote_debug) { + printk("kgdb : cpu %d entry, syncing others\n", + smp_processor_id()); + } + for (i = 0; i < MAX_NO_CPUS; i++) { + /* + * Use trylock as we may already hold the lock if + * we are holding the cpu. Net result is all + * locked. + */ + spin_trylock(&waitlocks[i]); + } + for (i = 0; i < MAX_NO_CPUS; i++) + cpu_logged_in[i] = 0; + /* + * Wait for their arrival. We know the watchdog is active if + * in_kgdb() has ever been called, as it is always called on a + * watchdog tick. + */ + rdtsc(dum, time); + end_time = time + 2; /* Note: we use the High order bits! */ + i = 1; + if (num_online_cpus() > 1) { + int me_in_kgdb = in_kgdb_entry_log[smp_processor_id()]; + smp_send_nmi_allbutself(); + while (i < num_online_cpus() && time != end_time) { + int j; + for (j = 0; j < MAX_NO_CPUS; j++) { + if (waiting_cpus[j].task && + !cpu_logged_in[j]) { + i++; + cpu_logged_in[j] = 1; + if (remote_debug) { + printk + ("kgdb : cpu %d arrived at kgdb\n", + j); + } + break; + } else if (!waiting_cpus[j].task && + !cpu_online(j)) { + waiting_cpus[j].task = NOCPU; + cpu_logged_in[j] = 1; + waiting_cpus[j].hold = 1; + break; + } + if (!waiting_cpus[j].task && + in_kgdb_here_log[j]) { + + int wait = 100000; + while (wait--) ; + if (!waiting_cpus[j].task && + in_kgdb_here_log[j]) { + printk + ("kgdb : cpu %d stall" + " in in_kgdb\n", + j); + i++; + cpu_logged_in[j] = 1; + waiting_cpus[j].task = + (struct task_struct + *) 1; + } + } + } + + if (in_kgdb_entry_log[smp_processor_id()] > + (me_in_kgdb + 10)) { + break; + } + + rdtsc(dum, time); + } + if (i < num_online_cpus()) { + printk + ("kgdb : time out, proceeding without sync\n"); +#if 0 + printk("kgdb : Waiting_cpus: 0 = %d, 1 = %d\n", + waiting_cpus[0].task != 0, + waiting_cpus[1].task != 0); + printk("kgdb : Cpu_logged in: 0 = %d, 1 = %d\n", + cpu_logged_in[0], cpu_logged_in[1]); + printk + ("kgdb : in_kgdb_here_log in: 0 = %d, 1 = %d\n", + in_kgdb_here_log[0] != 0, + in_kgdb_here_log[1] != 0); +#endif + entry_state = NO_SYNC; + } else { +#if 0 + int ent = + in_kgdb_entry_log[smp_processor_id()] - + me_in_kgdb; + printk("kgdb : sync after %d entries\n", ent); +#endif + } + } else { + if (remote_debug) { + printk + ("kgdb : %d cpus, but watchdog not active\n" + "proceeding without locking down other cpus\n", + num_online_cpus()); + entry_state = NO_NMI; + } + } + } +#endif + + if (remote_debug) { + printk("handle_exception(exceptionVector=%d, " + "signo=%d, err_code=%d, linux_regs=%p)\n", + exceptionVector, signo, err_code, linux_regs); + printk(" address: %lx\n", address); + + if (debug_regs) { + print_regs(&regs); + show_trace(current, (unsigned long *)&regs); + } + } + +
/* Disable hardware debugging while we are in kgdb */ + /* Get the debug register status register */ +/* *INDENT-OFF* */ + __asm__("movl %0,%%db7" + : /* no output */ + :"r"(0)); + + asm volatile ("movl %%db6, %0\n" + :"=r" (hw_breakpoint_status) + :); + +/* *INDENT-ON* */ + switch (exceptionVector) { + case 0: /* divide error */ + case 1: /* debug exception */ + case 2: /* NMI */ + case 3: /* breakpoint */ + case 4: /* overflow */ + case 5: /* bounds check */ + case 6: /* invalid opcode */ + case 7: /* device not available */ + case 8: /* double fault (errcode) */ + case 10: /* invalid TSS (errcode) */ + case 12: /* stack fault (errcode) */ + case 16: /* floating point error */ + case 17: /* alignment check (errcode) */ + default: /* any undocumented */ + break; + case 11: /* segment not present (errcode) */ + case 13: /* general protection (errcode) */ + case 14: /* page fault (special errcode) */ + case 19: /* cache flush denied */ + if (mem_err_expected) { + /* + * This fault occurred because of the + * get_char or set_char routines. These + * two routines use either eax or edx to + * indirectly reference the location in + * memory that they are working with. + * For a page fault, when we return the + * instruction will be retried, so we + * have to make sure that these + * registers point to valid memory. + */ + mem_err = 1; /* set mem error flag */ + mem_err_expected = 0; + mem_err_cnt++; /* helps in debugging */ + /* make valid address */ + regs.eax = (long) &garbage_loc; + /* make valid address */ + regs.edx = (long) &garbage_loc; + if (remote_debug) + printk("Return after memory error: " + "mem_err_cnt=%d\n", mem_err_cnt); + if (debug_regs) + print_regs(&regs); + goto exit_kgdb; + } + break; + } + if (remote_debug) + printk("kgdb : entered kgdb on cpu %d\n", smp_processor_id()); + + gdb_i386vector = exceptionVector; + gdb_i386errcode = err_code; + kgdb_info.called_from = __builtin_return_address(0); +#ifdef CONFIG_SMP + /* + * OK, we can now communicate, let's tell gdb about the sync. + * but only if we had a problem. + */ + switch (entry_state) { + case NO_NMI: + to_gdb("NMI not active, other cpus not stopped\n"); + break; + case NO_SYNC: + to_gdb("Some cpus not stopped, see 'kgdb_info' for details\n"); + default:; + } + +#endif +/* + * Set up the gdb function call area. + */ + trap_cpu = smp_processor_id(); + OLD_esp = NEW_esp = (int) (&linux_regs->esp); + + IF_SMP(once_again:) + /* reply to host that an exception has occurred */ + remcomOutBuffer[0] = 'S'; + remcomOutBuffer[1] = hexchars[signo >> 4]; + remcomOutBuffer[2] = hexchars[signo % 16]; + remcomOutBuffer[3] = 0; + + if (kgdb_eth_is_initializing) { + kgdb_eth_is_initializing = 0; + } else { + putpacket(remcomOutBuffer); + } + + kgdb_eth_reply_arp(); + while (1 == 1) { + error = 0; + remcomOutBuffer[0] = 0; + getpacket(remcomInBuffer); + switch (remcomInBuffer[0]) { + case '?': + remcomOutBuffer[0] = 'S'; + remcomOutBuffer[1] = hexchars[signo >> 4]; + remcomOutBuffer[2] = hexchars[signo % 16]; + remcomOutBuffer[3] = 0; + break; + case 'd': + remote_debug = !(remote_debug); /* toggle debug flag */ + printk("Remote debug %s\n", + remote_debug ?
"on" : "off"); + break; + case 'g': /* return the value of the CPU registers */ + get_gdb_regs(usethread, ®s, gdb_regs); + mem2hex((char *) gdb_regs, + remcomOutBuffer, NUMREGBYTES, 0); + break; + case 'G': /* set the value of the CPU registers - return OK */ + hex2mem(&remcomInBuffer[1], + (char *) gdb_regs, NUMREGBYTES, 0); + if (!usethread || usethread == current) { + gdb_regs_to_regs(gdb_regs, ®s); + strcpy(remcomOutBuffer, "OK"); + } else { + strcpy(remcomOutBuffer, "E00"); + } + break; + + case 'P':{ /* set the value of a single CPU register - + return OK */ + /* + * For some reason, gdb wants to talk about psudo + * registers (greater than 15). These may have + * meaning for ptrace, but for us it is safe to + * ignor them. We do this by dumping them into + * _GS which we also ignor, but do have memory for. + */ + int regno; + + ptr = &remcomInBuffer[1]; + regs_to_gdb_regs(gdb_regs, ®s); + if ((!usethread || usethread == current) && + hexToInt(&ptr, ®no) && + *ptr++ == '=' && (regno >= 0)) { + regno = + (regno >= NUMREGS ? _GS : regno); + hex2mem(ptr, (char *) &gdb_regs[regno], + 4, 0); + gdb_regs_to_regs(gdb_regs, ®s); + strcpy(remcomOutBuffer, "OK"); + break; + } + strcpy(remcomOutBuffer, "E01"); + break; + } + + /* mAA..AA,LLLL Read LLLL bytes at address AA..AA */ + case 'm': + /* TRY TO READ %x,%x. IF SUCCEED, SET PTR = 0 */ + ptr = &remcomInBuffer[1]; + if (hexToInt(&ptr, &addr) && + (*(ptr++) == ',') && (hexToInt(&ptr, &length))) { + ptr = 0; + /* + * hex doubles the byte count + */ + if (length > (BUFMAX / 2)) + length = BUFMAX / 2; + mem2hex((char *) addr, + remcomOutBuffer, length, 1); + if (mem_err) { + strcpy(remcomOutBuffer, "E03"); + debug_error("memory fault\n", NULL); + } + } + + if (ptr) { + strcpy(remcomOutBuffer, "E01"); + debug_error + ("malformed read memory command: %s\n", + remcomInBuffer); + } + break; + + /* MAA..AA,LLLL: + Write LLLL bytes at address AA.AA return OK */ + case 'M': + /* TRY TO READ '%x,%x:'. IF SUCCEED, SET PTR = 0 */ + ptr = &remcomInBuffer[1]; + if (hexToInt(&ptr, &addr) && + (*(ptr++) == ',') && + (hexToInt(&ptr, &length)) && (*(ptr++) == ':')) { + hex2mem(ptr, (char *) addr, length, 1); + + if (mem_err) { + strcpy(remcomOutBuffer, "E03"); + debug_error("memory fault\n", NULL); + } else { + strcpy(remcomOutBuffer, "OK"); + } + + ptr = 0; + } + if (ptr) { + strcpy(remcomOutBuffer, "E02"); + debug_error + ("malformed write memory command: %s\n", + remcomInBuffer); + } + break; + case 'S': + remcomInBuffer[0] = 's'; + case 'C': + /* Csig;AA..AA where ;AA..AA is optional + * continue with signal + * Since signals are meaning less to us, delete that + * part and then fall into the 'c' code. 
+ */ + ptr = &remcomInBuffer[1]; + length = 2; + while (*ptr && *ptr != ';') { + length++; + ptr++; + } + if (*ptr) { + do { + ptr++; + *(ptr - length++) = *ptr; + } while (*ptr); + } else { + remcomInBuffer[1] = 0; + } + + /* cAA..AA Continue at address AA..AA(optional) */ + /* sAA..AA Step one instruction from AA..AA(optional) */ + /* D detach, reply OK and then continue */ + case 'c': + case 's': + case 'D': + + /* try to read optional parameter, + pc unchanged if no parm */ + ptr = &remcomInBuffer[1]; + if (hexToInt(&ptr, &addr)) { + if (remote_debug) + printk("Changing EIP to 0x%x\n", addr); + + regs.eip = addr; + } + + newPC = regs.eip; + + if (kgdb_eth != -1) { + kgdb_eth_set_trapmode(0); + } + + /* clear the trace bit */ + regs.eflags &= 0xfffffeff; + + /* set the trace bit if we're stepping */ + if (remcomInBuffer[0] == 's') + regs.eflags |= 0x100; + + /* detach is a friendly version of continue. Note that + debugging is still enabled (e.g. hit control-C) + */ + if (remcomInBuffer[0] == 'D') { + strcpy(remcomOutBuffer, "OK"); + putpacket(remcomOutBuffer); + } + + if (remote_debug) { + printk("Resuming execution\n"); + print_regs(&regs); + } + asm volatile ("movl %%db6, %0\n":"=r" (dr6) + :); + if (!(dr6 & 0x4000)) { + for (breakno = 0; breakno < 4; ++breakno) { + if (dr6 & (1 << breakno) && + (breakinfo[breakno].type == 0)) { + /* Set restore flag */ + regs.eflags |= 0x10000; + break; + } + } + } + correct_hw_break(); + asm volatile ("movl %0, %%db6\n"::"r" (0)); + goto exit_kgdb; + + /* kill the program */ + case 'k': /* do nothing */ + break; + + /* query */ + case 'q': + nothreads = 0; + switch (remcomInBuffer[1]) { + case 'f': + threadid = 1; + thread_list = 2; + thread_list_start = (usethread ? : current); + case 's': + if (!cmp_str(&remcomInBuffer[2], + "ThreadInfo", 10)) + break; + + remcomOutBuffer[nothreads++] = 'm'; + for (; threadid < PID_MAX + MAX_NO_CPUS; + threadid++) { + thread = getthread(threadid); + if (thread) { + nothreads += int_to_hex_v( + &remcomOutBuffer[ + nothreads], + threadid); + if (thread_min > threadid) + thread_min = threadid; + remcomOutBuffer[ + nothreads] = ','; + nothreads++; + if (nothreads > BUFMAX - 10) + break; + } + } + if (remcomOutBuffer[nothreads - 1] == 'm') { + remcomOutBuffer[nothreads - 1] = 'l'; + } else { + nothreads--; + } + remcomOutBuffer[nothreads] = 0; + break; + +#ifdef old_thread_list /* Old thread info request */ + case 'L': + /* List threads */ + thread_list = 2; + thread_list_start = (usethread ?
: current); + unpack_byte(remcomInBuffer + 3, &maxthreads); + unpack_threadid(remcomInBuffer + 5, &thref); + do { + int buf_thread_limit = + (BUFMAX - 22) / BUF_THREAD_ID_SIZE; + if (maxthreads > buf_thread_limit) { + maxthreads = buf_thread_limit; + } + } while (0); + remcomOutBuffer[0] = 'q'; + remcomOutBuffer[1] = 'M'; + remcomOutBuffer[4] = '0'; + pack_threadid(remcomOutBuffer + 5, &thref); + + threadid = threadref_to_int(&thref); + for (nothreads = 0; + nothreads < maxthreads && + threadid < PID_MAX + MAX_NO_CPUS; + threadid++) { + thread = getthread(threadid); + if (thread) { + int_to_threadref(&thref, + threadid); + pack_threadid(remcomOutBuffer + + 21 + + nothreads * 16, + &thref); + nothreads++; + if (thread_min > threadid) + thread_min = threadid; + } + } + + if (threadid == PID_MAX + MAX_NO_CPUS) { + remcomOutBuffer[4] = '1'; + } + pack_hex_byte(remcomOutBuffer + 2, nothreads); + remcomOutBuffer[21 + nothreads * 16] = '\0'; + break; +#endif + case 'C': + /* Current thread id */ + remcomOutBuffer[0] = 'Q'; + remcomOutBuffer[1] = 'C'; + threadid = current->pid; + if (!threadid) { + /* + * idle thread + */ + for (threadid = PID_MAX; + threadid < PID_MAX + MAX_NO_CPUS; + threadid++) { + if (current == + idle_task(threadid - + PID_MAX)) + break; + } + } + int_to_threadref(&thref, threadid); + pack_threadid(remcomOutBuffer + 2, &thref); + remcomOutBuffer[18] = '\0'; + break; + + case 'E': + /* Print exception info */ + printexceptioninfo(exceptionVector, + err_code, remcomOutBuffer); + break; + case 'T':{ + char * nptr; + /* Thread extra info */ + if (!cmp_str(&remcomInBuffer[2], + "hreadExtraInfo,", 15)) { + break; + } + ptr = &remcomInBuffer[17]; + hexToInt(&ptr, &threadid); + thread = getthread(threadid); + nptr = &thread->comm[0]; + length = 0; + ptr = &remcomOutBuffer[0]; + do { + length++; + ptr = pack_hex_byte(ptr, *nptr++); + } while (*nptr && length < 16); + /* + * would like that 16 to be the size of + * task_struct.comm but don't know the + * syntax.. + */ + *ptr = 0; + } + } + break; + + /* task related */ + case 'H': + switch (remcomInBuffer[1]) { + case 'g': + ptr = &remcomInBuffer[2]; + hexToInt(&ptr, &threadid); + thread = getthread(threadid); + if (!thread) { + remcomOutBuffer[0] = 'E'; + remcomOutBuffer[1] = '\0'; + break; + } + /* + * Just in case I forget what this is all about, + * the "thread info" command to gdb causes it + * to ask for a thread list. It then switches + * to each thread and asks for the registers. + * For this (and only this) usage, we want to + * fudge the registers of tasks not on the run + * list (i.e. waiting) to show the routine that + * called schedule. Also, gdb, is a minimalist + * in that if the current thread is the last + * it will not re-read the info when done. + * This means that in this case we must show + * the real registers. So here is how we do it: + * Each entry we keep track of the min + * thread in the list (the last that gdb will) + * get info for. We also keep track of the + * starting thread. + * "thread_list" is cleared when switching back + * to the min thread if it is was current, or + * if it was not current, thread_list is set + * to 1. When the switch to current comes, + * if thread_list is 1, clear it, else do + * nothing. 
+ */ + usethread = thread; + if ((thread_list == 1) && + (thread == thread_list_start)) { + thread_list = 0; + } + if (thread_list && (threadid == thread_min)) { + if (thread == thread_list_start) { + thread_list = 0; + } else { + thread_list = 1; + } + } + /* follow through */ + case 'c': + remcomOutBuffer[0] = 'O'; + remcomOutBuffer[1] = 'K'; + remcomOutBuffer[2] = '\0'; + break; + } + break; + + /* Query thread status */ + case 'T': + ptr = &remcomInBuffer[1]; + hexToInt(&ptr, &threadid); + thread = getthread(threadid); + if (thread) { + remcomOutBuffer[0] = 'O'; + remcomOutBuffer[1] = 'K'; + remcomOutBuffer[2] = '\0'; + if (thread_min > threadid) + thread_min = threadid; + } else { + remcomOutBuffer[0] = 'E'; + remcomOutBuffer[1] = '\0'; + } + break; + + case 'Y': /* set up a hardware breakpoint */ + ptr = &remcomInBuffer[1]; + hexToInt(&ptr, &breakno); + ptr++; + hexToInt(&ptr, &breaktype); + ptr++; + hexToInt(&ptr, &length); + ptr++; + hexToInt(&ptr, &addr); + if (set_hw_break(breakno & 0x3, + breaktype & 0x3, + length & 0x3, addr) == 0) { + strcpy(remcomOutBuffer, "OK"); + } else { + strcpy(remcomOutBuffer, "ERROR"); + } + break; + + /* Remove hardware breakpoint */ + case 'y': + ptr = &remcomInBuffer[1]; + hexToInt(&ptr, &breakno); + if (remove_hw_break(breakno & 0x3) == 0) { + strcpy(remcomOutBuffer, "OK"); + } else { + strcpy(remcomOutBuffer, "ERROR"); + } + break; + + case 'r': /* reboot */ + strcpy(remcomOutBuffer, "OK"); + putpacket(remcomOutBuffer); + /*to_gdb("Rebooting\n"); */ + /* triplefault no return from here */ + { + static long no_idt[2]; + __asm__ __volatile__("lidt %0"::"m"(no_idt[0])); + BREAKPOINT; + } + + } /* switch */ + + /* reply to the request */ + putpacket(remcomOutBuffer); + } /* while(1==1) */ + /* + * reached by goto only. + */ + exit_kgdb: + /* + * Here is where we set up to trap a gdb function call. NEW_esp + * will be changed if we are trying to do this. We handle both + * adding and subtracting, thus allowing gdb to put grung on + * the stack which it removes later. + */ + if (NEW_esp != OLD_esp) { + int *ptr = END_OF_LOOKASIDE; + if (NEW_esp < OLD_esp) + ptr -= (OLD_esp - NEW_esp) / sizeof (int); + *--ptr = linux_regs->eflags; + *--ptr = linux_regs->xcs; + *--ptr = linux_regs->eip; + *--ptr = linux_regs->ecx; + *--ptr = linux_regs->ebx; + *--ptr = linux_regs->eax; + linux_regs->ecx = NEW_esp - (sizeof (int) * 6); + linux_regs->ebx = (unsigned int) END_OF_LOOKASIDE; + if (NEW_esp < OLD_esp) { + linux_regs->eip = (unsigned int) fn_call_stub; + } else { + linux_regs->eip = (unsigned int) fn_rtn_stub; + linux_regs->eax = NEW_esp; + } + linux_regs->eflags &= ~(IF_BIT | TF_BIT); + } +#ifdef CONFIG_SMP + /* + * Release gdb wait locks + * Sanity check time. Must have at least one cpu to run. Also single + * step must not be done if the current cpu is on hold. + */ + if (spinlock_count == 1) { + int ss_hold = (regs.eflags & 0x100) && kgdb_info.hold_on_sstep; + int cpu_avail = 0; + int i; + + for (i = 0; i < MAX_NO_CPUS; i++) { + if (!cpu_online(i)) + break; + if (!hold_cpu(i)) { + cpu_avail = 1; + } + } + /* + * Early in the bring up there will be NO cpus on line... 
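+ * hence the cpus_empty() test below: an empty online map is tolerated rather than reported as "no cpus unblocked".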
+ */ + if (!cpu_avail && !cpus_empty(cpu_online_map)) { + to_gdb("No cpus unblocked, see 'kgdb_info.hold_cpu'\n"); + goto once_again; + } + if (hold_cpu(smp_processor_id()) && (regs.eflags & 0x100)) { + to_gdb + ("Current cpu must be unblocked to single step\n"); + goto once_again; + } + if (!(ss_hold)) { + int i; + for (i = 0; i < MAX_NO_CPUS; i++) { + if (!hold_cpu(i)) { + spin_unlock(&waitlocks[i]); + } + } + } else { + spin_unlock(&waitlocks[smp_processor_id()]); + } + /* Release kgdb spinlock */ + KGDB_SPIN_UNLOCK(&kgdb_spinlock); + /* + * If this cpu is on hold, this is where we + * do it. Note, the NMI will pull us out of here, + * but will return as the above lock is not held. + * We will stay here till another cpu releases the lock for us. + */ + spin_unlock_wait(waitlocks + smp_processor_id()); + kgdb_local_irq_restore(flags); + return (0); + } +#if 0 +exit_just_unlock: +#endif +#endif + /* Release kgdb spinlock */ + KGDB_SPIN_UNLOCK(&kgdb_spinlock); + kgdb_local_irq_restore(flags); + return (0); +} + +/* This function is used to set up exception handlers for tracing and + * breakpoints. + * This function is not needed as the above line does all that is needed. + * We leave it for backward compatibility... + */ +void +set_debug_traps(void) +{ + /* + * linux_debug_hook is defined in traps.c. We store a pointer + * to our own exception handler into it. + + * But really folks, ever hear of labeled common, an old Fortran + * concept. Lots of folks can reference it and it is defined if + * anyone does. Only one can initialize it at link time. We do + * this with the hook. See the statement above. No need for any + * executable code and it is ready as soon as the kernel is + * loaded. Very desirable in kernel debugging. + + linux_debug_hook = handle_exception ; + */ + + /* In case GDB is started before us, ack any packets (presumably + "$?#xx") sitting there. + putDebugChar ('+'); + + initialized = 1; + */ +} + +/* This function will generate a breakpoint exception. It is used at the + beginning of a program to sync up with a debugger and can be used + otherwise as a quick means to stop program execution and "break" into + the debugger. */ +/* But really, just use the BREAKPOINT macro. We will handle the int stuff + */ + +#ifdef later +/* + * possibly we should not go thru the traps.c code at all? Someday.
+ */ +void +do_kgdb_int3(struct pt_regs *regs, long error_code) +{ + kgdb_handle_exception(3, 5, error_code, regs); + return; +} +#endif +#undef regs +#ifdef CONFIG_TRAP_BAD_SYSCALL_EXITS +asmlinkage void +bad_sys_call_exit(int stuff) +{ + struct pt_regs *regs = (struct pt_regs *) &stuff; + printk("Sys call %d return with %x preempt_count\n", + (int) regs->orig_eax, preempt_count()); +} +#endif +#ifdef CONFIG_STACK_OVERFLOW_TEST +#include +asmlinkage void +stack_overflow(void) +{ +#ifdef BREAKPOINT + BREAKPOINT; +#else + printk("Kernel stack overflow, looping forever\n"); +#endif + while (1) { + } +} +#endif + +#if defined(CONFIG_SMP) || defined(CONFIG_KGDB_CONSOLE) +char gdbconbuf[BUFMAX]; + +static void +kgdb_gdb_message(const char *s, unsigned count) +{ + int i; + int wcount; + char *bufptr; + /* + * This takes care of NMI while spining out chars to gdb + */ + IF_SMP(in_kgdb_console = 1); + gdbconbuf[0] = 'O'; + bufptr = gdbconbuf + 1; + while (count > 0) { + if ((count << 1) > (BUFMAX - 2)) { + wcount = (BUFMAX - 2) >> 1; + } else { + wcount = count; + } + count -= wcount; + for (i = 0; i < wcount; i++) { + bufptr = pack_hex_byte(bufptr, s[i]); + } + *bufptr = '\0'; + s += wcount; + + putpacket(gdbconbuf); + + } + IF_SMP(in_kgdb_console = 0); +} +#endif +#ifdef CONFIG_SMP +static void +to_gdb(const char *s) +{ + int count = 0; + while (s[count] && (count++ < BUFMAX)) ; + kgdb_gdb_message(s, count); +} +#endif +#ifdef CONFIG_KGDB_CONSOLE +#include +#include +#include +#include +#include + +void +kgdb_console_write(struct console *co, const char *s, unsigned count) +{ + + if (gdb_i386vector == -1) { + /* + * We have not yet talked to gdb. What to do... + * lets break, on continue we can do the write. + * But first tell him whats up. Uh, well no can do, + * as this IS the console. Oh well... + * We do need to wait or the messages will be lost. + * Other option would be to tell the above code to + * ignore this breakpoint and do an auto return, + * but that might confuse gdb. Also this happens + * early enough in boot up that we don't have the traps + * set up yet, so... + */ + breakpoint(); + } + kgdb_gdb_message(s, count); +} + +/* + * ------------------------------------------------------------ + * Serial KGDB driver + * ------------------------------------------------------------ + */ + +static struct console kgdbcons = { + name:"kgdb", + write:kgdb_console_write, +#ifdef CONFIG_KGDB_USER_CONSOLE + device:kgdb_console_device, +#endif + flags:CON_PRINTBUFFER | CON_ENABLED, + index:-1, +}; + +/* + * The trick here is that this file gets linked before printk.o + * That means we get to peer at the console info in the command + * line before it does. If we are up, we register, otherwise, + * do nothing. By returning 0, we allow printk to look also. + */ +static int kgdb_console_enabled; + +int __init +kgdb_console_init(char *str) +{ + if ((strncmp(str, "kgdb", 4) == 0) || (strncmp(str, "gdb", 3) == 0)) { + register_console(&kgdbcons); + kgdb_console_enabled = 1; + } + return 0; /* let others look at the string */ +} + +__setup("console=", kgdb_console_init); + +#ifdef CONFIG_KGDB_USER_CONSOLE +static kdev_t kgdb_console_device(struct console *c); +/* This stuff sort of works, but it knocks out telnet devices + * we are leaving it here in case we (or you) find time to figure it out + * better.. + */ + +/* + * We need a real char device as well for when the console is opened for user + * space activities. 
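+ * (The node claimed below is MKDEV(TTYAUX_MAJOR, 1), i.e. major 5, minor 1: /dev/console. kgdb_console_finit() unregisters whatever driver owns it and installs kgdb_consdev_fops in its place.)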
+ */ + +static int +kgdb_consdev_open(struct inode *inode, struct file *file) +{ + return 0; +} + +static ssize_t +kgdb_consdev_write(struct file *file, const char *buf, + size_t count, loff_t * ppos) +{ + int size, ret = 0; + static char kbuf[128]; + static DECLARE_MUTEX(sem); + + /* We are not reentrant... */ + if (down_interruptible(&sem)) + return -ERESTARTSYS; + + while (count > 0) { + /* need to copy the data from user space */ + size = count; + if (size > sizeof (kbuf)) + size = sizeof (kbuf); + if (copy_from_user(kbuf, buf, size)) { + ret = -EFAULT; + break;; + } + kgdb_console_write(&kgdbcons, kbuf, size); + count -= size; + ret += size; + buf += size; + } + + up(&sem); + + return ret; +} + +struct file_operations kgdb_consdev_fops = { + open:kgdb_consdev_open, + write:kgdb_consdev_write +}; +static kdev_t +kgdb_console_device(struct console *c) +{ + return MKDEV(TTYAUX_MAJOR, 1); +} + +/* + * This routine gets called from the serial stub in the i386/lib + * This is so it is done late in bring up (just before the console open). + */ +void +kgdb_console_finit(void) +{ + if (kgdb_console_enabled) { + char *cptr = cdevname(MKDEV(TTYAUX_MAJOR, 1)); + char *cp = cptr; + while (*cptr && *cptr != '(') + cptr++; + *cptr = 0; + unregister_chrdev(TTYAUX_MAJOR, cp); + register_chrdev(TTYAUX_MAJOR, "kgdb", &kgdb_consdev_fops); + } +} +#endif +#endif +#ifdef CONFIG_KGDB_TS +#include /* time stamp code */ +#include /* in_interrupt */ +#ifdef CONFIG_KGDB_TS_64 +#define DATA_POINTS 64 +#endif +#ifdef CONFIG_KGDB_TS_128 +#define DATA_POINTS 128 +#endif +#ifdef CONFIG_KGDB_TS_256 +#define DATA_POINTS 256 +#endif +#ifdef CONFIG_KGDB_TS_512 +#define DATA_POINTS 512 +#endif +#ifdef CONFIG_KGDB_TS_1024 +#define DATA_POINTS 1024 +#endif +#ifndef DATA_POINTS +#define DATA_POINTS 128 /* must be a power of two */ +#endif +#define INDEX_MASK (DATA_POINTS - 1) +#if (INDEX_MASK & DATA_POINTS) +#error "CONFIG_KGDB_TS_COUNT must be a power of 2" +#endif +struct kgdb_and_then_struct { +#ifdef CONFIG_SMP + int on_cpu; +#endif + struct task_struct *task; + long long at_time; + int from_ln; + char *in_src; + void *from; + int *with_shpf; + int data0; + int data1; +}; +struct kgdb_and_then_struct2 { +#ifdef CONFIG_SMP + int on_cpu; +#endif + struct task_struct *task; + long long at_time; + int from_ln; + char *in_src; + void *from; + int *with_shpf; + struct task_struct *t1; + struct task_struct *t2; +}; +struct kgdb_and_then_struct kgdb_data[DATA_POINTS]; + +struct kgdb_and_then_struct *kgdb_and_then = &kgdb_data[0]; +int kgdb_and_then_count; + +void +kgdb_tstamp(int line, char *source, int data0, int data1) +{ + static spinlock_t ts_spin = SPIN_LOCK_UNLOCKED; + int flags; + kgdb_local_irq_save(flags); + spin_lock(&ts_spin); + rdtscll(kgdb_and_then->at_time); +#ifdef CONFIG_SMP + kgdb_and_then->on_cpu = smp_processor_id(); +#endif + kgdb_and_then->task = current; + kgdb_and_then->from_ln = line; + kgdb_and_then->in_src = source; + kgdb_and_then->from = __builtin_return_address(0); + kgdb_and_then->with_shpf = (int *) (((flags & IF_BIT) >> 9) | + (preempt_count() << 8)); + kgdb_and_then->data0 = data0; + kgdb_and_then->data1 = data1; + kgdb_and_then = &kgdb_data[++kgdb_and_then_count & INDEX_MASK]; + spin_unlock(&ts_spin); + kgdb_local_irq_restore(flags); +#ifdef CONFIG_PREEMPT + +#endif + return; +} +#endif +typedef int gdb_debug_hook(int exceptionVector, + int signo, int err_code, struct pt_regs *linux_regs); +gdb_debug_hook *linux_debug_hook = &kgdb_handle_exception; /* histerical reasons... 
*/ + +static int __init kgdb_opt_kgdbeth(char *str) +{ + kgdb_eth = simple_strtoul(str, NULL, 10); + return 1; +} + +static int __init kgdb_opt_kgdbeth_remoteip(char *str) +{ + kgdb_remoteip = in_aton(str); + return 1; +} + +static int __init kgdb_opt_kgdbeth_listenport(char *str) +{ + kgdb_listenport = simple_strtoul(str, NULL, 10); + kgdb_sendport = kgdb_listenport - 1; + return 1; +} + +static int __init parse_hw_addr(char *str, unsigned char *addr) +{ + int i; + char *p; + + p = str; + i = 0; + while(1) + { + unsigned int c; + + sscanf(p, "%x:", &c); + addr[i++] = c; + while((*p != 0) && (*p != ':')) { + p++; + } + if (*p == 0) { + break; + } + p++; + } + + return 1; +} + +static int __init kgdb_opt_kgdbeth_remotemac(char *str) +{ + return parse_hw_addr(str, kgdb_remotemac); +} +static int __init kgdb_opt_kgdbeth_localmac(char *str) +{ + return parse_hw_addr(str, kgdb_localmac); +} + + +__setup("gdbeth=", kgdb_opt_kgdbeth); +__setup("gdbeth_remoteip=", kgdb_opt_kgdbeth_remoteip); +__setup("gdbeth_listenport=", kgdb_opt_kgdbeth_listenport); +__setup("gdbeth_remotemac=", kgdb_opt_kgdbeth_remotemac); +__setup("gdbeth_localmac=", kgdb_opt_kgdbeth_localmac); + --- linux-2.6.0-test6/arch/i386/kernel/ldt.c 2003-08-22 19:23:40.000000000 -0700 +++ 25/arch/i386/kernel/ldt.c 2003-10-05 00:36:48.000000000 -0700 @@ -2,7 +2,7 @@ * linux/kernel/ldt.c * * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds - * Copyright (C) 1999 Ingo Molnar + * Copyright (C) 1999, 2003 Ingo Molnar */ #include @@ -18,6 +18,8 @@ #include #include #include +#include +#include #ifdef CONFIG_SMP /* avoids "defined but not used" warnig */ static void flush_ldt(void *null) @@ -29,34 +31,31 @@ static void flush_ldt(void *null) static int alloc_ldt(mm_context_t *pc, int mincount, int reload) { - void *oldldt; - void *newldt; - int oldsize; + int oldsize, newsize, i; if (mincount <= pc->size) return 0; + /* + * LDT got larger - reallocate if necessary. 
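+	 * (Sizing, for reference: mincount is rounded up to a multiple of 512 entries, and at LDT_ENTRY_SIZE == 8 bytes each that is 4096 bytes, so ldt_pages[] grows in whole-page steps.)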
+ */ oldsize = pc->size; mincount = (mincount+511)&(~511); - if (mincount*LDT_ENTRY_SIZE > PAGE_SIZE) - newldt = vmalloc(mincount*LDT_ENTRY_SIZE); - else - newldt = kmalloc(mincount*LDT_ENTRY_SIZE, GFP_KERNEL); - - if (!newldt) - return -ENOMEM; - - if (oldsize) - memcpy(newldt, pc->ldt, oldsize*LDT_ENTRY_SIZE); - oldldt = pc->ldt; - memset(newldt+oldsize*LDT_ENTRY_SIZE, 0, (mincount-oldsize)*LDT_ENTRY_SIZE); - pc->ldt = newldt; - wmb(); + newsize = mincount*LDT_ENTRY_SIZE; + for (i = 0; i < newsize; i += PAGE_SIZE) { + int nr = i/PAGE_SIZE; + BUG_ON(i >= 64*1024); + if (!pc->ldt_pages[nr]) { + pc->ldt_pages[nr] = alloc_page(GFP_HIGHUSER); + if (!pc->ldt_pages[nr]) + return -ENOMEM; + clear_highpage(pc->ldt_pages[nr]); + } + } pc->size = mincount; - wmb(); - if (reload) { #ifdef CONFIG_SMP cpumask_t mask; + preempt_disable(); load_LDT(pc); mask = cpumask_of_cpu(smp_processor_id()); @@ -67,21 +66,20 @@ static int alloc_ldt(mm_context_t *pc, i load_LDT(pc); #endif } - if (oldsize) { - if (oldsize*LDT_ENTRY_SIZE > PAGE_SIZE) - vfree(oldldt); - else - kfree(oldldt); - } return 0; } static inline int copy_ldt(mm_context_t *new, mm_context_t *old) { - int err = alloc_ldt(new, old->size, 0); - if (err < 0) + int i, err, size = old->size, nr_pages = (size*LDT_ENTRY_SIZE + PAGE_SIZE-1)/PAGE_SIZE; + + err = alloc_ldt(new, size, 0); + if (err < 0) { + new->size = 0; return err; - memcpy(new->ldt, old->ldt, old->size*LDT_ENTRY_SIZE); + } + for (i = 0; i < nr_pages; i++) + copy_user_highpage(new->ldt_pages[i], old->ldt_pages[i], 0); return 0; } @@ -96,6 +94,7 @@ int init_new_context(struct task_struct init_MUTEX(&mm->context.sem); mm->context.size = 0; + memset(mm->context.ldt_pages, 0, sizeof(struct page *) * MAX_LDT_PAGES); old_mm = current->mm; if (old_mm && old_mm->context.size > 0) { down(&old_mm->context.sem); @@ -107,23 +106,21 @@ int init_new_context(struct task_struct /* * No need to lock the MM as we are the last user + * Do not touch the ldt register, we are already + * in the next thread. */ void destroy_context(struct mm_struct *mm) { - if (mm->context.size) { - if (mm == current->active_mm) - clear_LDT(); - if (mm->context.size*LDT_ENTRY_SIZE > PAGE_SIZE) - vfree(mm->context.ldt); - else - kfree(mm->context.ldt); - mm->context.size = 0; - } + int i, nr_pages = (mm->context.size*LDT_ENTRY_SIZE + PAGE_SIZE-1) / PAGE_SIZE; + + for (i = 0; i < nr_pages; i++) + __free_page(mm->context.ldt_pages[i]); + mm->context.size = 0; } static int read_ldt(void __user * ptr, unsigned long bytecount) { - int err; + int err, i; unsigned long size; struct mm_struct * mm = current->mm; @@ -138,8 +135,25 @@ static int read_ldt(void __user * ptr, u size = bytecount; err = 0; - if (copy_to_user(ptr, mm->context.ldt, size)) - err = -EFAULT; + /* + * This is necessary just in case we got here straight from a + * context-switch where the ptes were set but no tlb flush + * was done yet. We rather avoid doing a TLB flush in the + * context-switch path and do it here instead. 
--- linux-2.6.0-test6/arch/i386/kernel/mpparse.c	2003-09-27 18:57:43.000000000 -0700
+++ 25/arch/i386/kernel/mpparse.c	2003-10-05 00:36:48.000000000 -0700
@@ -169,7 +169,7 @@ void __init MP_processor_info (struct mp
 	if (num_processors >= NR_CPUS) {
 		printk(KERN_WARNING "NR_CPUS limit of %i reached. Cannot "
-			"boot CPU(apicid 0x%d).\n", NR_CPUS, m->mpc_apicid);
+			"boot CPU(apicid 0x%x).\n", NR_CPUS, m->mpc_apicid);
 		return;
 	}
 	num_processors++;
@@ -616,6 +616,31 @@ static inline void __init construct_defa
 	}
 }
 
+#ifdef CONFIG_X86_IO_APIC
+/* irq_vector must have an entry for all RTEs of all I/O APICs. */
+void __init alloc_irq_vector_array(void)
+{
+	int total = 0;
+	int idx;
+	union IO_APIC_reg_01 reg_01;
+
+	/* The I/O APIC fixmaps aren't inited yet, so use the first one. */
+	for (idx = 0; idx < nr_ioapics; idx++) {
+		set_fixmap_nocache(FIX_IO_APIC_BASE_0, mp_ioapics[idx].mpc_apicaddr);
+		reg_01.raw = io_apic_read(0, 1);
+		total += reg_01.bits.entries + 1;
+	}
+
+	/* Always alloc at least NR_IRQS vectors. */
+	nr_irqs = max(total, NR_IRQS);
+	irq_vector = (u8 *) alloc_bootmem(nr_irqs);
+	memset(irq_vector, 0, nr_irqs);
+	irq_vector[0] = FIRST_DEVICE_VECTOR;
+}
+#else
+void __init alloc_irq_vector_array(void) { }
+#endif /* CONFIG_X86_IO_APIC */
+
 static struct intel_mp_floating *mpf_found;
 
 /*
@@ -633,6 +658,7 @@ void __init get_smp_config (void)
 	 */
 	if (acpi_lapic && acpi_ioapic) {
 		printk(KERN_INFO "Using ACPI (MADT) for SMP configuration information\n");
+		alloc_irq_vector_array();
 		return;
 	}
 	else if (acpi_lapic)
@@ -661,10 +687,11 @@ void __init get_smp_config (void)
		 * Read the physical hardware table. Anything here will
		 * override the defaults.
		 */
-		if (!smp_read_mpc((void *)mpf->mpf_physptr)) {
+		if (!smp_read_mpc((void *)phys_to_virt(mpf->mpf_physptr))) {
 			smp_found_config = 0;
 			printk(KERN_ERR "BIOS bug, MP table errors detected!...\n");
 			printk(KERN_ERR "... disabling SMP support. (tell your hw vendor)\n");
+			alloc_irq_vector_array();
 			return;
 		}
 		/*
@@ -688,6 +715,7 @@ void __init get_smp_config (void)
 	} else
 		BUG();
 
+	alloc_irq_vector_array();
 	printk(KERN_INFO "Processors: %d\n", num_processors);
 	/*
 	 * Only use the first configuration found.
@@ -830,7 +858,7 @@ void __init mp_register_lapic (
 	MP_processor_info(&processor);
 }
 
-#ifdef CONFIG_X86_IO_APIC
+#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_ACPI_INTERPRETER)
 
 #define MP_ISA_BUS		0
 #define MP_MAX_IOAPIC_PIN	127
@@ -1019,10 +1047,6 @@ void __init mp_config_acpi_legacy_irqs (
 	}
 }
 
-#ifdef CONFIG_ACPI
-
-/* Ensure the ACPI SCI interrupt level is active low, edge-triggered */
-
 extern FADT_DESCRIPTOR acpi_fadt;
 
 void __init mp_config_ioapic_for_sci(int irq)
@@ -1031,6 +1055,7 @@ void __init mp_config_ioapic_for_sci(int
 	int ioapic_pin;
 	struct acpi_table_madt *madt;
 	struct acpi_table_int_src_ovr *entry = NULL;
+	acpi_interrupt_flags flags;
 	void *madt_end;
 	acpi_status status;
@@ -1049,32 +1074,37 @@ void __init mp_config_ioapic_for_sci(int
 
 		while ((void *) entry < madt_end) {
 			if (entry->header.type == ACPI_MADT_INT_SRC_OVR &&
-			    acpi_fadt.sci_int == entry->bus_irq) {
-				/*
-				 * See the note at the end of ACPI 2.0b section
-				 * 5.2.10.8 for what this is about.
-				 */
-				if (entry->bus_irq != entry->global_irq) {
-					acpi_fadt.sci_int = entry->global_irq;
-					irq = entry->global_irq;
-					break;
-				}
-				else
-					return;
-			}
-
+			    acpi_fadt.sci_int == entry->bus_irq)
+				goto found;
+
 			entry = (struct acpi_table_int_src_ovr *)
 				((unsigned long) entry + entry->header.length);
 		}
 	}
+	/*
+	 * Although the ACPI spec says that the SCI should be level/low
+	 * don't reprogram it unless there is an explicit MADT OVR entry
+	 * instructing us to do so -- otherwise we break Tyan boards which
+	 * have the SCI wired edge/high but no MADT OVR.
+	 */
+	return;
+
+found:
+	/*
+	 * See the note at the end of ACPI 2.0b section
+	 * 5.2.10.8 for what this is about.
+	 */
+	flags = entry->flags;
+	acpi_fadt.sci_int = entry->global_irq;
+	irq = entry->global_irq;
 
 	ioapic = mp_find_ioapic(irq);
 
 	ioapic_pin = irq - mp_ioapic_routing[ioapic].irq_start;
 
-	io_apic_set_pci_routing(ioapic, ioapic_pin, irq, 1, 1); // Active low, level triggered
+	io_apic_set_pci_routing(ioapic, ioapic_pin, irq,
+		(flags.trigger >> 1) , (flags.polarity >> 1));
 }
 
-#endif /* CONFIG_ACPI */
 
 #ifdef CONFIG_ACPI_PCI
@@ -1110,8 +1140,10 @@ void __init mp_parse_prt (void)
 		}
 
 		/* Don't set up the ACPI SCI because it's already set up */
-		if (acpi_fadt.sci_int == irq)
+		if (acpi_fadt.sci_int == irq) {
+			entry->irq = irq; /* we still need to set entry's irq */
 			continue;
+		}
 
 		ioapic = mp_find_ioapic(irq);
 		if (ioapic < 0)
@@ -1136,15 +1168,19 @@ void __init mp_parse_prt (void)
 		if ((1<<ioapic_pin) & mp_ioapic_routing[ioapic].pin_programmed[idx]) {
-			entry->irq = irq;
+			if (use_pci_vector() && !platform_legacy_irq(irq))
+				irq = IO_APIC_VECTOR(irq);
+			entry->irq = irq;
 			continue;
 		}
 
 		mp_ioapic_routing[ioapic].pin_programmed[idx] |= (1<<ioapic_pin);
 
-		entry->irq = irq;
-
+		if (!io_apic_set_pci_routing(ioapic, ioapic_pin, irq, edge_level, active_high_low)) {
+			if (use_pci_vector() && !platform_legacy_irq(irq))
+				irq = IO_APIC_VECTOR(irq);
+			entry->irq = irq;
+		}
 		printk(KERN_DEBUG "%02x:%02x:%02x[%c] -> %d-%d -> IRQ %d\n",
 			entry->id.segment, entry->id.bus,
 			entry->id.device, ('A' + entry->pin),
@@ -1154,5 +1190,5 @@ void __init mp_parse_prt (void)
 }
 
 #endif /*CONFIG_ACPI_PCI*/
-#endif /* CONFIG_X86_IO_APIC */
+#endif /*CONFIG_X86_IO_APIC && CONFIG_ACPI_INTERPRETER*/
 #endif /*CONFIG_ACPI_BOOT*/
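
The point of alloc_irq_vector_array() above is that the vector table is sized from what the I/O APICs actually report rather than from a compile-time NR_IRQS: register 1 of each I/O APIC carries its redirection-entry count minus one. A stand-alone sketch of just that sizing rule (editor's illustration, not part of the patch; the 224 floor and the two-I/O-APIC sample values are made up):

#include <stdio.h>

#define NR_IRQS 224	/* illustrative floor, mirrors the patch's max() */

/* Sum the RTE counts reported by each I/O APIC's version register,
 * where the hardware field encodes (number of entries - 1). */
static int size_irq_vector(const unsigned int entries_field[], int nr_ioapics)
{
	int idx, total = 0;

	for (idx = 0; idx < nr_ioapics; idx++)
		total += entries_field[idx] + 1;
	return total > NR_IRQS ? total : NR_IRQS;	/* never below NR_IRQS */
}

int main(void)
{
	unsigned int two_ioapics[] = { 23, 23 };	/* 24 RTEs each */

	printf("nr_irqs = %d\n", size_irq_vector(two_ioapics, 2));
	return 0;
}
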
--- linux-2.6.0-test6/arch/i386/kernel/nmi.c	2003-08-08 22:55:10.000000000 -0700
+++ 25/arch/i386/kernel/nmi.c	2003-10-05 00:33:38.000000000 -0700
@@ -31,7 +31,16 @@
 #include
 #include
 
+#ifdef CONFIG_KGDB
+#include
+#ifdef CONFIG_SMP
+unsigned int nmi_watchdog = NMI_IO_APIC;
+#else
+unsigned int nmi_watchdog = NMI_LOCAL_APIC;
+#endif
+#else
 unsigned int nmi_watchdog = NMI_NONE;
+#endif
 static unsigned int nmi_hz = HZ;
 unsigned int nmi_perfctr_msr;	/* the MSR to reset in NMI handler */
 extern void show_registers(struct pt_regs *regs);
@@ -408,6 +417,9 @@ void touch_nmi_watchdog (void)
 		for (i = 0; i < NR_CPUS; i++)
 			alert_counter[i] = 0;
 }
+#ifdef CONFIG_KGDB
+int tune_watchdog = 5*HZ;
+#endif
 
 void nmi_watchdog_tick (struct pt_regs * regs)
 {
@@ -421,12 +433,24 @@ void nmi_watchdog_tick (struct pt_regs *
 
 	sum = irq_stat[cpu].apic_timer_irqs;
 
+#ifdef CONFIG_KGDB
+	if (! in_kgdb(regs) && last_irq_sums[cpu] == sum ) {
+
+#else
 	if (last_irq_sums[cpu] == sum) {
+#endif
 		/*
 		 * Ayiee, looks like this CPU is stuck ...
 		 * wait a few IRQs (5 seconds) before doing the oops ...
 		 */
 		alert_counter[cpu]++;
+#ifdef CONFIG_KGDB
+		if (alert_counter[cpu] == tune_watchdog) {
+			kgdb_handle_exception(2, SIGPWR, 0, regs);
+			last_irq_sums[cpu] = sum;
+			alert_counter[cpu] = 0;
+		}
+#endif
 		if (alert_counter[cpu] == 5*nmi_hz) {
 			spin_lock(&nmi_print_lock);
 			/*
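
The watchdog change above is worth restating: a CPU is declared stuck when its APIC timer interrupt count stops moving, and with KGDB configured the handler traps into the debugger once the per-CPU alert counter reaches tune_watchdog, then clears the counter so the watchdog re-arms after the debugger resumes. A stand-alone sketch of that detection logic (editor's illustration, not part of the patch; the threshold and the fixed interrupt count are made up):

#include <stdio.h>

#define WATCHDOG_TICKS 5	/* stand-in for tune_watchdog / 5*nmi_hz */

static unsigned long last_sum;
static int alert_counter;

/* Called once per NMI tick: a CPU whose interrupt count has not moved
 * for WATCHDOG_TICKS consecutive ticks is considered stuck. */
static int watchdog_tick(unsigned long sum)
{
	if (sum == last_sum) {
		if (++alert_counter == WATCHDOG_TICKS) {
			alert_counter = 0;	/* re-arm, as the kgdb hook does */
			return 1;		/* report: this CPU looks stuck */
		}
	} else {
		last_sum = sum;
		alert_counter = 0;
	}
	return 0;
}

int main(void)
{
	int tick;

	for (tick = 0; tick < 7; tick++)	/* count never moves */
		if (watchdog_tick(42))
			printf("stuck after tick %d\n", tick);
	return 0;
}
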
--- linux-2.6.0-test6/arch/i386/kernel/process.c	2003-09-27 18:57:43.000000000 -0700
+++ 25/arch/i386/kernel/process.c	2003-10-05 00:36:48.000000000 -0700
@@ -47,6 +47,7 @@
 #include
 #include
 #include
+#include
 #ifdef CONFIG_MATH_EMULATION
 #include
 #endif
@@ -298,6 +299,9 @@ void flush_thread(void)
 	struct task_struct *tsk = current;
 
 	memset(tsk->thread.debugreg, 0, sizeof(unsigned long)*8);
+#ifdef CONFIG_X86_HIGH_ENTRY
+	clear_thread_flag(TIF_DB7);
+#endif
 	memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
 	/*
 	 * Forget coprocessor state..
 	 */
@@ -311,9 +315,8 @@ void release_thread(struct task_struct *
 	if (dead_task->mm) {
 		// temporary debugging check
 		if (dead_task->mm->context.size) {
-			printk("WARNING: dead process %8s still has LDT? <%p/%d>\n",
+			printk("WARNING: dead process %8s still has LDT? <%d>\n",
 				dead_task->comm,
-				dead_task->mm->context.ldt,
 				dead_task->mm->context.size);
 			BUG();
 		}
@@ -348,7 +351,17 @@ int copy_thread(int nr, unsigned long cl
 	p->thread.esp = (unsigned long) childregs;
 	p->thread.esp0 = (unsigned long) (childregs+1);
 
+	/*
+	 * get the two stack pages, for the virtual stack.
+	 *
+	 * IMPORTANT: this code relies on the fact that the task
+	 * structure is an 8K aligned piece of physical memory.
+	 */
+	p->thread.stack_page0 = virt_to_page((unsigned long)p->thread_info);
+	p->thread.stack_page1 = virt_to_page((unsigned long)p->thread_info + PAGE_SIZE);
+
 	p->thread.eip = (unsigned long) ret_from_fork;
+	p->thread_info->real_stack = p->thread_info;
 
 	savesegment(fs,p->thread.fs);
 	savesegment(gs,p->thread.gs);
@@ -500,10 +513,40 @@ struct task_struct * __switch_to(struct
 
 	__unlazy_fpu(prev_p);
 
+#ifdef CONFIG_X86_HIGH_ENTRY
+	/*
+	 * Set the ptes of the virtual stack. (NOTE: a one-page TLB flush is
+	 * needed because otherwise NMIs could interrupt the
+	 * user-return code with a virtual stack and stale TLBs.)
+	 */
+	__kunmap_atomic_type(KM_VSTACK0);
+	__kunmap_atomic_type(KM_VSTACK1);
+	__kmap_atomic(next->stack_page0, KM_VSTACK0);
+	__kmap_atomic(next->stack_page1, KM_VSTACK1);
+
+	/*
+	 * NOTE: here we rely on the task being the stack as well
+	 */
+	next_p->thread_info->virtual_stack = (void *)__kmap_atomic_vaddr(KM_VSTACK0);
+
+#if defined(CONFIG_PREEMPT) && defined(CONFIG_SMP)
+	/*
+	 * If next was preempted on entry from userspace to kernel,
+	 * and now it's on a different cpu, we need to adjust %esp.
+	 * This assumes that entry.S does not copy %esp while on the
+	 * virtual stack (with interrupts enabled): which is so,
+	 * except within __SWITCH_KERNELSPACE itself.
+	 */
+	if (unlikely(next->esp >= TASK_SIZE)) {
+		next->esp &= THREAD_SIZE - 1;
+		next->esp |= (unsigned long) next_p->thread_info->virtual_stack;
+	}
+#endif
+#endif
 	/*
-	 * Reload esp0, LDT and the page table pointer:
+	 * Reload esp0:
 	 */
-	load_esp0(tss, next->esp0);
+	load_esp0(tss, virtual_esp0(next_p));
 
 	/*
 	 * Load the per-thread Thread-Local Storage descriptor.
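
The %esp fix-up in __switch_to() above relies on THREAD_SIZE being a power of two: masking with THREAD_SIZE - 1 keeps only the offset into the 8 KB stack, and OR-ing in the new window base relocates the pointer into the current CPU's virtual-stack mapping. A stand-alone sketch of the arithmetic (editor's illustration, not part of the patch; the addresses are made up):

#include <stdio.h>

#define THREAD_SIZE 0x2000UL	/* 8 KB, power of two */

/* Keep esp's offset inside the stack, swap in the new window base. */
static unsigned long rebase_esp(unsigned long esp, unsigned long new_base)
{
	esp &= THREAD_SIZE - 1;		/* offset within the 8 KB stack */
	esp |= new_base;		/* new_base is THREAD_SIZE aligned */
	return esp;
}

int main(void)
{
	/* e.g. old window at 0xfe000000, new one at 0xfe004000 */
	printf("%#lx\n", rebase_esp(0xfe001a30UL, 0xfe004000UL));
	return 0;
}
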
--- linux-2.6.0-test6/arch/i386/kernel/reboot.c	2003-09-27 18:57:43.000000000 -0700
+++ 25/arch/i386/kernel/reboot.c	2003-10-05 00:36:48.000000000 -0700
@@ -7,6 +7,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include "mach_reboot.h"
@@ -153,12 +154,11 @@ void machine_real_restart(unsigned char
 	CMOS_WRITE(0x00, 0x8f);
 	spin_unlock_irqrestore(&rtc_lock, flags);
 
-	/* Remap the kernel at virtual address zero, as well as offset zero
-	   from the kernel segment. This assumes the kernel segment starts at
-	   virtual address PAGE_OFFSET. */
-
-	memcpy (swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS,
-		sizeof (swapper_pg_dir [0]) * KERNEL_PGD_PTRS);
+	/*
+	 * Remap the first 16 MB of RAM (which includes the kernel image)
+	 * at virtual address zero:
+	 */
+	setup_identity_mappings(swapper_pg_dir, 0, 16*1024*1024);
 
 	/*
 	 * Use `swapper_pg_dir' as our page directory.
@@ -262,7 +262,12 @@ void machine_restart(char * __unused)
 	disable_IO_APIC();
 #endif
 
-	if(!reboot_thru_bios) {
+	if (!reboot_thru_bios) {
+		if (efi_enabled) {
+			efi.reset_system(EFI_RESET_COLD, EFI_SUCCESS, 0, 0);
+			__asm__ __volatile__("lidt %0": :"m" (no_idt));
+			__asm__ __volatile__("int3");
+		}
 		/* rebooting needs to touch the page at absolute addr 0 */
 		*((unsigned short *)__va(0x472)) = reboot_mode;
 		for (;;) {
@@ -272,6 +277,8 @@ void machine_restart(char * __unused)
 			__asm__ __volatile__("int3");
 		}
 	}
+	if (efi_enabled)
+		efi.reset_system(EFI_RESET_WARM, EFI_SUCCESS, 0, 0);
 
 	machine_real_restart(jump_to_bios, sizeof(jump_to_bios));
 }
@@ -282,6 +289,8 @@ void machine_halt(void)
 
 void machine_power_off(void)
 {
+	if (efi_enabled)
+		efi.reset_system(EFI_RESET_SHUTDOWN, EFI_SUCCESS, 0, 0);
 	if (pm_power_off)
 		pm_power_off();
 }
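
The restart ordering above, restated: when not rebooting through the BIOS, an EFI system first asks the firmware for a cold reset (which should not return), then falls back to the keyboard-controller reset loop; the warm EFI reset is only reached on the reboot_thru_bios path, just before the real-mode BIOS jump. A stand-alone sketch of that decision ladder (editor's illustration, not part of the patch; all names and printed strings are made up):

#include <stdio.h>

/* Illustrative stand-ins for kernel state and the firmware call. */
static int efi_enabled = 1;
static int reboot_thru_bios = 0;

static void efi_reset(const char *kind) { printf("EFI %s reset\n", kind); }
static void keyboard_reset(void)        { printf("8042 keyboard-controller reset\n"); }
static void bios_reset(void)            { printf("real-mode BIOS reboot\n"); }

static void machine_restart_sketch(void)
{
	if (!reboot_thru_bios) {
		if (efi_enabled)
			efi_reset("cold");	/* normally does not return */
		keyboard_reset();		/* fallback; the kernel loops here */
		return;
	}
	if (efi_enabled)
		efi_reset("warm");		/* normally does not return */
	bios_reset();
}

int main(void)
{
	machine_restart_sketch();
	return 0;
}
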
--- linux-2.6.0-test6/arch/i386/kernel/setup.c	2003-09-27 18:57:43.000000000 -0700
+++ 25/arch/i386/kernel/setup.c	2003-10-05 00:36:22.000000000 -0700
@@ -36,6 +36,8 @@
 #include
 #include
 #include
+#include
+#include
 #include