From efc75006b820c9b8e186c7650fc24edd5d0509af Mon Sep 17 00:00:00 2001 From: alex Date: Thu, 15 Jan 2004 00:06:25 +0000 Subject: [PATCH] - 2.6 fixes landed --- lnet/include/lnet/list.h | 1 - lnet/klnds/socklnd/socklnd_cb.c | 4 + lnet/libcfs/debug.c | 6 +- lustre/configure.in | 2 +- lustre/include/liblustre.h | 4 + lustre/include/linux/lustre_compat25.h | 23 +- lustre/include/linux/lustre_mds.h | 8 +- lustre/kernel_patches/patches/2.6.0-mm2.patch | 203869 ++++++++++++++++++ .../patches/dev_read_only_2.6.0.patch | 96 +- .../patches/export_symbols-2.6.0.patch | 61 + .../patches/ext3-ea-in-inode-2.6.0.patch | 832 + .../patches/ext3-init-generation-2.6.0.patch | 12 + .../patches/ext3-map_inode_page-2.6.0.patch | 21 +- .../ext3-start_this_handle-must-return-error.patch | 22 + .../patches/invalidate_show-2.6.0.patch | 54 + .../kernel_patches/patches/iopen-2.6.0-test6.patch | 107 +- .../patches/kernel_text_address-2.6.0.patch | 45 + .../kernel_patches/patches/vfs_intent_2.6.0.patch | 762 + .../patches/vfs_nointent_2.6.0-test6.patch | 76 +- lustre/kernel_patches/series/kgdb_2.6.0 | 20 + lustre/llite/Makefile.mk | 2 +- lustre/llite/dcache.c | 12 +- lustre/llite/llite_lib.c | 2 +- lustre/llite/lproc_llite.c | 2 +- lustre/llite/namei.c | 2 +- lustre/llite/rw26.c | 90 +- lustre/llite/super25.c | 25 +- lustre/lov/lproc_lov.c | 2 +- lustre/lvfs/fsfilt_ext3.c | 5 +- lustre/mds/mds_open.c | 3 +- lustre/obdclass/class_obd.c | 2 +- lustre/obdclass/lprocfs_status.c | 2 +- lustre/obdfilter/lproc_obdfilter.c | 7 +- lustre/osc/Makefile.mk | 2 +- lustre/osc/lproc_osc.c | 2 +- lustre/portals/include/portals/list.h | 1 - lustre/portals/knals/socknal/socknal_cb.c | 4 + lustre/portals/libcfs/debug.c | 6 +- lustre/ptlrpc/Makefile.mk | 3 +- lustre/tests/llmount.sh | 2 +- lustre/tests/llmountcleanup.sh | 3 +- lustre/tests/llrmount.sh | 2 +- lustre/utils/Makefile.mk | 4 +- 43 files changed, 206009 insertions(+), 201 deletions(-) create mode 100644 lustre/kernel_patches/patches/2.6.0-mm2.patch create mode 100644 lustre/kernel_patches/patches/export_symbols-2.6.0.patch create mode 100644 lustre/kernel_patches/patches/ext3-ea-in-inode-2.6.0.patch create mode 100644 lustre/kernel_patches/patches/ext3-init-generation-2.6.0.patch create mode 100644 lustre/kernel_patches/patches/ext3-start_this_handle-must-return-error.patch create mode 100644 lustre/kernel_patches/patches/invalidate_show-2.6.0.patch create mode 100644 lustre/kernel_patches/patches/kernel_text_address-2.6.0.patch create mode 100644 lustre/kernel_patches/patches/vfs_intent_2.6.0.patch create mode 100644 lustre/kernel_patches/series/kgdb_2.6.0 diff --git a/lnet/include/lnet/list.h b/lnet/include/lnet/list.h index 57713cb..9cab047 100644 --- a/lnet/include/lnet/list.h +++ b/lnet/include/lnet/list.h @@ -1,5 +1,4 @@ #ifndef _LINUX_LIST_H - /* * Simple doubly linked list implementation. * diff --git a/lnet/klnds/socklnd/socklnd_cb.c b/lnet/klnds/socklnd/socklnd_cb.c index 64278c6..985b432 100644 --- a/lnet/klnds/socklnd/socklnd_cb.c +++ b/lnet/klnds/socklnd/socklnd_cb.c @@ -2137,7 +2137,11 @@ ksocknal_setup_sock (struct socket *sock) int option; struct linger linger; +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) + sock->sk->sk_allocation = GFP_NOFS; +#else sock->sk->allocation = GFP_NOFS; +#endif /* Ensure this socket aborts active sends immediately when we close * it. 
*/ diff --git a/lnet/libcfs/debug.c b/lnet/libcfs/debug.c index 52e7493..d563a76 100644 --- a/lnet/libcfs/debug.c +++ b/lnet/libcfs/debug.c @@ -687,6 +687,7 @@ __s32 portals_debug_copy_to_user(char *buf, unsigned long len) list_for_each(pos, &my_pages) { unsigned long to_copy; page = list_entry(pos, struct page, list); + void *addr; to_copy = min(total - off, PAGE_SIZE); if (to_copy == 0) { @@ -694,8 +695,9 @@ __s32 portals_debug_copy_to_user(char *buf, unsigned long len) to_copy = min(debug_size - off, PAGE_SIZE); } finish_partial: - memcpy(kmap(page), debug_buf + off, to_copy); - kunmap(page); + addr = kmap_atomic(page, KM_USER0); + memcpy(addr, debug_buf + off, to_copy); + kunmap_atomic(addr, KM_USER0); copied += to_copy; if (copied >= total) break; diff --git a/lustre/configure.in b/lustre/configure.in index c60f868..d1aa6aa 100644 --- a/lustre/configure.in +++ b/lustre/configure.in @@ -78,6 +78,6 @@ AC_OUTPUT([Makefile lvfs/Makefile portals/Makefile portals/Kernelenv \ lov/Makefile osc/Makefile mdc/Makefile mds/Makefile ost/Makefile \ cobd/Makefile ptlbd/Makefile conf/Makefile tests/Makefile \ utils/Makefile utils/Lustre/Makefile obdfilter/Makefile \ - obdclass/Makefile snapfs/Makefile snapfs/utils/Makefile \ + obdclass/Makefile snapfs/Makefile snapfs/utils/Makefile ldlm/Makefile \ include/Makefile include/linux/Makefile llite/Makefile doc/Makefile scripts/Makefile \ scripts/lustre.spec]) diff --git a/lustre/include/liblustre.h b/lustre/include/liblustre.h index 872bc5b..6c6ac1d 100644 --- a/lustre/include/liblustre.h +++ b/lustre/include/liblustre.h @@ -716,8 +716,12 @@ typedef struct { volatile int counter; } atomic_t; #define atomic_add(b,a) do {(a)->counter += b;} while (0) #define atomic_sub(b,a) do {(a)->counter -= b;} while (0) +#ifndef likely #define likely(exp) (exp) +#endif +#ifndef unlikely #define unlikely(exp) (exp) +#endif /* log related */ static inline int llog_init_commit_master(void) { return 0; } diff --git a/lustre/include/linux/lustre_compat25.h b/lustre/include/linux/lustre_compat25.h index ed544ab..40620ac 100644 --- a/lustre/include/linux/lustre_compat25.h +++ b/lustre/include/linux/lustre_compat25.h @@ -38,6 +38,7 @@ * initialization routines must be called after device * driver initialization */ +#undef module_init #define module_init(a) late_initcall(a) /* XXX our code should be using the 2.6 calls, not the other way around */ @@ -68,9 +69,12 @@ static inline void lustre_daemonize_helper(void) { LASSERT(current->signal != NULL); - current->signal->session = 1; - current->signal->pgrp = 1; - current->signal->tty = NULL; + current->session = 1; + if (current->group_leader) + current->group_leader->__pgrp = 1; + else + CERROR("we aren't group leader\n"); + current->tty = NULL; } #define rb_node_s rb_node @@ -78,6 +82,14 @@ static inline void lustre_daemonize_helper(void) typedef struct rb_root_s rb_root_t; typedef struct rb_node_s rb_node_t; +#define smp_num_cpus NR_CPUS + +#ifndef conditional_schedule +#define conditional_schedule() cond_resched() +#endif + +#include + #else /* 2.4.. */ #define ll_vfs_create(a,b,c,d) vfs_create(a,b,c) @@ -145,6 +157,11 @@ static inline void lustre_daemonize_helper(void) #define conditional_schedule() if (unlikely(need_resched())) schedule() #endif +/* to find proc_dir_entry from inode. 
2.6 has native one -bzzz */ +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,23) +#define PDE(ii) ((ii)->u.generic_ip) +#endif + #endif /* end of 2.4 compat macros */ #endif /* __KERNEL__ */ diff --git a/lustre/include/linux/lustre_mds.h b/lustre/include/linux/lustre_mds.h index 704436b..d303272 100644 --- a/lustre/include/linux/lustre_mds.h +++ b/lustre/include/linux/lustre_mds.h @@ -95,18 +95,18 @@ struct mds_update_record { /* i_attr_flags holds the open count in the inode in 2.4 */ //XXX Alex implement on 2.4 with i_attr_flags and find soln for 2.5 please #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) -# define mds_open_orphan_count(inode) (0) -# define mds_open_orphan_inc(inode) do { } while (0); -# define mds_open_orphan_dec_test(inode) (0) +# define mds_inode_oatomic(inode) ((atomic_t *)&(inode)->i_cindex) #else # define mds_inode_oatomic(inode) ((atomic_t *)&(inode)->i_attr_flags) +#endif + # define mds_open_orphan_count(inode) \ atomic_read(mds_inode_oatomic(inode)) # define mds_open_orphan_inc(inode) \ atomic_inc(mds_inode_oatomic(inode)) # define mds_open_orphan_dec_test(inode) \ atomic_dec_and_test(mds_inode_oatomic(inode)) -#endif + #define mds_inode_is_orphan(inode) ((inode)->i_flags & 0x4000000) #define mds_inode_set_orphan(inode) (inode)->i_flags |= 0x4000000 diff --git a/lustre/kernel_patches/patches/2.6.0-mm2.patch b/lustre/kernel_patches/patches/2.6.0-mm2.patch new file mode 100644 index 0000000..d01f3c7 --- /dev/null +++ b/lustre/kernel_patches/patches/2.6.0-mm2.patch @@ -0,0 +1,203869 @@ +--- linux-2.6.0/arch/alpha/kernel/irq.c 2003-10-08 15:07:08.000000000 -0700 ++++ 25/arch/alpha/kernel/irq.c 2003-12-28 23:22:10.000000000 -0800 +@@ -252,9 +252,11 @@ static int + irq_affinity_read_proc (char *page, char **start, off_t off, + int count, int *eof, void *data) + { +- if (count < HEX_DIGITS+1) ++ int len = cpumask_snprintf(page, count, irq_affinity[(long)data]); ++ if (count - len < 2) + return -EINVAL; +- return sprintf (page, "%016lx\n", irq_affinity[(long)data]); ++ len += sprintf(page + len, "\n"); ++ return len; + } + + static unsigned int +@@ -331,10 +333,11 @@ static int + prof_cpu_mask_read_proc(char *page, char **start, off_t off, + int count, int *eof, void *data) + { +- unsigned long *mask = (unsigned long *) data; +- if (count < HEX_DIGITS+1) ++ int len = cpumask_snprintf(page, count, *(cpumask_t *)data); ++ if (count - len < 2) + return -EINVAL; +- return sprintf (page, "%016lx\n", *mask); ++ len += sprintf(page + len, "\n"); ++ return len; + } + + static int +@@ -529,19 +532,21 @@ show_interrupts(struct seq_file *p, void + #ifdef CONFIG_SMP + int j; + #endif +- int i; ++ int i = *(loff_t *) v; + struct irqaction * action; + unsigned long flags; + + #ifdef CONFIG_SMP +- seq_puts(p, " "); +- for (i = 0; i < NR_CPUS; i++) +- if (cpu_online(i)) +- seq_printf(p, "CPU%d ", i); +- seq_putc(p, '\n'); ++ if (i == 0) { ++ seq_puts(p, " "); ++ for (i = 0; i < NR_CPUS; i++) ++ if (cpu_online(i)) ++ seq_printf(p, "CPU%d ", i); ++ seq_putc(p, '\n'); ++ } + #endif + +- for (i = 0; i < ACTUAL_NR_IRQS; i++) { ++ if (i < ACTUAL_NR_IRQS) { + spin_lock_irqsave(&irq_desc[i].lock, flags); + action = irq_desc[i].action; + if (!action) +@@ -568,15 +573,16 @@ show_interrupts(struct seq_file *p, void + seq_putc(p, '\n'); + unlock: + spin_unlock_irqrestore(&irq_desc[i].lock, flags); +- } ++ } else if (i == ACTUAL_NR_IRQS) { + #ifdef CONFIG_SMP +- seq_puts(p, "IPI: "); +- for (i = 0; i < NR_CPUS; i++) +- if (cpu_online(i)) +- seq_printf(p, "%10lu ", cpu_data[i].ipi_count); +- 
seq_putc(p, '\n'); ++ seq_puts(p, "IPI: "); ++ for (i = 0; i < NR_CPUS; i++) ++ if (cpu_online(i)) ++ seq_printf(p, "%10lu ", cpu_data[i].ipi_count); ++ seq_putc(p, '\n'); + #endif +- seq_printf(p, "ERR: %10lu\n", irq_err_count); ++ seq_printf(p, "ERR: %10lu\n", irq_err_count); ++ } + return 0; + } + +--- linux-2.6.0/arch/alpha/kernel/traps.c 2003-10-08 15:07:08.000000000 -0700 ++++ 25/arch/alpha/kernel/traps.c 2003-12-28 23:22:11.000000000 -0800 +@@ -636,6 +636,7 @@ do_entUna(void * va, unsigned long opcod + lock_kernel(); + printk("Bad unaligned kernel access at %016lx: %p %lx %ld\n", + pc, va, opcode, reg); ++ dump_stack(); + do_exit(SIGSEGV); + + got_exception: +--- linux-2.6.0/arch/arm26/kernel/irq.c 2003-10-08 15:07:08.000000000 -0700 ++++ 25/arch/arm26/kernel/irq.c 2003-12-28 23:22:06.000000000 -0800 +@@ -135,10 +135,10 @@ void enable_irq(unsigned int irq) + + int show_interrupts(struct seq_file *p, void *v) + { +- int i; ++ int i = *(loff_t *) v; + struct irqaction * action; + +- for (i = 0 ; i < NR_IRQS ; i++) { ++ if (i < NR_IRQS) { + action = irq_desc[i].action; + if (!action) + continue; +@@ -148,10 +148,10 @@ int show_interrupts(struct seq_file *p, + seq_printf(p, ", %s", action->name); + } + seq_putc(p, '\n'); ++ } else if (i == NR_IRQS) { ++ show_fiq_list(p, v); ++ seq_printf(p, "Err: %10lu\n", irq_err_count); + } +- +- show_fiq_list(p, v); +- seq_printf(p, "Err: %10lu\n", irq_err_count); + return 0; + } + +--- linux-2.6.0/arch/arm/kernel/irq.c 2003-10-08 15:07:08.000000000 -0700 ++++ 25/arch/arm/kernel/irq.c 2003-12-28 23:22:06.000000000 -0800 +@@ -169,11 +169,11 @@ void disable_irq_wake(unsigned int irq) + + int show_interrupts(struct seq_file *p, void *v) + { +- int i; ++ int i = *(loff_t *) v; + struct irqaction * action; + unsigned long flags; + +- for (i = 0 ; i < NR_IRQS ; i++) { ++ if (i < NR_IRQS) { + spin_lock_irqsave(&irq_controller_lock, flags); + action = irq_desc[i].action; + if (!action) +@@ -187,12 +187,12 @@ int show_interrupts(struct seq_file *p, + seq_putc(p, '\n'); + unlock: + spin_unlock_irqrestore(&irq_controller_lock, flags); +- } +- ++ } else if (i == NR_IRQS) { + #ifdef CONFIG_ARCH_ACORN +- show_fiq_list(p, v); ++ show_fiq_list(p, v); + #endif +- seq_printf(p, "Err: %10lu\n", irq_err_count); ++ seq_printf(p, "Err: %10lu\n", irq_err_count); ++ } + return 0; + } + +--- linux-2.6.0/arch/arm/mach-sa1100/Kconfig 2003-06-14 12:18:52.000000000 -0700 ++++ 25/arch/arm/mach-sa1100/Kconfig 2003-12-28 23:23:06.000000000 -0800 +@@ -304,7 +304,7 @@ config SA1100_YOPY + depends on ARCH_SA1100 + help + Say Y here to support the Yopy PDA. Product information at +- . See Documentation/arm/SA110/Yopy ++ . See Documentation/arm/SA1100/Yopy + for more. 
+ + config SA1100_STORK +--- linux-2.6.0/arch/arm/Makefile 2003-11-09 16:45:04.000000000 -0800 ++++ 25/arch/arm/Makefile 2003-12-28 23:21:55.000000000 -0800 +@@ -14,8 +14,6 @@ OBJCOPYFLAGS :=-O binary -R .note -R .co + GZFLAGS :=-9 + #CFLAGS +=-pipe + +-CFLAGS :=$(CFLAGS:-O2=-Os) +- + ifeq ($(CONFIG_FRAME_POINTER),y) + CFLAGS +=-fno-omit-frame-pointer -mapcs -mno-sched-prolog + endif +--- linux-2.6.0/arch/cris/kernel/irq.c 2003-10-08 15:07:08.000000000 -0700 ++++ 25/arch/cris/kernel/irq.c 2003-12-28 23:22:06.000000000 -0800 +@@ -89,11 +89,11 @@ static struct irqaction *irq_action[NR_I + + int show_interrupts(struct seq_file *p, void *v) + { +- int i; ++ int i = *(loff_t *) v; + struct irqaction * action; + unsigned long flags; + +- for (i = 0; i < NR_IRQS; i++) { ++ if (i < NR_IRQS) { + local_irq_save(flags); + action = irq_action[i]; + if (!action) +--- linux-2.6.0/arch/h8300/Kconfig 2003-11-09 16:45:04.000000000 -0800 ++++ 25/arch/h8300/Kconfig 2003-12-28 23:21:55.000000000 -0800 +@@ -5,6 +5,10 @@ + + mainmenu "uClinux/h8300 (w/o MMU) Kernel Configuration" + ++config H8300 ++ bool ++ default y ++ + config MMU + bool + default n +--- linux-2.6.0/arch/h8300/Makefile 2003-11-09 16:45:04.000000000 -0800 ++++ 25/arch/h8300/Makefile 2003-12-28 23:21:55.000000000 -0800 +@@ -34,7 +34,7 @@ cflags-$(CONFIG_CPU_H8S) := -ms + ldflags-$(CONFIG_CPU_H8S) := -mh8300self + + CFLAGS += $(cflags-y) +-CFLAGS += -mint32 -fno-builtin -Os ++CFLAGS += -mint32 -fno-builtin + CFLAGS += -g + CFLAGS += -D__linux__ + CFLAGS += -DUTS_SYSNAME=\"uClinux\" +--- linux-2.6.0/arch/h8300/platform/h8300h/ints.c 2003-10-08 15:07:08.000000000 -0700 ++++ 25/arch/h8300/platform/h8300h/ints.c 2003-12-28 23:22:06.000000000 -0800 +@@ -228,9 +228,9 @@ asmlinkage void process_int(int vec, str + + int show_interrupts(struct seq_file *p, void *v) + { +- int i; ++ int i = *(loff_t *) v; + +- for (i = 0; i < NR_IRQS; i++) { ++ if (i < NR_IRQS) { + if (irq_list[i]) { + seq_printf(p, "%3d: %10u ",i,irq_list[i]->count); + seq_printf(p, "%s\n", irq_list[i]->devname); +--- linux-2.6.0/arch/h8300/platform/h8s/ints.c 2003-10-17 15:58:03.000000000 -0700 ++++ 25/arch/h8300/platform/h8s/ints.c 2003-12-28 23:22:06.000000000 -0800 +@@ -280,9 +280,9 @@ asmlinkage void process_int(unsigned lon + + int show_interrupts(struct seq_file *p, void *v) + { +- int i; ++ int i = *(loff_t *) v; + +- for (i = 0; i < NR_IRQS; i++) { ++ if (i < NR_IRQS) { + if (irq_list[i]) { + seq_printf(p, "%3d: %10u ",i,irq_list[i]->count); + seq_printf(p, "%s\n", irq_list[i]->devname); +--- linux-2.6.0/arch/i386/boot/setup.S 2003-11-09 16:45:04.000000000 -0800 ++++ 25/arch/i386/boot/setup.S 2003-12-28 23:26:36.000000000 -0800 +@@ -162,7 +162,7 @@ cmd_line_ptr: .long 0 # (Header versio + # can be located anywhere in + # low memory 0x10000 or higher. 
+ +-ramdisk_max: .long MAXMEM-1 # (Header version 0x0203 or later) ++ramdisk_max: .long __MAXMEM-1 # (Header version 0x0203 or later) + # The highest safe address for + # the contents of an initrd + +--- linux-2.6.0/arch/i386/Kconfig 2003-11-09 16:45:04.000000000 -0800 ++++ 25/arch/i386/Kconfig 2003-12-28 23:26:36.000000000 -0800 +@@ -115,10 +115,15 @@ config ACPI_SRAT + default y + depends on NUMA && (X86_SUMMIT || X86_GENERICARCH) + ++config X86_SUMMIT_NUMA ++ bool ++ default y ++ depends on NUMA && (X86_SUMMIT || X86_GENERICARCH) ++ + config X86_CYCLONE_TIMER +- bool +- default y +- depends on X86_SUMMIT || X86_GENERICARCH ++ bool ++ default y ++ depends on X86_SUMMIT || X86_GENERICARCH + + config ES7000_CLUSTERED_APIC + bool +@@ -397,6 +402,54 @@ config X86_OOSTORE + depends on MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 + default y + ++config X86_4G ++ bool "4 GB kernel-space and 4 GB user-space virtual memory support" ++ help ++ This option is only useful for systems that have more than 1 GB ++ of RAM. ++ ++ The default kernel VM layout leaves 1 GB of virtual memory for ++ kernel-space mappings, and 3 GB of VM for user-space applications. ++ This option ups both the kernel-space VM and the user-space VM to ++ 4 GB. ++ ++ The cost of this option is additional TLB flushes done at ++ system-entry points that transition from user-mode into kernel-mode. ++ I.e. system calls and page faults, and IRQs that interrupt user-mode ++ code. There's also additional overhead to kernel operations that copy ++ memory to/from user-space. The overhead from this is hard to tell and ++ depends on the workload - it can be anything from no visible overhead ++ to 20-30% overhead. A good rule of thumb is to count with a runtime ++ overhead of 20%. ++ ++ The upside is the much increased kernel-space VM, which more than ++ quadruples the maximum amount of RAM supported. Kernels compiled with ++ this option boot on 64GB of RAM and still have more than 3.1 GB of ++ 'lowmem' left. Another bonus is that highmem IO bouncing decreases, ++ if used with drivers that still use bounce-buffers. ++ ++ There's also a 33% increase in user-space VM size - database ++ applications might see a boost from this. ++ ++ But the cost of the TLB flushes and the runtime overhead has to be ++ weighed against the bonuses offered by the larger VM spaces. The ++ dividing line depends on the actual workload - there might be 4 GB ++ systems that benefit from this option. Systems with less than 4 GB ++ of RAM will rarely see a benefit from this option - but it's not ++ out of question, the exact circumstances have to be considered. ++ ++config X86_SWITCH_PAGETABLES ++ def_bool X86_4G ++ ++config X86_4G_VM_LAYOUT ++ def_bool X86_4G ++ ++config X86_UACCESS_INDIRECT ++ def_bool X86_4G ++ ++config X86_HIGH_ENTRY ++ def_bool X86_4G ++ + config HPET_TIMER + bool "HPET Timer Support" + help +@@ -784,6 +837,25 @@ config MTRR + + See for more information. + ++config EFI ++ bool "Boot from EFI support (EXPERIMENTAL)" ++ depends on ACPI ++ default n ++ ---help--- ++ ++ This enables the the kernel to boot on EFI platforms using ++ system configuration information passed to it from the firmware. ++ This also enables the kernel to use any EFI runtime services that are ++ available (such as the EFI variable services). ++ ++ This option is only useful on systems that have EFI firmware ++ and will result in a kernel image that is ~8k larger. 
In addition, ++ you must use the latest ELILO loader available at ++ ftp.hpl.hp.com/pub/linux-ia64/ in order to take advantage of kernel ++ initialization using EFI information (neither GRUB nor LILO know ++ anything about EFI). However, even with this option, the resultant ++ kernel should continue to boot on existing non-EFI platforms. ++ + config HAVE_DEC_LOCK + bool + depends on (SMP || PREEMPT) && X86_CMPXCHG +@@ -793,7 +865,7 @@ config HAVE_DEC_LOCK + # Summit needs it only when NUMA is on + config BOOT_IOREMAP + bool +- depends on ((X86_SUMMIT || X86_GENERICARCH) && NUMA) ++ depends on (((X86_SUMMIT || X86_GENERICARCH) && NUMA) || (X86 && EFI)) + default y + + endmenu +@@ -1030,6 +1102,25 @@ config PCI_DIRECT + depends on PCI && ((PCI_GODIRECT || PCI_GOANY) || X86_VISWS) + default y + ++config PCI_USE_VECTOR ++ bool "Vector-based interrupt indexing" ++ depends on X86_LOCAL_APIC ++ default n ++ help ++ This replaces the current existing IRQ-based index interrupt scheme ++ with the vector-base index scheme. The advantages of vector base ++ over IRQ base are listed below: ++ 1) Support MSI implementation. ++ 2) Support future IOxAPIC hotplug ++ ++ Note that this enables MSI, Message Signaled Interrupt, on all ++ MSI capable device functions detected if users also install the ++ MSI patch. Message Signal Interrupt enables an MSI-capable ++ hardware device to send an inbound Memory Write on its PCI bus ++ instead of asserting IRQ signal on device IRQ pin. ++ ++ If you don't know what to do here, say N. ++ + source "drivers/pci/Kconfig" + + config ISA +@@ -1187,6 +1278,15 @@ config DEBUG_PAGEALLOC + This results in a large slowdown, but helps to find certain types + of memory corruptions. + ++config SPINLINE ++ bool "Spinlock inlining" ++ depends on DEBUG_KERNEL ++ help ++ This will change spinlocks from out of line to inline, making them ++ account cost to the callers in readprofile, rather than the lock ++ itself (as ".text.lock.filename"). This can be helpful for finding ++ the callers of locks. ++ + config DEBUG_HIGHMEM + bool "Highmem debugging" + depends on DEBUG_KERNEL && HIGHMEM +@@ -1203,20 +1303,208 @@ config DEBUG_INFO + Say Y here only if you plan to use gdb to debug the kernel. + If you don't debug the kernel, you can say N. + ++config LOCKMETER ++ bool "Kernel lock metering" ++ depends on SMP ++ help ++ Say Y to enable kernel lock metering, which adds overhead to SMP locks, ++ but allows you to see various statistics using the lockstat command. ++ + config DEBUG_SPINLOCK_SLEEP + bool "Sleep-inside-spinlock checking" + help + If you say Y here, various routines which may sleep will become very + noisy if they are called with a spinlock held. + ++config KGDB ++ bool "Include kgdb kernel debugger" ++ depends on DEBUG_KERNEL ++ help ++ If you say Y here, the system will be compiled with the debug ++ option (-g) and a debugging stub will be included in the ++ kernel. This stub communicates with gdb on another (host) ++ computer via a serial port. The host computer should have ++ access to the kernel binary file (vmlinux) and a serial port ++ that is connected to the target machine. Gdb can be made to ++ configure the serial port or you can use stty and setserial to ++ do this. See the 'target' command in gdb. This option also ++ configures in the ability to request a breakpoint early in the ++ boot process. To request the breakpoint just include 'kgdb' ++ as a boot option when booting the target machine. The system ++ will then break as soon as it looks at the boot options. 
This ++ option also installs a breakpoint in panic and sends any ++ kernel faults to the debugger. For more information see the ++ Documentation/i386/kgdb.txt file. ++ ++choice ++ depends on KGDB ++ prompt "Debug serial port BAUD" ++ default KGDB_115200BAUD ++ help ++ Gdb and the kernel stub need to agree on the baud rate to be ++ used. Some systems (x86 family at this writing) allow this to ++ be configured. ++ ++config KGDB_9600BAUD ++ bool "9600" ++ ++config KGDB_19200BAUD ++ bool "19200" ++ ++config KGDB_38400BAUD ++ bool "38400" ++ ++config KGDB_57600BAUD ++ bool "57600" ++ ++config KGDB_115200BAUD ++ bool "115200" ++endchoice ++ ++config KGDB_PORT ++ hex "hex I/O port address of the debug serial port" ++ depends on KGDB ++ default 3f8 ++ help ++ Some systems (x86 family at this writing) allow the port ++ address to be configured. The number entered is assumed to be ++ hex, don't put 0x in front of it. The standard address are: ++ COM1 3f8 , irq 4 and COM2 2f8 irq 3. Setserial /dev/ttySx ++ will tell you what you have. It is good to test the serial ++ connection with a live system before trying to debug. ++ ++config KGDB_IRQ ++ int "IRQ of the debug serial port" ++ depends on KGDB ++ default 4 ++ help ++ This is the irq for the debug port. If everything is working ++ correctly and the kernel has interrupts on a control C to the ++ port should cause a break into the kernel debug stub. ++ ++config DEBUG_INFO ++ bool ++ depends on KGDB ++ default y ++ ++config KGDB_MORE ++ bool "Add any additional compile options" ++ depends on KGDB ++ default n ++ help ++ Saying yes here turns on the ability to enter additional ++ compile options. ++ ++ ++config KGDB_OPTIONS ++ depends on KGDB_MORE ++ string "Additional compile arguments" ++ default "-O1" ++ help ++ This option allows you enter additional compile options for ++ the whole kernel compile. Each platform will have a default ++ that seems right for it. For example on PPC "-ggdb -O1", and ++ for i386 "-O1". Note that by configuring KGDB "-g" is already ++ turned on. In addition, on i386 platforms ++ "-fomit-frame-pointer" is deleted from the standard compile ++ options. ++ ++config NO_KGDB_CPUS ++ int "Number of CPUs" ++ depends on KGDB && SMP ++ default NR_CPUS ++ help ++ ++ This option sets the number of cpus for kgdb ONLY. It is used ++ to prune some internal structures so they look "nice" when ++ displayed with gdb. This is to overcome possibly larger ++ numbers that may have been entered above. Enter the real ++ number to get nice clean kgdb_info displays. ++ ++config KGDB_TS ++ bool "Enable kgdb time stamp macros?" ++ depends on KGDB ++ default n ++ help ++ Kgdb event macros allow you to instrument your code with calls ++ to the kgdb event recording function. The event log may be ++ examined with gdb at a break point. Turning on this ++ capability also allows you to choose how many events to ++ keep. Kgdb always keeps the lastest events. ++ ++choice ++ depends on KGDB_TS ++ prompt "Max number of time stamps to save?" ++ default KGDB_TS_128 ++ ++config KGDB_TS_64 ++ bool "64" ++ ++config KGDB_TS_128 ++ bool "128" ++ ++config KGDB_TS_256 ++ bool "256" ++ ++config KGDB_TS_512 ++ bool "512" ++ ++config KGDB_TS_1024 ++ bool "1024" ++ ++endchoice ++ ++config STACK_OVERFLOW_TEST ++ bool "Turn on kernel stack overflow testing?" ++ depends on KGDB ++ default n ++ help ++ This option enables code in the front line interrupt handlers ++ to check for kernel stack overflow on interrupts and system ++ calls. 
This is part of the kgdb code on x86 systems. ++ ++config KGDB_CONSOLE ++ bool "Enable serial console thru kgdb port" ++ depends on KGDB ++ default n ++ help ++ This option enables the command line "console=kgdb" option. ++ When the system is booted with this option in the command line ++ all kernel printk output is sent to gdb (as well as to other ++ consoles). For this to work gdb must be connected. For this ++ reason, this command line option will generate a breakpoint if ++ gdb has not yet connected. After the gdb continue command is ++ given all pent up console output will be printed by gdb on the ++ host machine. Neither this option, nor KGDB require the ++ serial driver to be configured. ++ ++config KGDB_SYSRQ ++ bool "Turn on SysRq 'G' command to do a break?" ++ depends on KGDB ++ default y ++ help ++ This option includes an option in the SysRq code that allows ++ you to enter SysRq G which generates a breakpoint to the KGDB ++ stub. This will work if the keyboard is alive and can ++ interrupt the system. Because of constraints on when the ++ serial port interrupt can be enabled, this code may allow you ++ to interrupt the system before the serial port control C is ++ available. Just say yes here. ++ + config FRAME_POINTER + bool "Compile the kernel with frame pointers" ++ default KGDB + help + If you say Y here the resulting kernel image will be slightly larger + and slower, but it will give very useful debugging information. + If you don't debug the kernel, you can say N, but we may not be able + to solve problems without frame pointers. + ++config MAGIC_SYSRQ ++ bool ++ depends on KGDB_SYSRQ ++ default y ++ + config X86_EXTRA_IRQS + bool + depends on X86_LOCAL_APIC || X86_VOYAGER +--- linux-2.6.0/arch/i386/kernel/acpi/boot.c 2003-11-23 19:03:00.000000000 -0800 ++++ 25/arch/i386/kernel/acpi/boot.c 2003-12-28 23:21:57.000000000 -0800 +@@ -26,6 +26,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -40,9 +41,8 @@ + + #define PREFIX "ACPI: " + +-extern int acpi_disabled; +-extern int acpi_irq; +-extern int acpi_ht; ++int acpi_noirq __initdata = 0; /* skip ACPI IRQ initialization */ ++int acpi_ht __initdata = 1; /* enable HT */ + + int acpi_lapic = 0; + int acpi_ioapic = 0; +@@ -249,29 +249,66 @@ acpi_parse_nmi_src ( + + #ifdef CONFIG_ACPI_BUS + /* +- * Set specified PIC IRQ to level triggered mode. ++ * "acpi_pic_sci=level" (current default) ++ * programs the PIC-mode SCI to Level Trigger. ++ * (NO-OP if the BIOS set Level Trigger already) ++ * ++ * If a PIC-mode SCI is not recogznied or gives spurious IRQ7's ++ * it may require Edge Trigger -- use "acpi_pic_sci=edge" ++ * (NO-OP if the BIOS set Edge Trigger already) + * + * Port 0x4d0-4d1 are ECLR1 and ECLR2, the Edge/Level Control Registers + * for the 8259 PIC. bit[n] = 1 means irq[n] is Level, otherwise Edge. + * ECLR1 is IRQ's 0-7 (IRQ 0, 1, 2 must be 0) + * ECLR2 is IRQ's 8-15 (IRQ 8, 13 must be 0) +- * +- * As the BIOS should have done this for us, +- * print a warning if the IRQ wasn't already set to level. 
+ */ + +-void acpi_pic_set_level_irq(unsigned int irq) ++static int __initdata acpi_pic_sci_trigger; /* 0: level, 1: edge */ ++ ++void __init ++acpi_pic_sci_set_trigger(unsigned int irq) + { + unsigned char mask = 1 << (irq & 7); + unsigned int port = 0x4d0 + (irq >> 3); + unsigned char val = inb(port); + ++ ++ printk(PREFIX "IRQ%d SCI:", irq); + if (!(val & mask)) { +- printk(KERN_WARNING PREFIX "IRQ %d was Edge Triggered, " +- "setting to Level Triggerd\n", irq); +- outb(val | mask, port); ++ printk(" Edge"); ++ ++ if (!acpi_pic_sci_trigger) { ++ printk(" set to Level"); ++ outb(val | mask, port); ++ } ++ } else { ++ printk(" Level"); ++ ++ if (acpi_pic_sci_trigger) { ++ printk(" set to Edge"); ++ outb(val | mask, port); ++ } ++ } ++ printk(" Trigger.\n"); ++} ++ ++int __init ++acpi_pic_sci_setup(char *str) ++{ ++ while (str && *str) { ++ if (strncmp(str, "level", 5) == 0) ++ acpi_pic_sci_trigger = 0; /* force level trigger */ ++ if (strncmp(str, "edge", 4) == 0) ++ acpi_pic_sci_trigger = 1; /* force edge trigger */ ++ str = strchr(str, ','); ++ if (str) ++ str += strspn(str, ", \t"); + } ++ return 1; + } ++ ++__setup("acpi_pic_sci=", acpi_pic_sci_setup); ++ + #endif /* CONFIG_ACPI_BUS */ + + +@@ -326,11 +363,48 @@ static int __init acpi_parse_hpet(unsign + } + #endif + ++/* detect the location of the ACPI PM Timer */ ++#ifdef CONFIG_X86_PM_TIMER ++extern u32 pmtmr_ioport; ++ ++static int __init acpi_parse_fadt(unsigned long phys, unsigned long size) ++{ ++ struct fadt_descriptor_rev2 *fadt =0; ++ ++ fadt = (struct fadt_descriptor_rev2*) __acpi_map_table(phys,size); ++ if(!fadt) { ++ printk(KERN_WARNING PREFIX "Unable to map FADT\n"); ++ return 0; ++ } ++ ++ if (fadt->revision >= FADT2_REVISION_ID) { ++ /* FADT rev. 2 */ ++ if (fadt->xpm_tmr_blk.address_space_id != ACPI_ADR_SPACE_SYSTEM_IO) ++ return 0; ++ ++ pmtmr_ioport = fadt->xpm_tmr_blk.address; ++ } else { ++ /* FADT rev. 1 */ ++ pmtmr_ioport = fadt->V1_pm_tmr_blk; ++ } ++ if (pmtmr_ioport) ++ printk(KERN_INFO PREFIX "PM-Timer IO Port: %#x\n", pmtmr_ioport); ++ return 0; ++} ++#endif ++ ++ + unsigned long __init + acpi_find_rsdp (void) + { + unsigned long rsdp_phys = 0; + ++ if (efi_enabled) { ++ if (efi.acpi20) ++ return __pa(efi.acpi20); ++ else if (efi.acpi) ++ return __pa(efi.acpi); ++ } + /* + * Scan memory looking for the RSDP signature. First search EBDA (low + * memory) paragraphs and then search upper memory (E0000-FFFFF). +@@ -380,8 +454,10 @@ acpi_boot_init (void) + * Initialize the ACPI boot-time table parser. + */ + result = acpi_table_init(); +- if (result) ++ if (result) { ++ acpi_disabled = 1; + return result; ++ } + + result = acpi_blacklisted(); + if (result) { +@@ -462,7 +538,7 @@ acpi_boot_init (void) + * If MPS is present, it will handle them, + * otherwise the system will stay in PIC mode + */ +- if (acpi_disabled || !acpi_irq) { ++ if (acpi_disabled || acpi_noirq) { + return 1; + } + +@@ -504,6 +580,8 @@ acpi_boot_init (void) + + acpi_irq_model = ACPI_IRQ_MODEL_IOAPIC; + ++ acpi_irq_balance_set(NULL); ++ + acpi_ioapic = 1; + + #endif /* CONFIG_X86_IO_APIC && CONFIG_ACPI_INTERPRETER */ +@@ -519,5 +597,9 @@ acpi_boot_init (void) + acpi_table_parse(ACPI_HPET, acpi_parse_hpet); + #endif + ++#ifdef CONFIG_X86_PM_TIMER ++ acpi_table_parse(ACPI_FADT, acpi_parse_fadt); ++#endif ++ + return 0; + } +--- linux-2.6.0/arch/i386/kernel/asm-offsets.c 2003-11-09 16:45:04.000000000 -0800 ++++ 25/arch/i386/kernel/asm-offsets.c 2003-12-28 23:26:36.000000000 -0800 +@@ -4,9 +4,11 @@ + * to extract and format the required data. 
+ */ + ++#include + #include + #include + #include "sigframe.h" ++#include + + #define DEFINE(sym, val) \ + asm volatile("\n->" #sym " %0 " #val : : "i" (val)) +@@ -28,4 +30,17 @@ void foo(void) + + DEFINE(RT_SIGFRAME_sigcontext, + offsetof (struct rt_sigframe, uc.uc_mcontext)); ++ DEFINE(TI_task, offsetof (struct thread_info, task)); ++ DEFINE(TI_exec_domain, offsetof (struct thread_info, exec_domain)); ++ DEFINE(TI_flags, offsetof (struct thread_info, flags)); ++ DEFINE(TI_preempt_count, offsetof (struct thread_info, preempt_count)); ++ DEFINE(TI_addr_limit, offsetof (struct thread_info, addr_limit)); ++ DEFINE(TI_real_stack, offsetof (struct thread_info, real_stack)); ++ DEFINE(TI_virtual_stack, offsetof (struct thread_info, virtual_stack)); ++ DEFINE(TI_user_pgd, offsetof (struct thread_info, user_pgd)); ++ ++ DEFINE(FIX_ENTRY_TRAMPOLINE_0_addr, __fix_to_virt(FIX_ENTRY_TRAMPOLINE_0)); ++ DEFINE(FIX_VSYSCALL_addr, __fix_to_virt(FIX_VSYSCALL)); ++ DEFINE(PAGE_SIZE_asm, PAGE_SIZE); ++ DEFINE(task_thread_db7, offsetof (struct task_struct, thread.debugreg[7])); + } +--- linux-2.6.0/arch/i386/kernel/cpu/common.c 2003-11-09 16:45:04.000000000 -0800 ++++ 25/arch/i386/kernel/cpu/common.c 2003-12-28 23:26:36.000000000 -0800 +@@ -510,16 +510,20 @@ void __init cpu_init (void) + BUG(); + enter_lazy_tlb(&init_mm, current); + +- load_esp0(t, thread->esp0); ++ load_esp0(t, thread); + set_tss_desc(cpu,t); + cpu_gdt_table[cpu][GDT_ENTRY_TSS].b &= 0xfffffdff; + load_TR_desc(); +- load_LDT(&init_mm.context); ++ if (cpu) ++ load_LDT(&init_mm.context); + + /* Set up doublefault TSS pointer in the GDT */ + __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss); + cpu_gdt_table[cpu][GDT_ENTRY_DOUBLEFAULT_TSS].b &= 0xfffffdff; + ++ if (cpu) ++ trap_init_virtual_GDT(); ++ + /* Clear %fs and %gs. 
*/ + asm volatile ("xorl %eax, %eax; movl %eax, %fs; movl %eax, %gs"); + +--- linux-2.6.0/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c 2003-09-08 13:58:55.000000000 -0700 ++++ 25/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c 2003-12-28 23:22:07.000000000 -0800 +@@ -73,6 +73,16 @@ static struct cpufreq_frequency_table op + { .frequency = CPUFREQ_TABLE_END } + }; + ++/* Ultra Low Voltage Intel Pentium M processor 1000MHz */ ++static struct cpufreq_frequency_table op_1000[] = ++ { ++ OP(600, 844), ++ OP(800, 972), ++ OP(900, 988), ++ OP(1000, 1004), ++ { .frequency = CPUFREQ_TABLE_END } ++ }; ++ + /* Low Voltage Intel Pentium M processor 1.10GHz */ + static struct cpufreq_frequency_table op_1100[] = + { +@@ -165,6 +175,7 @@ static struct cpufreq_frequency_table op + static const struct cpu_model models[] = + { + _CPU( 900, " 900"), ++ CPU(1000), + CPU(1100), + CPU(1200), + CPU(1300), +--- linux-2.6.0/arch/i386/kernel/cpu/intel.c 2003-11-23 19:03:00.000000000 -0800 ++++ 25/arch/i386/kernel/cpu/intel.c 2003-12-28 23:26:36.000000000 -0800 +@@ -1,5 +1,7 @@ ++#include + #include + #include ++ + #include + #include + #include +@@ -8,10 +10,15 @@ + #include + #include + #include ++#include + + #include "cpu.h" + +-extern int trap_init_f00f_bug(void); ++#ifdef CONFIG_X86_LOCAL_APIC ++#include ++#include ++#include ++#endif + + #ifdef CONFIG_X86_INTEL_USERCOPY + /* +@@ -157,7 +164,7 @@ static void __init init_intel(struct cpu + + c->f00f_bug = 1; + if ( !f00f_workaround_enabled ) { +- trap_init_f00f_bug(); ++ trap_init_virtual_IDT(); + printk(KERN_NOTICE "Intel Pentium with F0 0F bug - workaround enabled.\n"); + f00f_workaround_enabled = 1; + } +@@ -240,6 +247,12 @@ static void __init init_intel(struct cpu + /* SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until model 3 mask 3 */ + if ((c->x86<<8 | c->x86_model<<4 | c->x86_mask) < 0x633) + clear_bit(X86_FEATURE_SEP, c->x86_capability); ++ /* ++ * FIXME: SEP is disabled for 4G/4G for now: ++ */ ++#ifdef CONFIG_X86_HIGH_ENTRY ++ clear_bit(X86_FEATURE_SEP, c->x86_capability); ++#endif + + /* Names for the Pentium II/Celeron processors + detectable only by also checking the cache size. +@@ -277,6 +290,7 @@ static void __init init_intel(struct cpu + extern int phys_proc_id[NR_CPUS]; + + u32 eax, ebx, ecx, edx; ++ int index_lsb, index_msb, tmp; + int cpu = smp_processor_id(); + + cpuid(1, &eax, &ebx, &ecx, &edx); +@@ -285,6 +299,8 @@ static void __init init_intel(struct cpu + if (smp_num_siblings == 1) { + printk(KERN_INFO "CPU: Hyper-Threading is disabled\n"); + } else if (smp_num_siblings > 1 ) { ++ index_lsb = 0; ++ index_msb = 31; + /* + * At this point we only support two siblings per + * processor package. +@@ -295,13 +311,19 @@ static void __init init_intel(struct cpu + smp_num_siblings = 1; + goto too_many_siblings; + } +- /* cpuid returns the value latched in the HW at reset, +- * not the APIC ID register's value. For any box +- * whose BIOS changes APIC IDs, like clustered APIC +- * systems, we must use hard_smp_processor_id. +- * See Intel's IA-32 SW Dev's Manual Vol2 under CPUID. 
+- */ +- phys_proc_id[cpu] = hard_smp_processor_id() & ~(smp_num_siblings - 1); ++ tmp = smp_num_siblings; ++ while ((tmp & 1) == 0) { ++ tmp >>=1 ; ++ index_lsb++; ++ } ++ tmp = smp_num_siblings; ++ while ((tmp & 0x80000000 ) == 0) { ++ tmp <<=1 ; ++ index_msb--; ++ } ++ if (index_lsb != index_msb ) ++ index_msb++; ++ phys_proc_id[cpu] = phys_pkg_id((ebx >> 24) & 0xFF, index_msb); + + printk(KERN_INFO "CPU: Physical Processor ID: %d\n", + phys_proc_id[cpu]); +--- linux-2.6.0/arch/i386/kernel/dmi_scan.c 2003-10-08 15:07:08.000000000 -0700 ++++ 25/arch/i386/kernel/dmi_scan.c 2003-12-28 23:21:33.000000000 -0800 +@@ -6,6 +6,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -16,6 +17,7 @@ EXPORT_SYMBOL(dmi_broken); + + int is_sony_vaio_laptop; + int is_unsafe_smbus; ++int es7000_plat = 0; + + struct dmi_header + { +@@ -504,6 +506,7 @@ static __init int print_if_true(struct d + } + + ++#ifdef CONFIG_ACPI_BOOT + extern int acpi_disabled, acpi_force; + + static __init __attribute__((unused)) int acpi_disable(struct dmi_blacklist *d) +@@ -518,8 +521,6 @@ static __init __attribute__((unused)) in + return 0; + } + +- +-#ifdef CONFIG_ACPI_BOOT + extern int acpi_ht; + + /* +@@ -542,10 +543,8 @@ static __init __attribute__((unused)) in + #ifdef CONFIG_ACPI_PCI + static __init int disable_acpi_pci(struct dmi_blacklist *d) + { +- extern __init void pci_disable_acpi(void) ; +- + printk(KERN_NOTICE "%s detected: force use of pci=noacpi\n", d->ident); +- pci_disable_acpi(); ++ acpi_noirq_set(); + return 0; + } + #endif +@@ -1011,6 +1010,7 @@ static __init void dmi_check_blacklist(v + printk(KERN_NOTICE "ACPI disabled because your bios is from %s and too old\n", s); + printk(KERN_NOTICE "You can enable it with acpi=force\n"); + acpi_disabled = 1; ++ acpi_ht = 0; + } + } + } +--- linux-2.6.0/arch/i386/kernel/doublefault.c 2003-11-09 16:45:04.000000000 -0800 ++++ 25/arch/i386/kernel/doublefault.c 2003-12-28 23:26:36.000000000 -0800 +@@ -7,12 +7,13 @@ + #include + #include + #include ++#include + + #define DOUBLEFAULT_STACKSIZE (1024) + static unsigned long doublefault_stack[DOUBLEFAULT_STACKSIZE]; + #define STACK_START (unsigned long)(doublefault_stack+DOUBLEFAULT_STACKSIZE) + +-#define ptr_ok(x) ((x) > 0xc0000000 && (x) < 0xc1000000) ++#define ptr_ok(x) (((x) > __PAGE_OFFSET && (x) < (__PAGE_OFFSET + 0x01000000)) || ((x) >= FIXADDR_START)) + + static void doublefault_fn(void) + { +@@ -38,8 +39,8 @@ static void doublefault_fn(void) + + printk("eax = %08lx, ebx = %08lx, ecx = %08lx, edx = %08lx\n", + t->eax, t->ebx, t->ecx, t->edx); +- printk("esi = %08lx, edi = %08lx\n", +- t->esi, t->edi); ++ printk("esi = %08lx, edi = %08lx, ebp = %08lx\n", ++ t->esi, t->edi, t->ebp); + } + } + +--- /dev/null 2002-08-30 16:31:37.000000000 -0700 ++++ 25/arch/i386/kernel/efi.c 2003-12-28 23:21:45.000000000 -0800 +@@ -0,0 +1,645 @@ ++/* ++ * Extensible Firmware Interface ++ * ++ * Based on Extensible Firmware Interface Specification version 1.0 ++ * ++ * Copyright (C) 1999 VA Linux Systems ++ * Copyright (C) 1999 Walt Drummond ++ * Copyright (C) 1999-2002 Hewlett-Packard Co. ++ * David Mosberger-Tang ++ * Stephane Eranian ++ * ++ * All EFI Runtime Services are not implemented yet as EFI only ++ * supports physical mode addressing on SoftSDV. This is to be fixed ++ * in a future version. --drummond 1999-07-20 ++ * ++ * Implemented EFI runtime services and virtual mode calls. --davidm ++ * ++ * Goutham Rao: ++ * Skip non-WB memory and ignore empty memory ranges. 
++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#define EFI_DEBUG 0 ++#define PFX "EFI: " ++ ++extern efi_status_t asmlinkage efi_call_phys(void *, ...); ++ ++struct efi efi; ++struct efi efi_phys __initdata; ++struct efi_memory_map memmap __initdata; ++ ++/* ++ * We require an early boot_ioremap mapping mechanism initially ++ */ ++extern void * boot_ioremap(unsigned long, unsigned long); ++ ++/* ++ * efi_dir is allocated here, but the directory isn't created ++ * here, as proc_mkdir() doesn't work this early in the bootup ++ * process. Therefore, each module, like efivars, must test for ++ * if (!efi_dir) efi_dir = proc_mkdir("efi", NULL); ++ * prior to creating their own entries under /proc/efi. ++ */ ++#ifdef CONFIG_PROC_FS ++struct proc_dir_entry *efi_dir; ++#endif ++ ++ ++/* ++ * To make EFI call EFI runtime service in physical addressing mode we need ++ * prelog/epilog before/after the invocation to disable interrupt, to ++ * claim EFI runtime service handler exclusively and to duplicate a memory in ++ * low memory space say 0 - 3G. ++ */ ++ ++static unsigned long efi_rt_eflags; ++static spinlock_t efi_rt_lock = SPIN_LOCK_UNLOCKED; ++static pgd_t efi_bak_pg_dir_pointer[2]; ++ ++static void efi_call_phys_prelog(void) ++{ ++ unsigned long cr4; ++ unsigned long temp; ++ ++ spin_lock(&efi_rt_lock); ++ local_irq_save(efi_rt_eflags); ++ ++ /* ++ * If I don't have PSE, I should just duplicate two entries in page ++ * directory. If I have PSE, I just need to duplicate one entry in ++ * page directory. ++ */ ++ __asm__ __volatile__("movl %%cr4, %0":"=r"(cr4)); ++ ++ if (cr4 & X86_CR4_PSE) { ++ efi_bak_pg_dir_pointer[0].pgd = ++ swapper_pg_dir[pgd_index(0)].pgd; ++ swapper_pg_dir[0].pgd = ++ swapper_pg_dir[pgd_index(PAGE_OFFSET)].pgd; ++ } else { ++ efi_bak_pg_dir_pointer[0].pgd = ++ swapper_pg_dir[pgd_index(0)].pgd; ++ efi_bak_pg_dir_pointer[1].pgd = ++ swapper_pg_dir[pgd_index(0x400000)].pgd; ++ swapper_pg_dir[pgd_index(0)].pgd = ++ swapper_pg_dir[pgd_index(PAGE_OFFSET)].pgd; ++ temp = PAGE_OFFSET + 0x400000; ++ swapper_pg_dir[pgd_index(0x400000)].pgd = ++ swapper_pg_dir[pgd_index(temp)].pgd; ++ } ++ ++ /* ++ * After the lock is released, the original page table is restored. ++ */ ++ local_flush_tlb(); ++ ++ cpu_gdt_descr[0].address = __pa(cpu_gdt_descr[0].address); ++ __asm__ __volatile__("lgdt %0":"=m" ++ (*(struct Xgt_desc_struct *) __pa(&cpu_gdt_descr[0]))); ++} ++ ++static void efi_call_phys_epilog(void) ++{ ++ unsigned long cr4; ++ ++ cpu_gdt_descr[0].address = ++ (unsigned long) __va(cpu_gdt_descr[0].address); ++ __asm__ __volatile__("lgdt %0":"=m"(cpu_gdt_descr)); ++ __asm__ __volatile__("movl %%cr4, %0":"=r"(cr4)); ++ ++ if (cr4 & X86_CR4_PSE) { ++ swapper_pg_dir[pgd_index(0)].pgd = ++ efi_bak_pg_dir_pointer[0].pgd; ++ } else { ++ swapper_pg_dir[pgd_index(0)].pgd = ++ efi_bak_pg_dir_pointer[0].pgd; ++ swapper_pg_dir[pgd_index(0x400000)].pgd = ++ efi_bak_pg_dir_pointer[1].pgd; ++ } ++ ++ /* ++ * After the lock is released, the original page table is restored. 
++ */ ++ local_flush_tlb(); ++ ++ local_irq_restore(efi_rt_eflags); ++ spin_unlock(&efi_rt_lock); ++} ++ ++static efi_status_t ++phys_efi_set_virtual_address_map(unsigned long memory_map_size, ++ unsigned long descriptor_size, ++ u32 descriptor_version, ++ efi_memory_desc_t *virtual_map) ++{ ++ efi_status_t status; ++ ++ efi_call_phys_prelog(); ++ status = efi_call_phys(efi_phys.set_virtual_address_map, ++ memory_map_size, descriptor_size, ++ descriptor_version, virtual_map); ++ efi_call_phys_epilog(); ++ return status; ++} ++ ++efi_status_t ++phys_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc) ++{ ++ efi_status_t status; ++ ++ efi_call_phys_prelog(); ++ status = efi_call_phys(efi_phys.get_time, tm, tc); ++ efi_call_phys_epilog(); ++ return status; ++} ++ ++int inline efi_set_rtc_mmss(unsigned long nowtime) ++{ ++ int real_seconds, real_minutes; ++ efi_status_t status; ++ efi_time_t eft; ++ efi_time_cap_t cap; ++ ++ spin_lock(&efi_rt_lock); ++ status = efi.get_time(&eft, &cap); ++ spin_unlock(&efi_rt_lock); ++ if (status != EFI_SUCCESS) ++ panic("Ooops, efitime: can't read time!\n"); ++ real_seconds = nowtime % 60; ++ real_minutes = nowtime / 60; ++ ++ if (((abs(real_minutes - eft.minute) + 15)/30) & 1) ++ real_minutes += 30; ++ real_minutes %= 60; ++ ++ eft.minute = real_minutes; ++ eft.second = real_seconds; ++ ++ if (status != EFI_SUCCESS) { ++ printk("Ooops: efitime: can't read time!\n"); ++ return -1; ++ } ++ return 0; ++} ++/* ++ * This should only be used during kernel init and before runtime ++ * services have been remapped, therefore, we'll need to call in physical ++ * mode. Note, this call isn't used later, so mark it __init. ++ */ ++unsigned long inline __init efi_get_time(void) ++{ ++ efi_status_t status; ++ efi_time_t eft; ++ efi_time_cap_t cap; ++ ++ status = phys_efi_get_time(&eft, &cap); ++ if (status != EFI_SUCCESS) ++ printk("Oops: efitime: can't read time status: 0x%lx\n",status); ++ ++ return mktime(eft.year, eft.month, eft.day, eft.hour, ++ eft.minute, eft.second); ++} ++ ++int is_available_memory(efi_memory_desc_t * md) ++{ ++ if (!(md->attribute & EFI_MEMORY_WB)) ++ return 0; ++ ++ switch (md->type) { ++ case EFI_LOADER_CODE: ++ case EFI_LOADER_DATA: ++ case EFI_BOOT_SERVICES_CODE: ++ case EFI_BOOT_SERVICES_DATA: ++ case EFI_CONVENTIONAL_MEMORY: ++ return 1; ++ } ++ return 0; ++} ++ ++/* ++ * We need to map the EFI memory map again after paging_init(). ++ */ ++void __init efi_map_memmap(void) ++{ ++ memmap.map = NULL; ++ ++ memmap.map = (efi_memory_desc_t *) ++ bt_ioremap((unsigned long) memmap.phys_map, ++ (memmap.nr_map * sizeof(efi_memory_desc_t))); ++ ++ if (memmap.map == NULL) ++ printk(KERN_ERR PFX "Could not remap the EFI memmap!\n"); ++} ++ ++void __init print_efi_memmap(void) ++{ ++ efi_memory_desc_t *md; ++ int i; ++ ++ for (i = 0; i < memmap.nr_map; i++) { ++ md = &memmap.map[i]; ++ printk(KERN_INFO "mem%02u: type=%u, attr=0x%llx, " ++ "range=[0x%016llx-0x%016llx) (%lluMB)\n", ++ i, md->type, md->attribute, md->phys_addr, ++ md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT), ++ (md->num_pages >> (20 - EFI_PAGE_SHIFT))); ++ } ++} ++ ++/* ++ * Walks the EFI memory map and calls CALLBACK once for each EFI ++ * memory descriptor that has memory that is available for kernel use. 
++ */ ++void efi_memmap_walk(efi_freemem_callback_t callback, void *arg) ++{ ++ int prev_valid = 0; ++ struct range { ++ unsigned long start; ++ unsigned long end; ++ } prev, curr; ++ efi_memory_desc_t *md; ++ unsigned long start, end; ++ int i; ++ ++ for (i = 0; i < memmap.nr_map; i++) { ++ md = &memmap.map[i]; ++ ++ if ((md->num_pages == 0) || (!is_available_memory(md))) ++ continue; ++ ++ curr.start = md->phys_addr; ++ curr.end = curr.start + (md->num_pages << EFI_PAGE_SHIFT); ++ ++ if (!prev_valid) { ++ prev = curr; ++ prev_valid = 1; ++ } else { ++ if (curr.start < prev.start) ++ printk(KERN_INFO PFX "Unordered memory map\n"); ++ if (prev.end == curr.start) ++ prev.end = curr.end; ++ else { ++ start = ++ (unsigned long) (PAGE_ALIGN(prev.start)); ++ end = (unsigned long) (prev.end & PAGE_MASK); ++ if ((end > start) ++ && (*callback) (start, end, arg) < 0) ++ return; ++ prev = curr; ++ } ++ } ++ } ++ if (prev_valid) { ++ start = (unsigned long) PAGE_ALIGN(prev.start); ++ end = (unsigned long) (prev.end & PAGE_MASK); ++ if (end > start) ++ (*callback) (start, end, arg); ++ } ++} ++ ++void __init efi_init(void) ++{ ++ efi_config_table_t *config_tables; ++ efi_runtime_services_t *runtime; ++ efi_char16_t *c16; ++ char vendor[100] = "unknown"; ++ unsigned long num_config_tables; ++ int i = 0; ++ ++ memset(&efi, 0, sizeof(efi) ); ++ memset(&efi_phys, 0, sizeof(efi_phys)); ++ ++ efi_phys.systab = EFI_SYSTAB; ++ memmap.phys_map = EFI_MEMMAP; ++ memmap.nr_map = EFI_MEMMAP_SIZE/EFI_MEMDESC_SIZE; ++ memmap.desc_version = EFI_MEMDESC_VERSION; ++ ++ efi.systab = (efi_system_table_t *) ++ boot_ioremap((unsigned long) efi_phys.systab, ++ sizeof(efi_system_table_t)); ++ /* ++ * Verify the EFI Table ++ */ ++ if (efi.systab == NULL) ++ printk(KERN_ERR PFX "Woah! Couldn't map the EFI system table.\n"); ++ if (efi.systab->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE) ++ printk(KERN_ERR PFX "Woah! EFI system table signature incorrect\n"); ++ if ((efi.systab->hdr.revision ^ EFI_SYSTEM_TABLE_REVISION) >> 16 != 0) ++ printk(KERN_ERR PFX ++ "Warning: EFI system table major version mismatch: " ++ "got %d.%02d, expected %d.%02d\n", ++ efi.systab->hdr.revision >> 16, ++ efi.systab->hdr.revision & 0xffff, ++ EFI_SYSTEM_TABLE_REVISION >> 16, ++ EFI_SYSTEM_TABLE_REVISION & 0xffff); ++ /* ++ * Grab some details from the system table ++ */ ++ num_config_tables = efi.systab->nr_tables; ++ config_tables = (efi_config_table_t *)efi.systab->tables; ++ runtime = efi.systab->runtime; ++ ++ /* ++ * Show what we know for posterity ++ */ ++ c16 = (efi_char16_t *) boot_ioremap(efi.systab->fw_vendor, 2); ++ if (c16) { ++ for (i = 0; i < sizeof(vendor) && *c16; ++i) ++ vendor[i] = *c16++; ++ vendor[i] = '\0'; ++ } else ++ printk(KERN_ERR PFX "Could not map the firmware vendor!\n"); ++ ++ printk(KERN_INFO PFX "EFI v%u.%.02u by %s \n", ++ efi.systab->hdr.revision >> 16, ++ efi.systab->hdr.revision & 0xffff, vendor); ++ ++ /* ++ * Let's see what config tables the firmware passed to us. 
++ */ ++ config_tables = (efi_config_table_t *) ++ boot_ioremap((unsigned long) config_tables, ++ num_config_tables * sizeof(efi_config_table_t)); ++ ++ if (config_tables == NULL) ++ printk(KERN_ERR PFX "Could not map EFI Configuration Table!\n"); ++ ++ for (i = 0; i < num_config_tables; i++) { ++ if (efi_guidcmp(config_tables[i].guid, MPS_TABLE_GUID) == 0) { ++ efi.mps = (void *)config_tables[i].table; ++ printk(KERN_INFO " MPS=0x%lx ", config_tables[i].table); ++ } else ++ if (efi_guidcmp(config_tables[i].guid, ACPI_20_TABLE_GUID) == 0) { ++ efi.acpi20 = __va(config_tables[i].table); ++ printk(KERN_INFO " ACPI 2.0=0x%lx ", config_tables[i].table); ++ } else ++ if (efi_guidcmp(config_tables[i].guid, ACPI_TABLE_GUID) == 0) { ++ efi.acpi = __va(config_tables[i].table); ++ printk(KERN_INFO " ACPI=0x%lx ", config_tables[i].table); ++ } else ++ if (efi_guidcmp(config_tables[i].guid, SMBIOS_TABLE_GUID) == 0) { ++ efi.smbios = (void *) config_tables[i].table; ++ printk(KERN_INFO " SMBIOS=0x%lx ", config_tables[i].table); ++ } else ++ if (efi_guidcmp(config_tables[i].guid, HCDP_TABLE_GUID) == 0) { ++ efi.hcdp = (void *)config_tables[i].table; ++ printk(KERN_INFO " HCDP=0x%lx ", config_tables[i].table); ++ } else ++ if (efi_guidcmp(config_tables[i].guid, UGA_IO_PROTOCOL_GUID) == 0) { ++ efi.uga = (void *)config_tables[i].table; ++ printk(KERN_INFO " UGA=0x%lx ", config_tables[i].table); ++ } ++ } ++ printk("\n"); ++ ++ /* ++ * Check out the runtime services table. We need to map ++ * the runtime services table so that we can grab the physical ++ * address of several of the EFI runtime functions, needed to ++ * set the firmware into virtual mode. ++ */ ++ ++ runtime = (efi_runtime_services_t *) boot_ioremap((unsigned long) ++ runtime, ++ sizeof(efi_runtime_services_t)); ++ if (runtime != NULL) { ++ /* ++ * We will only need *early* access to the following ++ * two EFI runtime services before set_virtual_address_map ++ * is invoked. ++ */ ++ efi_phys.get_time = (efi_get_time_t *) runtime->get_time; ++ efi_phys.set_virtual_address_map = ++ (efi_set_virtual_address_map_t *) ++ runtime->set_virtual_address_map; ++ } else ++ printk(KERN_ERR PFX "Could not map the runtime service table!\n"); ++ ++ /* Map the EFI memory map for use until paging_init() */ ++ ++ memmap.map = (efi_memory_desc_t *) ++ boot_ioremap((unsigned long) EFI_MEMMAP, EFI_MEMMAP_SIZE); ++ ++ if (memmap.map == NULL) ++ printk(KERN_ERR PFX "Could not map the EFI memory map!\n"); ++ ++ if (EFI_MEMDESC_SIZE != sizeof(efi_memory_desc_t)) { ++ printk(KERN_WARNING PFX "Warning! Kernel-defined memdesc doesn't " ++ "match the one from EFI!\n"); ++ } ++#if EFI_DEBUG ++ print_efi_memmap(); ++#endif ++} ++ ++/* ++ * This function will switch the EFI runtime services to virtual mode. ++ * Essentially, look through the EFI memmap and map every region that ++ * has the runtime attribute bit set in its memory descriptor and update ++ * that memory descriptor with the virtual address obtained from ioremap(). ++ * This enables the runtime services to be called without having to ++ * thunk back into physical mode for every invocation. 
++ */ ++ ++void __init efi_enter_virtual_mode(void) ++{ ++ efi_memory_desc_t *md; ++ efi_status_t status; ++ int i; ++ ++ efi.systab = NULL; ++ ++ for (i = 0; i < memmap.nr_map; i++) { ++ md = &memmap.map[i]; ++ ++ if (md->attribute & EFI_MEMORY_RUNTIME) { ++ md->virt_addr = ++ (unsigned long)ioremap(md->phys_addr, ++ md->num_pages << EFI_PAGE_SHIFT); ++ if (!(unsigned long)md->virt_addr) { ++ printk(KERN_ERR PFX "ioremap of 0x%lX failed\n", ++ (unsigned long)md->phys_addr); ++ } ++ ++ if (((unsigned long)md->phys_addr <= ++ (unsigned long)efi_phys.systab) && ++ ((unsigned long)efi_phys.systab < ++ md->phys_addr + ++ ((unsigned long)md->num_pages << ++ EFI_PAGE_SHIFT))) { ++ unsigned long addr; ++ ++ addr = md->virt_addr - md->phys_addr + ++ (unsigned long)efi_phys.systab; ++ efi.systab = (efi_system_table_t *)addr; ++ } ++ } ++ } ++ ++ if (!efi.systab) ++ BUG(); ++ ++ status = phys_efi_set_virtual_address_map( ++ sizeof(efi_memory_desc_t) * memmap.nr_map, ++ sizeof(efi_memory_desc_t), ++ memmap.desc_version, ++ memmap.phys_map); ++ ++ if (status != EFI_SUCCESS) { ++ printk (KERN_ALERT "You are screwed! " ++ "Unable to switch EFI into virtual mode " ++ "(status=%lx)\n", status); ++ panic("EFI call to SetVirtualAddressMap() failed!"); ++ } ++ ++ /* ++ * Now that EFI is in virtual mode, update the function ++ * pointers in the runtime service table to the new virtual addresses. ++ */ ++ ++ efi.get_time = (efi_get_time_t *) efi.systab->runtime->get_time; ++ efi.set_time = (efi_set_time_t *) efi.systab->runtime->set_time; ++ efi.get_wakeup_time = (efi_get_wakeup_time_t *) ++ efi.systab->runtime->get_wakeup_time; ++ efi.set_wakeup_time = (efi_set_wakeup_time_t *) ++ efi.systab->runtime->set_wakeup_time; ++ efi.get_variable = (efi_get_variable_t *) ++ efi.systab->runtime->get_variable; ++ efi.get_next_variable = (efi_get_next_variable_t *) ++ efi.systab->runtime->get_next_variable; ++ efi.set_variable = (efi_set_variable_t *) ++ efi.systab->runtime->set_variable; ++ efi.get_next_high_mono_count = (efi_get_next_high_mono_count_t *) ++ efi.systab->runtime->get_next_high_mono_count; ++ efi.reset_system = (efi_reset_system_t *) ++ efi.systab->runtime->reset_system; ++} ++ ++void __init ++efi_initialize_iomem_resources(struct resource *code_resource, ++ struct resource *data_resource) ++{ ++ struct resource *res; ++ efi_memory_desc_t *md; ++ int i; ++ ++ for (i = 0; i < memmap.nr_map; i++) { ++ md = &memmap.map[i]; ++ ++ if ((md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT)) > ++ 0x100000000ULL) ++ continue; ++ res = alloc_bootmem_low(sizeof(struct resource)); ++ switch (md->type) { ++ case EFI_RESERVED_TYPE: ++ res->name = "Reserved Memory"; ++ break; ++ case EFI_LOADER_CODE: ++ res->name = "Loader Code"; ++ break; ++ case EFI_LOADER_DATA: ++ res->name = "Loader Data"; ++ break; ++ case EFI_BOOT_SERVICES_DATA: ++ res->name = "BootServices Data"; ++ break; ++ case EFI_BOOT_SERVICES_CODE: ++ res->name = "BootServices Code"; ++ break; ++ case EFI_RUNTIME_SERVICES_CODE: ++ res->name = "Runtime Service Code"; ++ break; ++ case EFI_RUNTIME_SERVICES_DATA: ++ res->name = "Runtime Service Data"; ++ break; ++ case EFI_CONVENTIONAL_MEMORY: ++ res->name = "Conventional Memory"; ++ break; ++ case EFI_UNUSABLE_MEMORY: ++ res->name = "Unusable Memory"; ++ break; ++ case EFI_ACPI_RECLAIM_MEMORY: ++ res->name = "ACPI Reclaim"; ++ break; ++ case EFI_ACPI_MEMORY_NVS: ++ res->name = "ACPI NVS"; ++ break; ++ case EFI_MEMORY_MAPPED_IO: ++ res->name = "Memory Mapped IO"; ++ break; ++ case EFI_MEMORY_MAPPED_IO_PORT_SPACE: 
++ res->name = "Memory Mapped IO Port Space"; ++ break; ++ default: ++ res->name = "Reserved"; ++ break; ++ } ++ res->start = md->phys_addr; ++ res->end = res->start + ((md->num_pages << EFI_PAGE_SHIFT) - 1); ++ res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; ++ if (request_resource(&iomem_resource, res) < 0) ++ printk(KERN_ERR PFX "Failed to allocate res %s : 0x%lx-0x%lx\n", ++ res->name, res->start, res->end); ++ /* ++ * We don't know which region contains kernel data so we try ++ * it repeatedly and let the resource manager test it. ++ */ ++ if (md->type == EFI_CONVENTIONAL_MEMORY) { ++ request_resource(res, code_resource); ++ request_resource(res, data_resource); ++ } ++ } ++} ++ ++/* ++ * Convenience functions to obtain memory types and attributes ++ */ ++ ++u32 efi_mem_type(unsigned long phys_addr) ++{ ++ efi_memory_desc_t *md; ++ int i; ++ ++ for (i = 0; i < memmap.nr_map; i++) { ++ md = &memmap.map[i]; ++ if ((md->phys_addr <= phys_addr) && (phys_addr < ++ (md->phys_addr + (md-> num_pages << EFI_PAGE_SHIFT)) )) ++ return md->type; ++ } ++ return 0; ++} ++ ++u64 efi_mem_attributes(unsigned long phys_addr) ++{ ++ efi_memory_desc_t *md; ++ int i; ++ ++ for (i = 0; i < memmap.nr_map; i++) { ++ md = &memmap.map[i]; ++ if ((md->phys_addr <= phys_addr) && (phys_addr < ++ (md->phys_addr + (md-> num_pages << EFI_PAGE_SHIFT)) )) ++ return md->attribute; ++ } ++ return 0; ++} +--- /dev/null 2002-08-30 16:31:37.000000000 -0700 ++++ 25/arch/i386/kernel/efi_stub.S 2003-12-28 23:21:45.000000000 -0800 +@@ -0,0 +1,124 @@ ++/* ++ * EFI call stub for IA32. ++ * ++ * This stub allows us to make EFI calls in physical mode with interrupts ++ * turned off. ++ */ ++ ++#include ++#include ++#include ++#include ++ ++/* ++ * efi_call_phys(void *, ...) is a function with variable parameters. ++ * All the callers of this function assure that all the parameters are 4-bytes. ++ */ ++ ++/* ++ * In gcc calling convention, EBX, ESP, EBP, ESI and EDI are all callee save. ++ * So we'd better save all of them at the beginning of this function and restore ++ * at the end no matter how many we use, because we can not assure EFI runtime ++ * service functions will comply with gcc calling convention, too. ++ */ ++ ++.text ++ENTRY(efi_call_phys) ++ /* ++ * 0. The function can only be called in Linux kernel. So CS has been ++ * set to 0x0010, DS and SS have been set to 0x0018. In EFI, I found ++ * the values of these registers are the same. And, the corresponding ++ * GDT entries are identical. So I will do nothing about segment reg ++ * and GDT, but change GDT base register in prelog and epilog. ++ */ ++ ++ /* ++ * 1. Now I am running with EIP = + PAGE_OFFSET. ++ * But to make it smoothly switch from virtual mode to flat mode. ++ * The mapping of lower virtual memory has been created in prelog and ++ * epilog. ++ */ ++ movl $1f, %edx ++ subl $__PAGE_OFFSET, %edx ++ jmp *%edx ++1: ++ ++ /* ++ * 2. Now on the top of stack is the return ++ * address in the caller of efi_call_phys(), then parameter 1, ++ * parameter 2, ..., param n. To make things easy, we save the return ++ * address of efi_call_phys in a global variable. ++ */ ++ popl %edx ++ movl %edx, saved_return_addr ++ /* get the function pointer into ECX*/ ++ popl %ecx ++ movl %ecx, efi_rt_function_ptr ++ movl $2f, %edx ++ subl $__PAGE_OFFSET, %edx ++ pushl %edx ++ ++ /* ++ * 3. Clear PG bit in %CR0. ++ */ ++ movl %cr0, %edx ++ andl $0x7fffffff, %edx ++ movl %edx, %cr0 ++ jmp 1f ++1: ++ ++ /* ++ * 4. Adjust stack pointer. 
++ */ ++ subl $__PAGE_OFFSET, %esp ++ ++ /* ++ * 5. Call the physical function. ++ */ ++ jmp *%ecx ++ ++2: ++ /* ++ * 6. After EFI runtime service returns, control will return to ++ * following instruction. We'd better readjust stack pointer first. ++ */ ++ addl $__PAGE_OFFSET, %esp ++ ++ /* ++ * 7. Restore PG bit ++ */ ++ movl %cr0, %edx ++ orl $0x80000000, %edx ++ movl %edx, %cr0 ++ jmp 1f ++1: ++ /* ++ * 8. Now restore the virtual mode from flat mode by ++ * adding EIP with PAGE_OFFSET. ++ */ ++ movl $1f, %edx ++ jmp *%edx ++1: ++ ++ /* ++ * 9. Balance the stack. And because EAX contain the return value, ++ * we'd better not clobber it. ++ */ ++ leal efi_rt_function_ptr, %edx ++ movl (%edx), %ecx ++ pushl %ecx ++ ++ /* ++ * 10. Push the saved return address onto the stack and return. ++ */ ++ leal saved_return_addr, %edx ++ movl (%edx), %ecx ++ pushl %ecx ++ ret ++.previous ++ ++.data ++saved_return_addr: ++ .long 0 ++efi_rt_function_ptr: ++ .long 0 +--- linux-2.6.0/arch/i386/kernel/entry.S 2003-11-23 19:03:00.000000000 -0800 ++++ 25/arch/i386/kernel/entry.S 2003-12-28 23:26:36.000000000 -0800 +@@ -43,11 +43,25 @@ + #include + #include + #include ++#include + #include + #include ++#include + #include + #include + #include "irq_vectors.h" ++ /* We do not recover from a stack overflow, but at least ++ * we know it happened and should be able to track it down. ++ */ ++#ifdef CONFIG_STACK_OVERFLOW_TEST ++#define STACK_OVERFLOW_TEST \ ++ testl $7680,%esp; \ ++ jnz 10f; \ ++ call stack_overflow; \ ++10: ++#else ++#define STACK_OVERFLOW_TEST ++#endif + + #define nr_syscalls ((syscall_table_size)/4) + +@@ -87,7 +101,102 @@ TSS_ESP0_OFFSET = (4 - 0x200) + #define resume_kernel restore_all + #endif + +-#define SAVE_ALL \ ++#ifdef CONFIG_X86_HIGH_ENTRY ++ ++#ifdef CONFIG_X86_SWITCH_PAGETABLES ++ ++#if defined(CONFIG_PREEMPT) && defined(CONFIG_SMP) ++/* ++ * If task is preempted in __SWITCH_KERNELSPACE, and moved to another cpu, ++ * __switch_to repoints %esp to the appropriate virtual stack; but %ebp is ++ * left stale, so we must check whether to repeat the real stack calculation. ++ */ ++#define repeat_if_esp_changed \ ++ xorl %esp, %ebp; \ ++ testl $0xffffe000, %ebp; \ ++ jnz 0b ++#else ++#define repeat_if_esp_changed ++#endif ++ ++/* clobbers ebx, edx and ebp */ ++ ++#define __SWITCH_KERNELSPACE \ ++ cmpl $0xff000000, %esp; \ ++ jb 1f; \ ++ \ ++ /* \ ++ * switch pagetables and load the real stack, \ ++ * keep the stack offset: \ ++ */ \ ++ \ ++ movl $swapper_pg_dir-__PAGE_OFFSET, %edx; \ ++ \ ++ /* GET_THREAD_INFO(%ebp) intermixed */ \ ++0: \ ++ movl %esp, %ebp; \ ++ movl %esp, %ebx; \ ++ andl $0xffffe000, %ebp; \ ++ andl $0x00001fff, %ebx; \ ++ orl TI_real_stack(%ebp), %ebx; \ ++ repeat_if_esp_changed; \ ++ \ ++ movl %edx, %cr3; \ ++ movl %ebx, %esp; \ ++1: ++ ++#endif ++ ++ ++#define __SWITCH_USERSPACE \ ++ /* interrupted any of the user return paths? */ \ ++ \ ++ movl EIP(%esp), %eax; \ ++ \ ++ cmpl $int80_ret_start_marker, %eax; \ ++ jb 33f; /* nope - continue with sysexit check */\ ++ cmpl $int80_ret_end_marker, %eax; \ ++ jb 22f; /* yes - switch to virtual stack */ \ ++33: \ ++ cmpl $sysexit_ret_start_marker, %eax; \ ++ jb 44f; /* nope - continue with user check */ \ ++ cmpl $sysexit_ret_end_marker, %eax; \ ++ jb 22f; /* yes - switch to virtual stack */ \ ++ /* return to userspace? */ \ ++44: \ ++ movl EFLAGS(%esp),%ecx; \ ++ movb CS(%esp),%cl; \ ++ testl $(VM_MASK | 3),%ecx; \ ++ jz 2f; \ ++22: \ ++ /* \ ++ * switch to the virtual stack, then switch to \ ++ * the userspace pagetables. 
\ ++ */ \ ++ \ ++ GET_THREAD_INFO(%ebp); \ ++ movl TI_virtual_stack(%ebp), %edx; \ ++ movl TI_user_pgd(%ebp), %ecx; \ ++ \ ++ movl %esp, %ebx; \ ++ andl $0x1fff, %ebx; \ ++ orl %ebx, %edx; \ ++int80_ret_start_marker: \ ++ movl %edx, %esp; \ ++ movl %ecx, %cr3; \ ++ \ ++ __RESTORE_ALL; \ ++int80_ret_end_marker: \ ++2: ++ ++#else /* !CONFIG_X86_HIGH_ENTRY */ ++ ++#define __SWITCH_KERNELSPACE ++#define __SWITCH_USERSPACE ++ ++#endif ++ ++#define __SAVE_ALL \ + cld; \ + pushl %es; \ + pushl %ds; \ +@@ -102,7 +211,7 @@ TSS_ESP0_OFFSET = (4 - 0x200) + movl %edx, %ds; \ + movl %edx, %es; + +-#define RESTORE_INT_REGS \ ++#define __RESTORE_INT_REGS \ + popl %ebx; \ + popl %ecx; \ + popl %edx; \ +@@ -111,29 +220,28 @@ TSS_ESP0_OFFSET = (4 - 0x200) + popl %ebp; \ + popl %eax + +-#define RESTORE_REGS \ +- RESTORE_INT_REGS; \ +-1: popl %ds; \ +-2: popl %es; \ ++#define __RESTORE_REGS \ ++ __RESTORE_INT_REGS; \ ++111: popl %ds; \ ++222: popl %es; \ + .section .fixup,"ax"; \ +-3: movl $0,(%esp); \ +- jmp 1b; \ +-4: movl $0,(%esp); \ +- jmp 2b; \ ++444: movl $0,(%esp); \ ++ jmp 111b; \ ++555: movl $0,(%esp); \ ++ jmp 222b; \ + .previous; \ + .section __ex_table,"a";\ + .align 4; \ +- .long 1b,3b; \ +- .long 2b,4b; \ ++ .long 111b,444b;\ ++ .long 222b,555b;\ + .previous + +- +-#define RESTORE_ALL \ +- RESTORE_REGS \ ++#define __RESTORE_ALL \ ++ __RESTORE_REGS \ + addl $4, %esp; \ +-1: iret; \ ++333: iret; \ + .section .fixup,"ax"; \ +-2: sti; \ ++666: sti; \ + movl $(__USER_DS), %edx; \ + movl %edx, %ds; \ + movl %edx, %es; \ +@@ -142,10 +250,19 @@ TSS_ESP0_OFFSET = (4 - 0x200) + .previous; \ + .section __ex_table,"a";\ + .align 4; \ +- .long 1b,2b; \ ++ .long 333b,666b;\ + .previous + ++#define SAVE_ALL \ ++ __SAVE_ALL; \ ++ __SWITCH_KERNELSPACE; \ ++ STACK_OVERFLOW_TEST; ++ ++#define RESTORE_ALL \ ++ __SWITCH_USERSPACE; \ ++ __RESTORE_ALL; + ++.section .entry.text,"ax" + + ENTRY(lcall7) + pushfl # We get a different stack layout with call +@@ -163,7 +280,7 @@ do_lcall: + movl %edx,EIP(%ebp) # Now we move them to their "normal" places + movl %ecx,CS(%ebp) # + andl $-8192, %ebp # GET_THREAD_INFO +- movl TI_EXEC_DOMAIN(%ebp), %edx # Get the execution domain ++ movl TI_exec_domain(%ebp), %edx # Get the execution domain + call *4(%edx) # Call the lcall7 handler for the domain + addl $4, %esp + popl %eax +@@ -208,7 +325,7 @@ ENTRY(resume_userspace) + cli # make sure we don't miss an interrupt + # setting need_resched or sigpending + # between sampling and the iret +- movl TI_FLAGS(%ebp), %ecx ++ movl TI_flags(%ebp), %ecx + andl $_TIF_WORK_MASK, %ecx # is there any work to be done on + # int/exception return? + jne work_pending +@@ -216,18 +333,18 @@ ENTRY(resume_userspace) + + #ifdef CONFIG_PREEMPT + ENTRY(resume_kernel) +- cmpl $0,TI_PRE_COUNT(%ebp) # non-zero preempt_count ? ++ cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ? + jnz restore_all + need_resched: +- movl TI_FLAGS(%ebp), %ecx # need_resched set ? ++ movl TI_flags(%ebp), %ecx # need_resched set ? + testb $_TIF_NEED_RESCHED, %cl + jz restore_all + testl $IF_MASK,EFLAGS(%esp) # interrupts off (exception path) ? + jz restore_all +- movl $PREEMPT_ACTIVE,TI_PRE_COUNT(%ebp) ++ movl $PREEMPT_ACTIVE,TI_preempt_count(%ebp) + sti + call schedule +- movl $0,TI_PRE_COUNT(%ebp) ++ movl $0,TI_preempt_count(%ebp) + cli + jmp need_resched + #endif +@@ -246,37 +363,50 @@ sysenter_past_esp: + pushl $(__USER_CS) + pushl $SYSENTER_RETURN + +-/* +- * Load the potential sixth argument from user stack. +- * Careful about security. 
+- */ +- cmpl $__PAGE_OFFSET-3,%ebp +- jae syscall_fault +-1: movl (%ebp),%ebp +-.section __ex_table,"a" +- .align 4 +- .long 1b,syscall_fault +-.previous +- + pushl %eax + SAVE_ALL + GET_THREAD_INFO(%ebp) + cmpl $(nr_syscalls), %eax + jae syscall_badsys + +- testb $_TIF_SYSCALL_TRACE,TI_FLAGS(%ebp) ++ testb $_TIF_SYSCALL_TRACE,TI_flags(%ebp) + jnz syscall_trace_entry + call *sys_call_table(,%eax,4) + movl %eax,EAX(%esp) + cli +- movl TI_FLAGS(%ebp), %ecx ++ movl TI_flags(%ebp), %ecx + testw $_TIF_ALLWORK_MASK, %cx + jne syscall_exit_work ++ ++#ifdef CONFIG_X86_SWITCH_PAGETABLES ++ ++ GET_THREAD_INFO(%ebp) ++ movl TI_virtual_stack(%ebp), %edx ++ movl TI_user_pgd(%ebp), %ecx ++ movl %esp, %ebx ++ andl $0x1fff, %ebx ++ orl %ebx, %edx ++sysexit_ret_start_marker: ++ movl %edx, %esp ++ movl %ecx, %cr3 ++#endif ++ /* ++ * only ebx is not restored by the userspace sysenter vsyscall ++ * code, it assumes it to be callee-saved. ++ */ ++ movl EBX(%esp), %ebx ++ + /* if something modifies registers it must also disable sysexit */ ++ + movl EIP(%esp), %edx + movl OLDESP(%esp), %ecx ++ + sti + sysexit ++#ifdef CONFIG_X86_SWITCH_PAGETABLES ++sysexit_ret_end_marker: ++ nop ++#endif + + + # system call handler stub +@@ -287,7 +417,7 @@ ENTRY(system_call) + cmpl $(nr_syscalls), %eax + jae syscall_badsys + # system call tracing in operation +- testb $_TIF_SYSCALL_TRACE,TI_FLAGS(%ebp) ++ testb $_TIF_SYSCALL_TRACE,TI_flags(%ebp) + jnz syscall_trace_entry + syscall_call: + call *sys_call_table(,%eax,4) +@@ -296,10 +426,23 @@ syscall_exit: + cli # make sure we don't miss an interrupt + # setting need_resched or sigpending + # between sampling and the iret +- movl TI_FLAGS(%ebp), %ecx ++ movl TI_flags(%ebp), %ecx + testw $_TIF_ALLWORK_MASK, %cx # current->work + jne syscall_exit_work + restore_all: ++#ifdef CONFIG_TRAP_BAD_SYSCALL_EXITS ++ movl EFLAGS(%esp), %eax # mix EFLAGS and CS ++ movb CS(%esp), %al ++ testl $(VM_MASK | 3), %eax ++ jz resume_kernelX # returning to kernel or vm86-space ++ ++ cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ? ++ jz resume_kernelX ++ ++ int $3 ++ ++resume_kernelX: ++#endif + RESTORE_ALL + + # perform work that needs to be done immediately before resumption +@@ -312,7 +455,7 @@ work_resched: + cli # make sure we don't miss an interrupt + # setting need_resched or sigpending + # between sampling and the iret +- movl TI_FLAGS(%ebp), %ecx ++ movl TI_flags(%ebp), %ecx + andl $_TIF_WORK_MASK, %ecx # is there any work to be done other + # than syscall tracing? 
+ jz restore_all +@@ -327,6 +470,22 @@ work_notifysig: # deal with pending s + # vm86-space + xorl %edx, %edx + call do_notify_resume ++ ++#if CONFIG_X86_HIGH_ENTRY ++ /* ++ * Reload db7 if necessary: ++ */ ++ movl TI_flags(%ebp), %ecx ++ testb $_TIF_DB7, %cl ++ jnz work_db7 ++ ++ jmp restore_all ++ ++work_db7: ++ movl TI_task(%ebp), %edx; ++ movl task_thread_db7(%edx), %edx; ++ movl %edx, %db7; ++#endif + jmp restore_all + + ALIGN +@@ -382,7 +541,7 @@ syscall_badsys: + */ + .data + ENTRY(interrupt) +-.text ++.previous + + vector=0 + ENTRY(irq_entries_start) +@@ -392,7 +551,7 @@ ENTRY(irq_entries_start) + jmp common_interrupt + .data + .long 1b +-.text ++.previous + vector=vector+1 + .endr + +@@ -433,12 +592,17 @@ error_code: + movl ES(%esp), %edi # get the function address + movl %eax, ORIG_EAX(%esp) + movl %ecx, ES(%esp) +- movl %esp, %edx + pushl %esi # push the error code +- pushl %edx # push the pt_regs pointer + movl $(__USER_DS), %edx + movl %edx, %ds + movl %edx, %es ++ ++/* clobbers edx, ebx and ebp */ ++ __SWITCH_KERNELSPACE ++ ++ leal 4(%esp), %edx # prepare pt_regs ++ pushl %edx # push pt_regs ++ + call *%edi + addl $8, %esp + jmp ret_from_exception +@@ -529,7 +693,7 @@ nmi_stack_correct: + pushl %edx + call do_nmi + addl $8, %esp +- RESTORE_ALL ++ jmp restore_all + + nmi_stack_fixup: + FIX_STACK(12,nmi_stack_correct, 1) +@@ -606,6 +770,8 @@ ENTRY(spurious_interrupt_bug) + pushl $do_spurious_interrupt_bug + jmp error_code + ++.previous ++ + .data + ENTRY(sys_call_table) + .long sys_restart_syscall /* 0 - old "setup()" system call, used for restarting */ +--- /dev/null 2002-08-30 16:31:37.000000000 -0700 ++++ 25/arch/i386/kernel/entry_trampoline.c 2003-12-28 23:26:36.000000000 -0800 +@@ -0,0 +1,75 @@ ++/* ++ * linux/arch/i386/kernel/entry_trampoline.c ++ * ++ * (C) Copyright 2003 Ingo Molnar ++ * ++ * This file contains the needed support code for 4GB userspace ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++extern char __entry_tramp_start, __entry_tramp_end, __start___entry_text; ++ ++void __init init_entry_mappings(void) ++{ ++#ifdef CONFIG_X86_HIGH_ENTRY ++ void *tramp; ++ ++ /* ++ * We need a high IDT and GDT for the 4G/4G split: ++ */ ++ trap_init_virtual_IDT(); ++ ++ __set_fixmap(FIX_ENTRY_TRAMPOLINE_0, __pa((unsigned long)&__entry_tramp_start), PAGE_KERNEL); ++ __set_fixmap(FIX_ENTRY_TRAMPOLINE_1, __pa((unsigned long)&__entry_tramp_start) + PAGE_SIZE, PAGE_KERNEL); ++ tramp = (void *)fix_to_virt(FIX_ENTRY_TRAMPOLINE_0); ++ ++ printk("mapped 4G/4G trampoline to %p.\n", tramp); ++ BUG_ON((void *)&__start___entry_text != tramp); ++ /* ++ * Virtual kernel stack: ++ */ ++ BUG_ON(__kmap_atomic_vaddr(KM_VSTACK0) & 8191); ++ BUG_ON(sizeof(struct desc_struct)*NR_CPUS*GDT_ENTRIES > 2*PAGE_SIZE); ++ BUG_ON((unsigned int)&__entry_tramp_end - (unsigned int)&__entry_tramp_start > 2*PAGE_SIZE); ++ ++ /* ++ * set up the initial thread's virtual stack related ++ * fields: ++ */ ++ current->thread.stack_page0 = virt_to_page((char *)current->thread_info); ++ current->thread.stack_page1 = virt_to_page((char *)current->thread_info + PAGE_SIZE); ++ current->thread_info->virtual_stack = (void *)__kmap_atomic_vaddr(KM_VSTACK0); ++ ++ __kunmap_atomic_type(KM_VSTACK0); ++ __kunmap_atomic_type(KM_VSTACK1); ++ __kmap_atomic(current->thread.stack_page0, KM_VSTACK0); ++ __kmap_atomic(current->thread.stack_page1, KM_VSTACK1); ++ ++#endif ++ printk("current: %p\n", current); ++ printk("current->thread_info: %p\n", current->thread_info); ++ 
current->thread_info->real_stack = (void *)current->thread_info; ++ current->thread_info->user_pgd = NULL; ++ current->thread.esp0 = (unsigned long)current->thread_info->real_stack + THREAD_SIZE; ++} ++ ++ ++ ++void __init entry_trampoline_setup(void) ++{ ++ /* ++ * old IRQ entries set up by the boot code will still hang ++ * around - they are a sign of hw trouble anyway, now they'll ++ * produce a double fault message. ++ */ ++ trap_init_virtual_GDT(); ++} +--- linux-2.6.0/arch/i386/kernel/head.S 2003-11-09 16:45:04.000000000 -0800 ++++ 25/arch/i386/kernel/head.S 2003-12-28 23:26:36.000000000 -0800 +@@ -16,6 +16,7 @@ + #include + #include + #include ++#include + + #define OLD_CL_MAGIC_ADDR 0x90020 + #define OLD_CL_MAGIC 0xA33F +@@ -330,7 +331,7 @@ ENTRY(stack_start) + + /* This is the default interrupt "handler" :-) */ + int_msg: +- .asciz "Unknown interrupt\n" ++ .asciz "Unknown interrupt or fault at EIP %p %p %p\n" + ALIGN + ignore_int: + cld +@@ -342,9 +343,17 @@ ignore_int: + movl $(__KERNEL_DS),%eax + movl %eax,%ds + movl %eax,%es ++ pushl 16(%esp) ++ pushl 24(%esp) ++ pushl 32(%esp) ++ pushl 40(%esp) + pushl $int_msg + call printk + popl %eax ++ popl %eax ++ popl %eax ++ popl %eax ++ popl %eax + popl %ds + popl %es + popl %edx +@@ -377,23 +386,27 @@ cpu_gdt_descr: + .fill NR_CPUS-1,8,0 # space for the other GDT descriptors + + /* +- * This is initialized to create an identity-mapping at 0-8M (for bootup +- * purposes) and another mapping of the 0-8M area at virtual address ++ * This is initialized to create an identity-mapping at 0-16M (for bootup ++ * purposes) and another mapping of the 0-16M area at virtual address + * PAGE_OFFSET. + */ + .org 0x1000 + ENTRY(swapper_pg_dir) + .long 0x00102007 + .long 0x00103007 +- .fill BOOT_USER_PGD_PTRS-2,4,0 +- /* default: 766 entries */ ++ .long 0x00104007 ++ .long 0x00105007 ++ .fill BOOT_USER_PGD_PTRS-4,4,0 ++ /* default: 764 entries */ + .long 0x00102007 + .long 0x00103007 +- /* default: 254 entries */ +- .fill BOOT_KERNEL_PGD_PTRS-2,4,0 ++ .long 0x00104007 ++ .long 0x00105007 ++ /* default: 252 entries */ ++ .fill BOOT_KERNEL_PGD_PTRS-4,4,0 + + /* +- * The page tables are initialized to only 8MB here - the final page ++ * The page tables are initialized to only 16MB here - the final page + * tables are set up later depending on memory size. + */ + .org 0x2000 +@@ -402,15 +415,21 @@ ENTRY(pg0) + .org 0x3000 + ENTRY(pg1) + ++.org 0x4000 ++ENTRY(pg2) ++ ++.org 0x5000 ++ENTRY(pg3) ++ + /* + * empty_zero_page must immediately follow the page tables ! (The + * initialization loop counts until empty_zero_page) + */ + +-.org 0x4000 ++.org 0x6000 + ENTRY(empty_zero_page) + +-.org 0x5000 ++.org 0x7000 + + /* + * Real beginning of normal "text" segment +@@ -419,12 +438,12 @@ ENTRY(stext) + ENTRY(_stext) + + /* +- * This starts the data section. Note that the above is all +- * in the text section because it has alignment requirements +- * that we cannot fulfill any other way. ++ * This starts the data section. + */ + .data + ++.align PAGE_SIZE_asm ++ + /* + * The Global Descriptor Table contains 28 quadwords, per-CPU. 
+ */ +@@ -439,7 +458,9 @@ ENTRY(boot_gdt_table) + .quad 0x00cf9a000000ffff /* kernel 4GB code at 0x00000000 */ + .quad 0x00cf92000000ffff /* kernel 4GB data at 0x00000000 */ + #endif +- .align L1_CACHE_BYTES ++ ++.align PAGE_SIZE_asm ++ + ENTRY(cpu_gdt_table) + .quad 0x0000000000000000 /* NULL descriptor */ + .quad 0x0000000000000000 /* 0x0b reserved */ +--- linux-2.6.0/arch/i386/kernel/i386_ksyms.c 2003-11-09 16:45:04.000000000 -0800 ++++ 25/arch/i386/kernel/i386_ksyms.c 2003-12-28 23:26:36.000000000 -0800 +@@ -98,7 +98,6 @@ EXPORT_SYMBOL_NOVERS(__down_failed_inter + EXPORT_SYMBOL_NOVERS(__down_failed_trylock); + EXPORT_SYMBOL_NOVERS(__up_wakeup); + /* Networking helper routines. */ +-EXPORT_SYMBOL(csum_partial_copy_generic); + /* Delay loops */ + EXPORT_SYMBOL(__ndelay); + EXPORT_SYMBOL(__udelay); +@@ -112,13 +111,17 @@ EXPORT_SYMBOL_NOVERS(__get_user_4); + EXPORT_SYMBOL(strpbrk); + EXPORT_SYMBOL(strstr); + ++#if !defined(CONFIG_X86_UACCESS_INDIRECT) + EXPORT_SYMBOL(strncpy_from_user); +-EXPORT_SYMBOL(__strncpy_from_user); ++EXPORT_SYMBOL(__direct_strncpy_from_user); + EXPORT_SYMBOL(clear_user); + EXPORT_SYMBOL(__clear_user); + EXPORT_SYMBOL(__copy_from_user_ll); + EXPORT_SYMBOL(__copy_to_user_ll); + EXPORT_SYMBOL(strnlen_user); ++#else /* CONFIG_X86_UACCESS_INDIRECT */ ++EXPORT_SYMBOL(direct_csum_partial_copy_generic); ++#endif + + EXPORT_SYMBOL(dma_alloc_coherent); + EXPORT_SYMBOL(dma_free_coherent); +--- linux-2.6.0/arch/i386/kernel/i387.c 2003-11-09 16:45:04.000000000 -0800 ++++ 25/arch/i386/kernel/i387.c 2003-12-28 23:26:36.000000000 -0800 +@@ -218,6 +218,7 @@ void set_fpu_mxcsr( struct task_struct * + static int convert_fxsr_to_user( struct _fpstate __user *buf, + struct i387_fxsave_struct *fxsave ) + { ++ struct _fpreg tmp[8]; /* 80 bytes scratch area */ + unsigned long env[7]; + struct _fpreg __user *to; + struct _fpxreg *from; +@@ -234,23 +235,25 @@ static int convert_fxsr_to_user( struct + if ( __copy_to_user( buf, env, 7 * sizeof(unsigned long) ) ) + return 1; + +- to = &buf->_st[0]; ++ to = tmp; + from = (struct _fpxreg *) &fxsave->st_space[0]; + for ( i = 0 ; i < 8 ; i++, to++, from++ ) { + unsigned long *t = (unsigned long *)to; + unsigned long *f = (unsigned long *)from; + +- if (__put_user(*f, t) || +- __put_user(*(f + 1), t + 1) || +- __put_user(from->exponent, &to->exponent)) +- return 1; ++ *t = *f; ++ *(t + 1) = *(f+1); ++ to->exponent = from->exponent; + } ++ if (copy_to_user(buf->_st, tmp, sizeof(struct _fpreg [8]))) ++ return 1; + return 0; + } + + static int convert_fxsr_from_user( struct i387_fxsave_struct *fxsave, + struct _fpstate __user *buf ) + { ++ struct _fpreg tmp[8]; /* 80 bytes scratch area */ + unsigned long env[7]; + struct _fpxreg *to; + struct _fpreg __user *from; +@@ -258,6 +261,8 @@ static int convert_fxsr_from_user( struc + + if ( __copy_from_user( env, buf, 7 * sizeof(long) ) ) + return 1; ++ if (copy_from_user(tmp, buf->_st, sizeof(struct _fpreg [8]))) ++ return 1; + + fxsave->cwd = (unsigned short)(env[0] & 0xffff); + fxsave->swd = (unsigned short)(env[1] & 0xffff); +@@ -269,15 +274,14 @@ static int convert_fxsr_from_user( struc + fxsave->fos = env[6]; + + to = (struct _fpxreg *) &fxsave->st_space[0]; +- from = &buf->_st[0]; ++ from = tmp; + for ( i = 0 ; i < 8 ; i++, to++, from++ ) { + unsigned long *t = (unsigned long *)to; + unsigned long *f = (unsigned long *)from; + +- if (__get_user(*t, f) || +- __get_user(*(t + 1), f + 1) || +- __get_user(to->exponent, &from->exponent)) +- return 1; ++ *t = *f; ++ *(t + 1) = *(f + 1); ++ to->exponent = 
from->exponent; + } + return 0; + } +--- linux-2.6.0/arch/i386/kernel/i8259.c 2003-11-09 16:45:04.000000000 -0800 ++++ 25/arch/i386/kernel/i8259.c 2003-12-28 23:21:44.000000000 -0800 +@@ -419,8 +419,10 @@ void __init init_IRQ(void) + * us. (some of these will be overridden and become + * 'special' SMP interrupts) + */ +- for (i = 0; i < NR_IRQS; i++) { ++ for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) { + int vector = FIRST_EXTERNAL_VECTOR + i; ++ if (i >= NR_IRQS) ++ break; + if (vector != SYSCALL_VECTOR) + set_intr_gate(vector, interrupt[i]); + } +--- linux-2.6.0/arch/i386/kernel/init_task.c 2003-11-09 16:45:04.000000000 -0800 ++++ 25/arch/i386/kernel/init_task.c 2003-12-28 23:26:36.000000000 -0800 +@@ -26,7 +26,7 @@ EXPORT_SYMBOL(init_mm); + */ + union thread_union init_thread_union + __attribute__((__section__(".data.init_task"))) = +- { INIT_THREAD_INFO(init_task) }; ++ { INIT_THREAD_INFO(init_task, init_thread_union) }; + + /* + * Initial task structure. +@@ -44,5 +44,5 @@ EXPORT_SYMBOL(init_task); + * section. Since TSS's are completely CPU-local, we want them + * on exact cacheline boundaries, to eliminate cacheline ping-pong. + */ +-struct tss_struct init_tss[NR_CPUS] __cacheline_aligned = { [0 ... NR_CPUS-1] = INIT_TSS }; ++struct tss_struct init_tss[NR_CPUS] __attribute__((__section__(".data.tss"))) = { [0 ... NR_CPUS-1] = INIT_TSS }; + +--- linux-2.6.0/arch/i386/kernel/io_apic.c 2003-11-09 16:45:04.000000000 -0800 ++++ 25/arch/i386/kernel/io_apic.c 2003-12-28 23:21:44.000000000 -0800 +@@ -76,6 +76,14 @@ static struct irq_pin_list { + int apic, pin, next; + } irq_2_pin[PIN_MAP_SIZE]; + ++#ifdef CONFIG_PCI_USE_VECTOR ++int vector_irq[NR_IRQS] = { [0 ... NR_IRQS -1] = -1}; ++#define vector_to_irq(vector) \ ++ (platform_legacy_irq(vector) ? vector : vector_irq[vector]) ++#else ++#define vector_to_irq(vector) (vector) ++#endif ++ + /* + * The common case is 1:1 IRQ<->pin mappings. Sometimes there are + * shared ISA-space IRQs, so we have to support them. We are super +@@ -249,7 +257,7 @@ static void clear_IO_APIC (void) + clear_IO_APIC_pin(apic, pin); + } + +-static void set_ioapic_affinity(unsigned int irq, cpumask_t cpumask) ++static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask) + { + unsigned long flags; + int pin; +@@ -288,7 +296,7 @@ static void set_ioapic_affinity(unsigned + + extern cpumask_t irq_affinity[NR_IRQS]; + +-static cpumask_t __cacheline_aligned pending_irq_balance_cpumask[NR_IRQS]; ++cpumask_t __cacheline_aligned pending_irq_balance_cpumask[NR_IRQS]; + + #define IRQBALANCE_CHECK_ARCH -999 + static int irqbalance_disabled = IRQBALANCE_CHECK_ARCH; +@@ -670,13 +678,11 @@ static int __init irqbalance_disable(cha + + __setup("noirqbalance", irqbalance_disable); + +-static void set_ioapic_affinity(unsigned int irq, cpumask_t mask); +- + static inline void move_irq(int irq) + { + /* note - we hold the desc->lock */ + if (unlikely(!cpus_empty(pending_irq_balance_cpumask[irq]))) { +- set_ioapic_affinity(irq, pending_irq_balance_cpumask[irq]); ++ set_ioapic_affinity_irq(irq, pending_irq_balance_cpumask[irq]); + cpus_clear(pending_irq_balance_cpumask[irq]); + } + } +@@ -853,7 +859,7 @@ void __init setup_ioapic_dest(cpumask_t + if (irq_entry == -1) + continue; + irq = pin_2_irq(irq_entry, ioapic, pin); +- set_ioapic_affinity(irq, mask); ++ set_ioapic_affinity_irq(irq, mask); + } + + } +@@ -1141,7 +1147,8 @@ static inline int IO_APIC_irq_trigger(in + /* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. 
*/ + u8 irq_vector[NR_IRQ_VECTORS] = { FIRST_DEVICE_VECTOR , 0 }; + +-static int __init assign_irq_vector(int irq) ++#ifndef CONFIG_PCI_USE_VECTOR ++int __init assign_irq_vector(int irq) + { + static int current_vector = FIRST_DEVICE_VECTOR, offset = 0; + BUG_ON(irq >= NR_IRQ_VECTORS); +@@ -1158,11 +1165,36 @@ next: + } + + IO_APIC_VECTOR(irq) = current_vector; ++ + return current_vector; + } ++#endif ++ ++static struct hw_interrupt_type ioapic_level_type; ++static struct hw_interrupt_type ioapic_edge_type; + +-static struct hw_interrupt_type ioapic_level_irq_type; +-static struct hw_interrupt_type ioapic_edge_irq_type; ++#define IOAPIC_AUTO -1 ++#define IOAPIC_EDGE 0 ++#define IOAPIC_LEVEL 1 ++ ++static inline void ioapic_register_intr(int irq, int vector, unsigned long trigger) ++{ ++ if (use_pci_vector() && !platform_legacy_irq(irq)) { ++ if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || ++ trigger == IOAPIC_LEVEL) ++ irq_desc[vector].handler = &ioapic_level_type; ++ else ++ irq_desc[vector].handler = &ioapic_edge_type; ++ set_intr_gate(vector, interrupt[vector]); ++ } else { ++ if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || ++ trigger == IOAPIC_LEVEL) ++ irq_desc[irq].handler = &ioapic_level_type; ++ else ++ irq_desc[irq].handler = &ioapic_edge_type; ++ set_intr_gate(vector, interrupt[irq]); ++ } ++} + + void __init setup_IO_APIC_irqs(void) + { +@@ -1220,13 +1252,7 @@ void __init setup_IO_APIC_irqs(void) + if (IO_APIC_IRQ(irq)) { + vector = assign_irq_vector(irq); + entry.vector = vector; +- +- if (IO_APIC_irq_trigger(irq)) +- irq_desc[irq].handler = &ioapic_level_irq_type; +- else +- irq_desc[irq].handler = &ioapic_edge_irq_type; +- +- set_intr_gate(vector, interrupt[irq]); ++ ioapic_register_intr(irq, vector, IOAPIC_AUTO); + + if (!apic && (irq < 16)) + disable_8259A_irq(irq); +@@ -1273,7 +1299,7 @@ void __init setup_ExtINT_IRQ0_pin(unsign + * The timer IRQ doesn't have to know that behind the + * scene we have a 8259A-master in AEOI mode ... + */ +- irq_desc[0].handler = &ioapic_edge_irq_type; ++ irq_desc[0].handler = &ioapic_edge_type; + + /* + * Add it to the IO-APIC irq-routing table: +@@ -1624,10 +1650,6 @@ static void __init setup_ioapic_ids_from + unsigned char old_id; + unsigned long flags; + +- if (acpi_ioapic) +- /* This gets done during IOAPIC enumeration for ACPI. */ +- return; +- + /* + * This is broken; anything with a real cpu count has to + * circumvent this idiocy regardless. +@@ -1763,9 +1785,6 @@ static int __init timer_irq_works(void) + * that was delayed but this is now handled in the device + * independent code. + */ +-#define enable_edge_ioapic_irq unmask_IO_APIC_irq +- +-static void disable_edge_ioapic_irq (unsigned int irq) { /* nothing */ } + + /* + * Starting up a edge-triggered IO-APIC interrupt is +@@ -1776,7 +1795,6 @@ static void disable_edge_ioapic_irq (uns + * This is not complete - we should be able to fake + * an edge even if it isn't on the 8259A... + */ +- + static unsigned int startup_edge_ioapic_irq(unsigned int irq) + { + int was_pending = 0; +@@ -1794,8 +1812,6 @@ static unsigned int startup_edge_ioapic_ + return was_pending; + } + +-#define shutdown_edge_ioapic_irq disable_edge_ioapic_irq +- + /* + * Once we have recorded IRQ_PENDING already, we can mask the + * interrupt for real. 
This prevents IRQ storms from unhandled +@@ -1810,9 +1826,6 @@ static void ack_edge_ioapic_irq(unsigned + ack_APIC_irq(); + } + +-static void end_edge_ioapic_irq (unsigned int i) { /* nothing */ } +- +- + /* + * Level triggered interrupts can just be masked, + * and shutting down and starting up the interrupt +@@ -1834,10 +1847,6 @@ static unsigned int startup_level_ioapic + return 0; /* don't check for pending */ + } + +-#define shutdown_level_ioapic_irq mask_IO_APIC_irq +-#define enable_level_ioapic_irq unmask_IO_APIC_irq +-#define disable_level_ioapic_irq mask_IO_APIC_irq +- + static void end_level_ioapic_irq (unsigned int irq) + { + unsigned long v; +@@ -1864,6 +1873,7 @@ static void end_level_ioapic_irq (unsign + * The idea is from Manfred Spraul. --macro + */ + i = IO_APIC_VECTOR(irq); ++ + v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1)); + + ack_APIC_irq(); +@@ -1898,7 +1908,57 @@ static void end_level_ioapic_irq (unsign + } + } + +-static void mask_and_ack_level_ioapic_irq (unsigned int irq) { /* nothing */ } ++#ifdef CONFIG_PCI_USE_VECTOR ++static unsigned int startup_edge_ioapic_vector(unsigned int vector) ++{ ++ int irq = vector_to_irq(vector); ++ ++ return startup_edge_ioapic_irq(irq); ++} ++ ++static void ack_edge_ioapic_vector(unsigned int vector) ++{ ++ int irq = vector_to_irq(vector); ++ ++ ack_edge_ioapic_irq(irq); ++} ++ ++static unsigned int startup_level_ioapic_vector (unsigned int vector) ++{ ++ int irq = vector_to_irq(vector); ++ ++ return startup_level_ioapic_irq (irq); ++} ++ ++static void end_level_ioapic_vector (unsigned int vector) ++{ ++ int irq = vector_to_irq(vector); ++ ++ end_level_ioapic_irq(irq); ++} ++ ++static void mask_IO_APIC_vector (unsigned int vector) ++{ ++ int irq = vector_to_irq(vector); ++ ++ mask_IO_APIC_irq(irq); ++} ++ ++static void unmask_IO_APIC_vector (unsigned int vector) ++{ ++ int irq = vector_to_irq(vector); ++ ++ unmask_IO_APIC_irq(irq); ++} ++ ++static void set_ioapic_affinity_vector (unsigned int vector, ++ cpumask_t cpu_mask) ++{ ++ int irq = vector_to_irq(vector); ++ ++ set_ioapic_affinity_irq(irq, cpu_mask); ++} ++#endif + + /* + * Level and edge triggered IO-APIC interrupts need different handling, +@@ -1908,26 +1968,25 @@ static void mask_and_ack_level_ioapic_ir + * edge-triggered handler, without risking IRQ storms and other ugly + * races. 
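++ * (Note: with CONFIG_PCI_USE_VECTOR, irq_desc[] is indexed by vector
++ * rather than by irq, so the *_vector wrappers above translate back
++ * through vector_to_irq() before calling the plain irq routines.)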
+ */ +- +-static struct hw_interrupt_type ioapic_edge_irq_type = { ++static struct hw_interrupt_type ioapic_edge_type = { + .typename = "IO-APIC-edge", +- .startup = startup_edge_ioapic_irq, +- .shutdown = shutdown_edge_ioapic_irq, +- .enable = enable_edge_ioapic_irq, +- .disable = disable_edge_ioapic_irq, +- .ack = ack_edge_ioapic_irq, +- .end = end_edge_ioapic_irq, ++ .startup = startup_edge_ioapic, ++ .shutdown = shutdown_edge_ioapic, ++ .enable = enable_edge_ioapic, ++ .disable = disable_edge_ioapic, ++ .ack = ack_edge_ioapic, ++ .end = end_edge_ioapic, + .set_affinity = set_ioapic_affinity, + }; + +-static struct hw_interrupt_type ioapic_level_irq_type = { ++static struct hw_interrupt_type ioapic_level_type = { + .typename = "IO-APIC-level", +- .startup = startup_level_ioapic_irq, +- .shutdown = shutdown_level_ioapic_irq, +- .enable = enable_level_ioapic_irq, +- .disable = disable_level_ioapic_irq, +- .ack = mask_and_ack_level_ioapic_irq, +- .end = end_level_ioapic_irq, ++ .startup = startup_level_ioapic, ++ .shutdown = shutdown_level_ioapic, ++ .enable = enable_level_ioapic, ++ .disable = disable_level_ioapic, ++ .ack = mask_and_ack_level_ioapic, ++ .end = end_level_ioapic, + .set_affinity = set_ioapic_affinity, + }; + +@@ -1947,7 +2006,13 @@ static inline void init_IO_APIC_traps(vo + * 0x80, because int 0x80 is hm, kind of importantish. ;) + */ + for (irq = 0; irq < NR_IRQS ; irq++) { +- if (IO_APIC_IRQ(irq) && !IO_APIC_VECTOR(irq)) { ++ int tmp = irq; ++ if (use_pci_vector()) { ++ if (!platform_legacy_irq(tmp)) ++ if ((tmp = vector_to_irq(tmp)) == -1) ++ continue; ++ } ++ if (IO_APIC_IRQ(tmp) && !IO_APIC_VECTOR(tmp)) { + /* + * Hmm.. We don't have an entry for this, + * so default to an old-fashioned 8259 +@@ -2217,12 +2282,14 @@ void __init setup_IO_APIC(void) + /* + * Set up IO-APIC IRQ routing. + */ +- setup_ioapic_ids_from_mpc(); ++ if (!acpi_ioapic) ++ setup_ioapic_ids_from_mpc(); + sync_Arb_IDs(); + setup_IO_APIC_irqs(); + init_IO_APIC_traps(); + check_timer(); +- print_IO_APIC(); ++ if (!acpi_ioapic) ++ print_IO_APIC(); + } + + /* +@@ -2379,10 +2446,12 @@ int io_apic_set_pci_routing (int ioapic, + "IRQ %d Mode:%i Active:%i)\n", ioapic, + mp_ioapics[ioapic].mpc_apicid, pin, entry.vector, irq, edge_level, active_high_low); + ++ if (use_pci_vector() && !platform_legacy_irq(irq)) ++ irq = IO_APIC_VECTOR(irq); + if (edge_level) { +- irq_desc[irq].handler = &ioapic_level_irq_type; ++ irq_desc[irq].handler = &ioapic_level_type; + } else { +- irq_desc[irq].handler = &ioapic_edge_irq_type; ++ irq_desc[irq].handler = &ioapic_edge_type; + } + + set_intr_gate(entry.vector, interrupt[irq]); +--- linux-2.6.0/arch/i386/kernel/irq.c 2003-11-09 16:45:04.000000000 -0800 ++++ 25/arch/i386/kernel/irq.c 2003-12-28 23:22:10.000000000 -0800 +@@ -138,17 +138,19 @@ atomic_t irq_mis_count; + + int show_interrupts(struct seq_file *p, void *v) + { +- int i, j; ++ int i = *(loff_t *) v, j; + struct irqaction * action; + unsigned long flags; + +- seq_printf(p, " "); +- for (j=0; j HEX_DIGITS) +- count = HEX_DIGITS; +- if (copy_from_user(hexnum, buffer, count)) +- return -EFAULT; +- +- /* +- * Parse the first HEX_DIGITS characters as a hex string, any non-hex char +- * is end-of-string. '00e1', 'e1', '00E1', 'E1' are all the same. +- */ +- +- for (i = 0; i < count; i++) { +- unsigned int c = hexnum[i]; +- int k; +- +- switch (c) { +- case '0' ... '9': c -= '0'; break; +- case 'a' ... 'f': c -= 'a'-10; break; +- case 'A' ... 
'F': c -= 'A'-10; break; +- default: +- goto out; +- } +- cpus_shift_left(value, value, 4); +- for (k = 0; k < 4; ++k) +- if (test_bit(k, (unsigned long *)&c)) +- cpu_set(k, value); +- } +-out: +- *ret = value; +- return 0; +-} +- + #ifdef CONFIG_SMP + + static struct proc_dir_entry *smp_affinity_entry[NR_IRQS]; +@@ -949,20 +915,10 @@ cpumask_t irq_affinity[NR_IRQS] = { [0 . + static int irq_affinity_read_proc(char *page, char **start, off_t off, + int count, int *eof, void *data) + { +- int k, len; +- cpumask_t tmp = irq_affinity[(long)data]; +- +- if (count < HEX_DIGITS+1) ++ int len = cpumask_snprintf(page, count, irq_affinity[(long)data]); ++ if (count - len < 2) + return -EINVAL; +- +- len = 0; +- for (k = 0; k < sizeof(cpumask_t)/sizeof(u16); ++k) { +- int j = sprintf(page, "%04hx", (u16)cpus_coerce(tmp)); +- len += j; +- page += j; +- cpus_shift_right(tmp, tmp, 16); +- } +- len += sprintf(page, "\n"); ++ len += sprintf(page + len, "\n"); + return len; + } + +@@ -975,7 +931,7 @@ static int irq_affinity_write_proc(struc + if (!irq_desc[irq].handler->set_affinity) + return -EIO; + +- err = parse_hex_value(buffer, count, &new_value); ++ err = cpumask_parse(buffer, count, new_value); + if (err) + return err; + +@@ -1000,10 +956,11 @@ static int irq_affinity_write_proc(struc + static int prof_cpu_mask_read_proc (char *page, char **start, off_t off, + int count, int *eof, void *data) + { +- unsigned long *mask = (unsigned long *) data; +- if (count < HEX_DIGITS+1) ++ int len = cpumask_snprintf(page, count, *(cpumask_t *)data); ++ if (count - len < 2) + return -EINVAL; +- return sprintf (page, "%08lx\n", *mask); ++ len += sprintf(page + len, "\n"); ++ return len; + } + + static int prof_cpu_mask_write_proc (struct file *file, const char __user *buffer, +@@ -1013,7 +970,7 @@ static int prof_cpu_mask_write_proc (str + unsigned long full_count = count, err; + cpumask_t new_value; + +- err = parse_hex_value(buffer, count, &new_value); ++ err = cpumask_parse(buffer, count, new_value); + if (err) + return err; + +--- /dev/null 2002-08-30 16:31:37.000000000 -0700 ++++ 25/arch/i386/kernel/kgdb_stub.c 2003-12-28 23:21:09.000000000 -0800 +@@ -0,0 +1,2457 @@ ++/* ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2, or (at your option) any ++ * later version. ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ */ ++ ++/* ++ * Copyright (c) 2000 VERITAS Software Corporation. ++ * ++ */ ++/**************************************************************************** ++ * Header: remcom.c,v 1.34 91/03/09 12:29:49 glenne Exp $ ++ * ++ * Module name: remcom.c $ ++ * Revision: 1.34 $ ++ * Date: 91/03/09 12:29:49 $ ++ * Contributor: Lake Stevens Instrument Division$ ++ * ++ * Description: low level support for gdb debugger. $ ++ * ++ * Considerations: only works on target hardware $ ++ * ++ * Written by: Glenn Engel $ ++ * Updated by: David Grothe ++ * Updated by: Robert Walsh ++ * Updated by: wangdi ++ * ModuleState: Experimental $ ++ * ++ * NOTES: See Below $ ++ * ++ * Modified for 386 by Jim Kingdon, Cygnus Support. ++ * Compatibility with 2.1.xx kernel by David Grothe ++ * ++ * Changes to allow auto initilization. 
All that is needed is that it ++ * be linked with the kernel and a break point (int 3) be executed. ++ * The header file defines BREAKPOINT to allow one to do ++ * this. It should also be possible, once the interrupt system is up, to ++ * call putDebugChar("+"). Once this is done, the remote debugger should ++ * get our attention by sending a ^C in a packet. George Anzinger ++ * ++ * Integrated into 2.2.5 kernel by Tigran Aivazian ++ * Added thread support, support for multiple processors, ++ * support for ia-32(x86) hardware debugging. ++ * Amit S. Kale ( akale@veritas.com ) ++ * ++ * Modified to support debugging over ethernet by Robert Walsh ++ * and wangdi , based on ++ * code by San Mehat. ++ * ++ * ++ * To enable debugger support, two things need to happen. One, a ++ * call to set_debug_traps() is necessary in order to allow any breakpoints ++ * or error conditions to be properly intercepted and reported to gdb. ++ * Two, a breakpoint needs to be generated to begin communication. This ++ * is most easily accomplished by a call to breakpoint(). Breakpoint() ++ * simulates a breakpoint by executing an int 3. ++ * ++ ************* ++ * ++ * The following gdb commands are supported: ++ * ++ * command function Return value ++ * ++ * g return the value of the CPU registers hex data or ENN ++ * G set the value of the CPU registers OK or ENN ++ * ++ * mAA..AA,LLLL Read LLLL bytes at address AA..AA hex data or ENN ++ * MAA..AA,LLLL: Write LLLL bytes at address AA.AA OK or ENN ++ * ++ * c Resume at current address SNN ( signal NN) ++ * cAA..AA Continue at address AA..AA SNN ++ * ++ * s Step one instruction SNN ++ * sAA..AA Step one instruction from AA..AA SNN ++ * ++ * k kill ++ * ++ * ? What was the last sigval ? SNN (signal NN) ++ * ++ * All commands and responses are sent with a packet which includes a ++ * checksum. A packet consists of ++ * ++ * $#. ++ * ++ * where ++ * :: ++ * :: < two hex digits computed as modulo 256 sum of > ++ * ++ * When a packet is received, it is first acknowledged with either '+' or '-'. ++ * '+' indicates a successful transfer. '-' indicates a failed transfer. 
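++ *
++ * The checksum is the modulo-256 sum of the payload characters. A
++ * minimal sketch of the loop putpacket() below implements:
++ *
++ *	unsigned char checksum = 0;
++ *	while ((ch = buffer[count])) {
++ *		checksum += ch;
++ *		count += 1;
++ *	}
++ *	... then emit hexchars[checksum >> 4], hexchars[checksum % 16]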
++ * ++ * Example: ++ * ++ * Host: Reply: ++ * $m0,10#2a +$00010203040506070809101112131415#42 ++ * ++ ****************************************************************************/ ++#define KGDB_VERSION "<20030915.1651.33>" ++#include ++#include ++#include /* for strcpy */ ++#include ++#include ++#include ++#include ++#include /* for linux pt_regs struct */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/************************************************************************ ++ * ++ * external low-level support routines ++ */ ++typedef void (*Function) (void); /* pointer to a function */ ++ ++/* Thread reference */ ++typedef unsigned char threadref[8]; ++ ++extern int tty_putDebugChar(int); /* write a single character */ ++extern int tty_getDebugChar(void); /* read and return a single char */ ++extern void tty_flushDebugChar(void); /* flush pending characters */ ++extern int eth_putDebugChar(int); /* write a single character */ ++extern int eth_getDebugChar(void); /* read and return a single char */ ++extern void eth_flushDebugChar(void); /* flush pending characters */ ++ ++/************************************************************************/ ++/* BUFMAX defines the maximum number of characters in inbound/outbound buffers*/ ++/* at least NUMREGBYTES*2 are needed for register packets */ ++/* Longer buffer is needed to list all threads */ ++#define BUFMAX 400 ++ ++char *kgdb_version = KGDB_VERSION; ++ ++/* debug > 0 prints ill-formed commands in valid packets & checksum errors */ ++int debug_regs = 0; /* set to non-zero to print registers */ ++ ++/* filled in by an external module */ ++char *gdb_module_offsets; ++ ++static const char hexchars[] = "0123456789abcdef"; ++ ++/* Number of bytes of registers. */ ++#define NUMREGBYTES 64 ++/* ++ * Note that this register image is in a different order than ++ * the register image that Linux produces at interrupt time. ++ * ++ * Linux's register image is defined by struct pt_regs in ptrace.h. ++ * Just why GDB uses a different order is a historical mystery. ++ */ ++enum regnames { _EAX, /* 0 */ ++ _ECX, /* 1 */ ++ _EDX, /* 2 */ ++ _EBX, /* 3 */ ++ _ESP, /* 4 */ ++ _EBP, /* 5 */ ++ _ESI, /* 6 */ ++ _EDI, /* 7 */ ++ _PC /* 8 also known as eip */ , ++ _PS /* 9 also known as eflags */ , ++ _CS, /* 10 */ ++ _SS, /* 11 */ ++ _DS, /* 12 */ ++ _ES, /* 13 */ ++ _FS, /* 14 */ ++ _GS /* 15 */ ++}; ++ ++/*************************** ASSEMBLY CODE MACROS *************************/ ++/* ++ * Put the error code here just in case the user cares. ++ * Likewise, the vector number here (since GDB only gets the signal ++ * number through the usual means, and that's not very specific). ++ * The called_from is the return address so he can tell how we entered kgdb. ++ * This will allow him to seperate out the various possible entries. 
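++ * (These land in the errcode, vector and called_from fields of the
++ * kgdb_info structure defined below.)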
++ */ ++#define REMOTE_DEBUG 0 /* set != to turn on printing (also available in info) */ ++ ++#define PID_MAX PID_MAX_DEFAULT ++ ++#ifdef CONFIG_SMP ++void smp_send_nmi_allbutself(void); ++#define IF_SMP(x) x ++#undef MAX_NO_CPUS ++#ifndef CONFIG_NO_KGDB_CPUS ++#define CONFIG_NO_KGDB_CPUS 2 ++#endif ++#if CONFIG_NO_KGDB_CPUS > NR_CPUS ++#define MAX_NO_CPUS NR_CPUS ++#else ++#define MAX_NO_CPUS CONFIG_NO_KGDB_CPUS ++#endif ++#define hold_init hold_on_sstep: 1, ++#define MAX_CPU_MASK (unsigned long)((1LL << MAX_NO_CPUS) - 1LL) ++#define NUM_CPUS num_online_cpus() ++#else ++#define IF_SMP(x) ++#define hold_init ++#undef MAX_NO_CPUS ++#define MAX_NO_CPUS 1 ++#define NUM_CPUS 1 ++#endif ++#define NOCPU (struct task_struct *)0xbad1fbad ++/* *INDENT-OFF* */ ++struct kgdb_info { ++ int used_malloc; ++ void *called_from; ++ long long entry_tsc; ++ int errcode; ++ int vector; ++ int print_debug_info; ++#ifdef CONFIG_SMP ++ int hold_on_sstep; ++ struct { ++ volatile struct task_struct *task; ++ int pid; ++ int hold; ++ struct pt_regs *regs; ++ } cpus_waiting[MAX_NO_CPUS]; ++#endif ++} kgdb_info = {hold_init print_debug_info:REMOTE_DEBUG, vector:-1}; ++ ++/* *INDENT-ON* */ ++ ++#define used_m kgdb_info.used_malloc ++/* ++ * This is little area we set aside to contain the stack we ++ * need to build to allow gdb to call functions. We use one ++ * per cpu to avoid locking issues. We will do all this work ++ * with interrupts off so that should take care of the protection ++ * issues. ++ */ ++#define LOOKASIDE_SIZE 200 /* should be more than enough */ ++#define MALLOC_MAX 200 /* Max malloc size */ ++struct { ++ unsigned int esp; ++ int array[LOOKASIDE_SIZE]; ++} fn_call_lookaside[MAX_NO_CPUS]; ++ ++static int trap_cpu; ++static unsigned int OLD_esp; ++ ++#define END_OF_LOOKASIDE &fn_call_lookaside[trap_cpu].array[LOOKASIDE_SIZE] ++#define IF_BIT 0x200 ++#define TF_BIT 0x100 ++ ++#define MALLOC_ROUND 8-1 ++ ++static char malloc_array[MALLOC_MAX]; ++IF_SMP(static void to_gdb(const char *mess)); ++void * ++malloc(int size) ++{ ++ ++ if (size <= (MALLOC_MAX - used_m)) { ++ int old_used = used_m; ++ used_m += ((size + MALLOC_ROUND) & (~MALLOC_ROUND)); ++ return &malloc_array[old_used]; ++ } else { ++ return NULL; ++ } ++} ++ ++/* ++ * I/O dispatch functions... ++ * Based upon kgdboe, either call the ethernet ++ * handler or the serial one.. ++ */ ++void ++putDebugChar(int c) ++{ ++ if (!kgdboe) { ++ tty_putDebugChar(c); ++ } else { ++ eth_putDebugChar(c); ++ } ++} ++ ++int ++getDebugChar(void) ++{ ++ if (!kgdboe) { ++ return tty_getDebugChar(); ++ } else { ++ return eth_getDebugChar(); ++ } ++} ++ ++void ++flushDebugChar(void) ++{ ++ if (!kgdboe) { ++ tty_flushDebugChar(); ++ } else { ++ eth_flushDebugChar(); ++ } ++} ++ ++/* ++ * Gdb calls functions by pushing agruments, including a return address ++ * on the stack and the adjusting EIP to point to the function. The ++ * whole assumption in GDB is that we are on a different stack than the ++ * one the "user" i.e. code that hit the break point, is on. This, of ++ * course is not true in the kernel. Thus various dodges are needed to ++ * do the call without directly messing with EIP (which we can not change ++ * as it is just a location and not a register. To adjust it would then ++ * require that we move every thing below EIP up or down as needed. This ++ * will not work as we may well have stack relative pointer on the stack ++ * (such as the pointer to regs, for example). 
++ ++ * So here is what we do: ++ * We detect gdb attempting to store into the stack area and instead, store ++ * into the fn_call_lookaside.array at the same relative location as if it ++ * were the area ESP pointed at. We also trap ESP modifications ++ * and uses these to adjust fn_call_lookaside.esp. On entry ++ * fn_call_lookaside.esp will be set to point at the last entry in ++ * fn_call_lookaside.array. This allows us to check if it has changed, and ++ * if so, on exit, we add the registers we will use to do the move and a ++ * trap/ interrupt return exit sequence. We then adjust the eflags in the ++ * regs array (remember we now have a copy in the fn_call_lookaside.array) to ++ * kill the interrupt bit, AND we change EIP to point at our set up stub. ++ * As part of the register set up we preset the registers to point at the ++ * begining and end of the fn_call_lookaside.array, so all the stub needs to ++ * do is move words from the array to the stack until ESP= the desired value ++ * then do the rti. This will then transfer to the desired function with ++ * all the correct registers. Nifty huh? ++ */ ++extern asmlinkage void fn_call_stub(void); ++extern asmlinkage void fn_rtn_stub(void); ++/* *INDENT-OFF* */ ++__asm__("fn_rtn_stub:\n\t" ++ "movl %eax,%esp\n\t" ++ "fn_call_stub:\n\t" ++ "1:\n\t" ++ "addl $-4,%ebx\n\t" ++ "movl (%ebx), %eax\n\t" ++ "pushl %eax\n\t" ++ "cmpl %esp,%ecx\n\t" ++ "jne 1b\n\t" ++ "popl %eax\n\t" ++ "popl %ebx\n\t" ++ "popl %ecx\n\t" ++ "iret \n\t"); ++/* *INDENT-ON* */ ++#define gdb_i386vector kgdb_info.vector ++#define gdb_i386errcode kgdb_info.errcode ++#define waiting_cpus kgdb_info.cpus_waiting ++#define remote_debug kgdb_info.print_debug_info ++#define hold_cpu(cpu) kgdb_info.cpus_waiting[cpu].hold ++/* gdb locks */ ++ ++#ifdef CONFIG_SMP ++static int in_kgdb_called; ++static spinlock_t waitlocks[MAX_NO_CPUS] = ++ {[0 ... MAX_NO_CPUS - 1] = SPIN_LOCK_UNLOCKED }; ++/* ++ * The following array has the thread pointer of each of the "other" ++ * cpus. We make it global so it can be seen by gdb. ++ */ ++volatile int in_kgdb_entry_log[MAX_NO_CPUS]; ++volatile struct pt_regs *in_kgdb_here_log[MAX_NO_CPUS]; ++/* ++static spinlock_t continuelocks[MAX_NO_CPUS]; ++*/ ++spinlock_t kgdb_spinlock = SPIN_LOCK_UNLOCKED; ++/* waiters on our spinlock plus us */ ++static atomic_t spinlock_waiters = ATOMIC_INIT(1); ++static int spinlock_count = 0; ++static int spinlock_cpu = 0; ++/* ++ * Note we use nested spin locks to account for the case where a break ++ * point is encountered when calling a function by user direction from ++ * kgdb. Also there is the memory exception recursion to account for. ++ * Well, yes, but this lets other cpus thru too. Lets add a ++ * cpu id to the lock. ++ */ ++#define KGDB_SPIN_LOCK(x) if( spinlock_count == 0 || \ ++ spinlock_cpu != smp_processor_id()){\ ++ atomic_inc(&spinlock_waiters); \ ++ while (! 
spin_trylock(x)) {\ ++ in_kgdb(®s);\ ++ }\ ++ atomic_dec(&spinlock_waiters); \ ++ spinlock_count = 1; \ ++ spinlock_cpu = smp_processor_id(); \ ++ }else{ \ ++ spinlock_count++; \ ++ } ++#define KGDB_SPIN_UNLOCK(x) if( --spinlock_count == 0) spin_unlock(x) ++#else ++unsigned kgdb_spinlock = 0; ++#define KGDB_SPIN_LOCK(x) --*x ++#define KGDB_SPIN_UNLOCK(x) ++*x ++#endif ++ ++int ++hex(char ch) ++{ ++ if ((ch >= 'a') && (ch <= 'f')) ++ return (ch - 'a' + 10); ++ if ((ch >= '0') && (ch <= '9')) ++ return (ch - '0'); ++ if ((ch >= 'A') && (ch <= 'F')) ++ return (ch - 'A' + 10); ++ return (-1); ++} ++ ++/* scan for the sequence $# */ ++void ++getpacket(char *buffer) ++{ ++ unsigned char checksum; ++ unsigned char xmitcsum; ++ int i; ++ int count; ++ char ch; ++ ++ do { ++ /* wait around for the start character, ignore all other characters */ ++ while ((ch = (getDebugChar() & 0x7f)) != '$') ; ++ checksum = 0; ++ xmitcsum = -1; ++ ++ count = 0; ++ ++ /* now, read until a # or end of buffer is found */ ++ while (count < BUFMAX) { ++ ch = getDebugChar() & 0x7f; ++ if (ch == '#') ++ break; ++ checksum = checksum + ch; ++ buffer[count] = ch; ++ count = count + 1; ++ } ++ buffer[count] = 0; ++ ++ if (ch == '#') { ++ xmitcsum = hex(getDebugChar() & 0x7f) << 4; ++ xmitcsum += hex(getDebugChar() & 0x7f); ++ if ((remote_debug) && (checksum != xmitcsum)) { ++ printk ++ ("bad checksum. My count = 0x%x, sent=0x%x. buf=%s\n", ++ checksum, xmitcsum, buffer); ++ } ++ ++ if (checksum != xmitcsum) ++ putDebugChar('-'); /* failed checksum */ ++ else { ++ putDebugChar('+'); /* successful transfer */ ++ /* if a sequence char is present, reply the sequence ID */ ++ if (buffer[2] == ':') { ++ putDebugChar(buffer[0]); ++ putDebugChar(buffer[1]); ++ /* remove sequence chars from buffer */ ++ count = strlen(buffer); ++ for (i = 3; i <= count; i++) ++ buffer[i - 3] = buffer[i]; ++ } ++ } ++ } ++ } while (checksum != xmitcsum); ++ ++ if (remote_debug) ++ printk("R:%s\n", buffer); ++ flushDebugChar(); ++} ++ ++/* send the packet in buffer. */ ++ ++void ++putpacket(char *buffer) ++{ ++ unsigned char checksum; ++ int count; ++ char ch; ++ ++ /* $#. */ ++ ++ if (!kgdboe) { ++ do { ++ if (remote_debug) ++ printk("T:%s\n", buffer); ++ putDebugChar('$'); ++ checksum = 0; ++ count = 0; ++ ++ while ((ch = buffer[count])) { ++ putDebugChar(ch); ++ checksum += ch; ++ count += 1; ++ } ++ ++ putDebugChar('#'); ++ putDebugChar(hexchars[checksum >> 4]); ++ putDebugChar(hexchars[checksum % 16]); ++ flushDebugChar(); ++ ++ } while ((getDebugChar() & 0x7f) != '+'); ++ } else { ++ /* ++ * For udp, we can not transfer too much bytes once. 
++ * We only transfer MAX_SEND_COUNT size bytes each time ++ */ ++ ++#define MAX_SEND_COUNT 30 ++ ++ int send_count = 0, i = 0; ++ char send_buf[MAX_SEND_COUNT]; ++ ++ do { ++ if (remote_debug) ++ printk("T:%s\n", buffer); ++ putDebugChar('$'); ++ checksum = 0; ++ count = 0; ++ send_count = 0; ++ while ((ch = buffer[count])) { ++ if (send_count >= MAX_SEND_COUNT) { ++ for(i = 0; i < MAX_SEND_COUNT; i++) { ++ putDebugChar(send_buf[i]); ++ } ++ flushDebugChar(); ++ send_count = 0; ++ } else { ++ send_buf[send_count] = ch; ++ checksum += ch; ++ count ++; ++ send_count++; ++ } ++ } ++ for(i = 0; i < send_count; i++) ++ putDebugChar(send_buf[i]); ++ putDebugChar('#'); ++ putDebugChar(hexchars[checksum >> 4]); ++ putDebugChar(hexchars[checksum % 16]); ++ flushDebugChar(); ++ } while ((getDebugChar() & 0x7f) != '+'); ++ } ++} ++ ++static char remcomInBuffer[BUFMAX]; ++static char remcomOutBuffer[BUFMAX]; ++static short error; ++ ++void ++debug_error(char *format, char *parm) ++{ ++ if (remote_debug) ++ printk(format, parm); ++} ++ ++static void ++print_regs(struct pt_regs *regs) ++{ ++ printk("EAX=%08lx ", regs->eax); ++ printk("EBX=%08lx ", regs->ebx); ++ printk("ECX=%08lx ", regs->ecx); ++ printk("EDX=%08lx ", regs->edx); ++ printk("\n"); ++ printk("ESI=%08lx ", regs->esi); ++ printk("EDI=%08lx ", regs->edi); ++ printk("EBP=%08lx ", regs->ebp); ++ printk("ESP=%08lx ", (long) ®s->esp); ++ printk("\n"); ++ printk(" DS=%08x ", regs->xds); ++ printk(" ES=%08x ", regs->xes); ++ printk(" SS=%08x ", __KERNEL_DS); ++ printk(" FL=%08lx ", regs->eflags); ++ printk("\n"); ++ printk(" CS=%08x ", regs->xcs); ++ printk(" IP=%08lx ", regs->eip); ++#if 0 ++ printk(" FS=%08x ", regs->fs); ++ printk(" GS=%08x ", regs->gs); ++#endif ++ printk("\n"); ++ ++} /* print_regs */ ++ ++#define NEW_esp fn_call_lookaside[trap_cpu].esp ++ ++static void ++regs_to_gdb_regs(int *gdb_regs, struct pt_regs *regs) ++{ ++ gdb_regs[_EAX] = regs->eax; ++ gdb_regs[_EBX] = regs->ebx; ++ gdb_regs[_ECX] = regs->ecx; ++ gdb_regs[_EDX] = regs->edx; ++ gdb_regs[_ESI] = regs->esi; ++ gdb_regs[_EDI] = regs->edi; ++ gdb_regs[_EBP] = regs->ebp; ++ gdb_regs[_DS] = regs->xds; ++ gdb_regs[_ES] = regs->xes; ++ gdb_regs[_PS] = regs->eflags; ++ gdb_regs[_CS] = regs->xcs; ++ gdb_regs[_PC] = regs->eip; ++ /* Note, as we are a debugging the kernel, we will always ++ * trap in kernel code, this means no priviledge change, ++ * and so the pt_regs structure is not completely valid. In a non ++ * privilege change trap, only EFLAGS, CS and EIP are put on the stack, ++ * SS and ESP are not stacked, this means that the last 2 elements of ++ * pt_regs is not valid (they would normally refer to the user stack) ++ * also, using regs+1 is no good because you end up will a value that is ++ * 2 longs (8) too high. This used to cause stepping over functions ++ * to fail, so my fix is to use the address of regs->esp, which ++ * should point at the end of the stack frame. Note I have ignored ++ * completely exceptions that cause an error code to be stacked, such ++ * as double fault. Stuart Hughes, Zentropix. ++ * original code: gdb_regs[_ESP] = (int) (regs + 1) ; ++ ++ * this is now done on entry and moved to OLD_esp (as well as NEW_esp). 
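++ * NEW_esp is the per-cpu lookaside slot (fn_call_lookaside[trap_cpu].esp)
++ * captured at trap entry, so the stack pointer handed to gdb is one it
++ * can rewrite when setting up a function call.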
++ */ ++ gdb_regs[_ESP] = NEW_esp; ++ gdb_regs[_SS] = __KERNEL_DS; ++ gdb_regs[_FS] = 0xFFFF; ++ gdb_regs[_GS] = 0xFFFF; ++} /* regs_to_gdb_regs */ ++ ++static void ++gdb_regs_to_regs(int *gdb_regs, struct pt_regs *regs) ++{ ++ regs->eax = gdb_regs[_EAX]; ++ regs->ebx = gdb_regs[_EBX]; ++ regs->ecx = gdb_regs[_ECX]; ++ regs->edx = gdb_regs[_EDX]; ++ regs->esi = gdb_regs[_ESI]; ++ regs->edi = gdb_regs[_EDI]; ++ regs->ebp = gdb_regs[_EBP]; ++ regs->xds = gdb_regs[_DS]; ++ regs->xes = gdb_regs[_ES]; ++ regs->eflags = gdb_regs[_PS]; ++ regs->xcs = gdb_regs[_CS]; ++ regs->eip = gdb_regs[_PC]; ++ NEW_esp = gdb_regs[_ESP]; /* keep the value */ ++#if 0 /* can't change these */ ++ regs->esp = gdb_regs[_ESP]; ++ regs->xss = gdb_regs[_SS]; ++ regs->fs = gdb_regs[_FS]; ++ regs->gs = gdb_regs[_GS]; ++#endif ++ ++} /* gdb_regs_to_regs */ ++extern void scheduling_functions_start_here(void); ++extern void scheduling_functions_end_here(void); ++#define first_sched ((unsigned long) scheduling_functions_start_here) ++#define last_sched ((unsigned long) scheduling_functions_end_here) ++ ++int thread_list = 0; ++ ++void ++get_gdb_regs(struct task_struct *p, struct pt_regs *regs, int *gdb_regs) ++{ ++ unsigned long stack_page; ++ int count = 0; ++ IF_SMP(int i); ++ if (!p || p == current) { ++ regs_to_gdb_regs(gdb_regs, regs); ++ return; ++ } ++#ifdef CONFIG_SMP ++ for (i = 0; i < MAX_NO_CPUS; i++) { ++ if (p == kgdb_info.cpus_waiting[i].task) { ++ regs_to_gdb_regs(gdb_regs, ++ kgdb_info.cpus_waiting[i].regs); ++ gdb_regs[_ESP] = ++ (int) &kgdb_info.cpus_waiting[i].regs->esp; ++ ++ return; ++ } ++ } ++#endif ++ memset(gdb_regs, 0, NUMREGBYTES); ++ gdb_regs[_ESP] = p->thread.esp; ++ gdb_regs[_PC] = p->thread.eip; ++ gdb_regs[_EBP] = *(int *) gdb_regs[_ESP]; ++ gdb_regs[_EDI] = *(int *) (gdb_regs[_ESP] + 4); ++ gdb_regs[_ESI] = *(int *) (gdb_regs[_ESP] + 8); ++ ++/* ++ * This code is to give a more informative notion of where a process ++ * is waiting. It is used only when the user asks for a thread info ++ * list. If he then switches to the thread, s/he will find the task ++ * is in schedule, but a back trace should show the same info we come ++ * up with. This code was shamelessly purloined from process.c. It was ++ * then enhanced to provide more registers than simply the program ++ * counter. ++ */ ++ ++ if (!thread_list) { ++ return; ++ } ++ ++ if (p->state == TASK_RUNNING) ++ return; ++ stack_page = (unsigned long) p->thread_info; ++ if (gdb_regs[_ESP] < stack_page || gdb_regs[_ESP] > 8188 + stack_page) ++ return; ++ /* include/asm-i386/system.h:switch_to() pushes ebp last. */ ++ do { ++ if (gdb_regs[_EBP] < stack_page || ++ gdb_regs[_EBP] > 8184 + stack_page) ++ return; ++ gdb_regs[_PC] = *(unsigned long *) (gdb_regs[_EBP] + 4); ++ gdb_regs[_ESP] = gdb_regs[_EBP] + 8; ++ gdb_regs[_EBP] = *(unsigned long *) gdb_regs[_EBP]; ++ if (gdb_regs[_PC] < first_sched || gdb_regs[_PC] >= last_sched) ++ return; ++ } while (count++ < 16); ++ return; ++} ++ ++/* Indicate to caller of mem2hex or hex2mem that there has been an ++ error. 
*/ ++static volatile int mem_err = 0; ++static volatile int mem_err_expected = 0; ++static volatile int mem_err_cnt = 0; ++static int garbage_loc = -1; ++ ++int ++get_char(char *addr) ++{ ++ return *addr; ++} ++ ++void ++set_char(char *addr, int val, int may_fault) ++{ ++ /* ++ * This code traps references to the area mapped to the kernel ++ * stack as given by the regs and, instead, stores to the ++ * fn_call_lookaside[cpu].array ++ */ ++ if (may_fault && ++ (unsigned int) addr < OLD_esp && ++ ((unsigned int) addr > (OLD_esp - (unsigned int) LOOKASIDE_SIZE))) { ++ addr = (char *) END_OF_LOOKASIDE - ((char *) OLD_esp - addr); ++ } ++ *addr = val; ++} ++ ++/* convert the memory pointed to by mem into hex, placing result in buf */ ++/* return a pointer to the last char put in buf (null) */ ++/* If MAY_FAULT is non-zero, then we should set mem_err in response to ++ a fault; if zero treat a fault like any other fault in the stub. */ ++char * ++mem2hex(char *mem, char *buf, int count, int may_fault) ++{ ++ int i; ++ unsigned char ch; ++ ++ if (may_fault) { ++ mem_err_expected = 1; ++ mem_err = 0; ++ } ++ for (i = 0; i < count; i++) { ++ /* printk("%lx = ", mem) ; */ ++ ++ ch = get_char(mem++); ++ ++ /* printk("%02x\n", ch & 0xFF) ; */ ++ if (may_fault && mem_err) { ++ if (remote_debug) ++ printk("Mem fault fetching from addr %lx\n", ++ (long) (mem - 1)); ++ *buf = 0; /* truncate buffer */ ++ return (buf); ++ } ++ *buf++ = hexchars[ch >> 4]; ++ *buf++ = hexchars[ch % 16]; ++ } ++ *buf = 0; ++ if (may_fault) ++ mem_err_expected = 0; ++ return (buf); ++} ++ ++/* convert the hex array pointed to by buf into binary to be placed in mem */ ++/* return a pointer to the character AFTER the last byte written */ ++/* NOTE: We use the may fault flag to also indicate if the write is to ++ * the registers (0) or "other" memory (!=0) ++ */ ++char * ++hex2mem(char *buf, char *mem, int count, int may_fault) ++{ ++ int i; ++ unsigned char ch; ++ ++ if (may_fault) { ++ mem_err_expected = 1; ++ mem_err = 0; ++ } ++ for (i = 0; i < count; i++) { ++ ch = hex(*buf++) << 4; ++ ch = ch + hex(*buf++); ++ set_char(mem++, ch, may_fault); ++ ++ if (may_fault && mem_err) { ++ if (remote_debug) ++ printk("Mem fault storing to addr %lx\n", ++ (long) (mem - 1)); ++ return (mem); ++ } ++ } ++ if (may_fault) ++ mem_err_expected = 0; ++ return (mem); ++} ++ ++/**********************************************/ ++/* WHILE WE FIND NICE HEX CHARS, BUILD AN INT */ ++/* RETURN NUMBER OF CHARS PROCESSED */ ++/**********************************************/ ++int ++hexToInt(char **ptr, int *intValue) ++{ ++ int numChars = 0; ++ int hexValue; ++ ++ *intValue = 0; ++ ++ while (**ptr) { ++ hexValue = hex(**ptr); ++ if (hexValue >= 0) { ++ *intValue = (*intValue << 4) | hexValue; ++ numChars++; ++ } else ++ break; ++ ++ (*ptr)++; ++ } ++ ++ return (numChars); ++} ++ ++#define stubhex(h) hex(h) ++#ifdef old_thread_list ++ ++static int ++stub_unpack_int(char *buff, int fieldlength) ++{ ++ int nibble; ++ int retval = 0; ++ ++ while (fieldlength) { ++ nibble = stubhex(*buff++); ++ retval |= nibble; ++ fieldlength--; ++ if (fieldlength) ++ retval = retval << 4; ++ } ++ return retval; ++} ++#endif ++static char * ++pack_hex_byte(char *pkt, int byte) ++{ ++ *pkt++ = hexchars[(byte >> 4) & 0xf]; ++ *pkt++ = hexchars[(byte & 0xf)]; ++ return pkt; ++} ++ ++#define BUF_THREAD_ID_SIZE 16 ++ ++static char * ++pack_threadid(char *pkt, threadref * id) ++{ ++ char *limit; ++ unsigned char *altid; ++ ++ altid = (unsigned char *) id; ++ limit = pkt + 
BUF_THREAD_ID_SIZE; ++ while (pkt < limit) ++ pkt = pack_hex_byte(pkt, *altid++); ++ return pkt; ++} ++ ++#ifdef old_thread_list ++static char * ++unpack_byte(char *buf, int *value) ++{ ++ *value = stub_unpack_int(buf, 2); ++ return buf + 2; ++} ++ ++static char * ++unpack_threadid(char *inbuf, threadref * id) ++{ ++ char *altref; ++ char *limit = inbuf + BUF_THREAD_ID_SIZE; ++ int x, y; ++ ++ altref = (char *) id; ++ ++ while (inbuf < limit) { ++ x = stubhex(*inbuf++); ++ y = stubhex(*inbuf++); ++ *altref++ = (x << 4) | y; ++ } ++ return inbuf; ++} ++#endif ++void ++int_to_threadref(threadref * id, int value) ++{ ++ unsigned char *scan; ++ ++ scan = (unsigned char *) id; ++ { ++ int i = 4; ++ while (i--) ++ *scan++ = 0; ++ } ++ *scan++ = (value >> 24) & 0xff; ++ *scan++ = (value >> 16) & 0xff; ++ *scan++ = (value >> 8) & 0xff; ++ *scan++ = (value & 0xff); ++} ++int ++int_to_hex_v(unsigned char * id, int value) ++{ ++ unsigned char *start = id; ++ int shift; ++ int ch; ++ ++ for (shift = 28; shift >= 0; shift -= 4) { ++ if ((ch = (value >> shift) & 0xf) || (id != start)) { ++ *id = hexchars[ch]; ++ id++; ++ } ++ } ++ if (id == start) ++ *id++ = '0'; ++ return id - start; ++} ++#ifdef old_thread_list ++ ++static int ++threadref_to_int(threadref * ref) ++{ ++ int i, value = 0; ++ unsigned char *scan; ++ ++ scan = (char *) ref; ++ scan += 4; ++ i = 4; ++ while (i-- > 0) ++ value = (value << 8) | ((*scan++) & 0xff); ++ return value; ++} ++#endif ++static int ++cmp_str(char *s1, char *s2, int count) ++{ ++ while (count--) { ++ if (*s1++ != *s2++) ++ return 0; ++ } ++ return 1; ++} ++ ++#if 1 /* this is a hold over from 2.4 where O(1) was "sometimes" */ ++extern struct task_struct *kgdb_get_idle(int cpu); ++#define idle_task(cpu) kgdb_get_idle(cpu) ++#else ++#define idle_task(cpu) init_tasks[cpu] ++#endif ++ ++extern int kgdb_pid_init_done; ++ ++struct task_struct * ++getthread(int pid) ++{ ++ struct task_struct *thread; ++ if (pid >= PID_MAX && pid <= (PID_MAX + MAX_NO_CPUS)) { ++ ++ return idle_task(pid - PID_MAX); ++ } else { ++ /* ++ * find_task_by_pid is relatively safe all the time ++ * Other pid functions require lock downs which imply ++ * that we may be interrupting them (as we get here ++ * in the middle of most any lock down). ++ * Still we don't want to call until the table exists! 
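++	 *
++	 * [Editorial note, not part of the original patch: pids at or
++	 * above PID_MAX are synthetic ids this stub assigns to the
++	 * per-cpu idle tasks, handled by the branch above. A hypothetical
++	 * helper making the convention explicit:
++	 *
++	 *	static inline int kgdb_is_idle_pid(int pid)
++	 *	{
++	 *		return pid >= PID_MAX && pid <= PID_MAX + MAX_NO_CPUS;
++	 *	}
++	 *
++	 * i.e. cpu N's idle task is addressed by gdb as pid PID_MAX + N.]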
++	 */
++	if (kgdb_pid_init_done){
++		thread = find_task_by_pid(pid);
++		if (thread) {
++			return thread;
++		}
++	}
++	}
++	return NULL;
++}
++/* *INDENT-OFF* */
++struct hw_breakpoint {
++	unsigned enabled;
++	unsigned type;
++	unsigned len;
++	unsigned addr;
++} breakinfo[4] = { {enabled:0},
++		   {enabled:0},
++		   {enabled:0},
++		   {enabled:0}};
++/* *INDENT-ON* */
++unsigned hw_breakpoint_status;
++void
++correct_hw_break(void)
++{
++	int breakno;
++	int correctit;
++	int breakbit;
++	unsigned dr7;
++
++	asm volatile ("movl %%db7, %0\n":"=r" (dr7)
++		      :);
++	/* *INDENT-OFF* */
++	do {
++		unsigned addr0, addr1, addr2, addr3;
++		asm volatile ("movl %%db0, %0\n"
++			      "movl %%db1, %1\n"
++			      "movl %%db2, %2\n"
++			      "movl %%db3, %3\n"
++			      :"=r" (addr0), "=r"(addr1),
++			      "=r"(addr2), "=r"(addr3)
++			      :);
++	} while (0);
++	/* *INDENT-ON* */
++	correctit = 0;
++	for (breakno = 0; breakno < 4; breakno++) {
++		breakbit = 2 << (breakno << 1);
++		if (!(dr7 & breakbit) && breakinfo[breakno].enabled) {
++			correctit = 1;
++			dr7 |= breakbit;
++			dr7 &= ~(0xf0000 << (breakno << 2));
++			dr7 |= (((breakinfo[breakno].len << 2) |
++				 breakinfo[breakno].type) << 16) <<
++			    (breakno << 2);
++			switch (breakno) {
++			case 0:
++				asm volatile ("movl %0, %%dr0\n"::"r"
++					      (breakinfo[breakno].addr));
++				break;
++
++			case 1:
++				asm volatile ("movl %0, %%dr1\n"::"r"
++					      (breakinfo[breakno].addr));
++				break;
++
++			case 2:
++				asm volatile ("movl %0, %%dr2\n"::"r"
++					      (breakinfo[breakno].addr));
++				break;
++
++			case 3:
++				asm volatile ("movl %0, %%dr3\n"::"r"
++					      (breakinfo[breakno].addr));
++				break;
++			}
++		} else if ((dr7 & breakbit) && !breakinfo[breakno].enabled) {
++			correctit = 1;
++			dr7 &= ~breakbit;
++			dr7 &= ~(0xf0000 << (breakno << 2));
++		}
++	}
++	if (correctit) {
++		asm volatile ("movl %0, %%db7\n"::"r" (dr7));
++	}
++}
++
++int
++remove_hw_break(unsigned breakno)
++{
++	if (!breakinfo[breakno].enabled) {
++		return -1;
++	}
++	breakinfo[breakno].enabled = 0;
++	return 0;
++}
++
++int
++set_hw_break(unsigned breakno, unsigned type, unsigned len, unsigned addr)
++{
++	if (breakinfo[breakno].enabled) {
++		return -1;
++	}
++	breakinfo[breakno].enabled = 1;
++	breakinfo[breakno].type = type;
++	breakinfo[breakno].len = len;
++	breakinfo[breakno].addr = addr;
++	return 0;
++}
++
++#ifdef CONFIG_SMP
++static int in_kgdb_console = 0;
++
++int
++in_kgdb(struct pt_regs *regs)
++{
++	unsigned flags;
++	int cpu = smp_processor_id();
++	in_kgdb_called = 1;
++	if (!spin_is_locked(&kgdb_spinlock)) {
++		if (in_kgdb_here_log[cpu] ||	/* we are holding this cpu */
++		    in_kgdb_console) {	/* or we are doing slow i/o */
++			return 1;
++		}
++		return 0;
++	}
++
++	/* As I see it the only reason not to let all cpus spin on
++	 * the same spin_lock is to allow selected ones to proceed.
++	 * This would be a good thing, so we leave it this way.
++	 * Maybe someday....  Done !
++
++	 * in_kgdb() is called from an NMI so we don't pretend
++	 * to have any resources, like printk() for example.
++	 */
++
++	kgdb_local_irq_save(flags);	/* only local here, to avoid hanging */
++	/*
++	 * log arrival of this cpu
++	 * The NMI keeps on ticking.  Protect against recurring more
++	 * than once, and ignore the cpu that has the kgdb lock
++	 */
++	in_kgdb_entry_log[cpu]++;
++	in_kgdb_here_log[cpu] = regs;
++	if (cpu == spinlock_cpu || waiting_cpus[cpu].task)
++		goto exit_in_kgdb;
++
++	/*
++	 * For protection of the initialization of the spin locks by kgdb
++	 * it locks the kgdb spinlock before it gets the wait locks set
++	 * up.  We wait here for the wait lock to be taken.  If the
++	 * kgdb lock goes away first??  Well, it could be a slow exit
++	 * sequence where the wait lock is removed prior to the kgdb lock
++	 * so if kgdb gets unlocked, we just exit.
++	 */
++
++	while (spin_is_locked(&kgdb_spinlock) &&
++	       !spin_is_locked(waitlocks + cpu)) ;
++	if (!spin_is_locked(&kgdb_spinlock))
++		goto exit_in_kgdb;
++
++	waiting_cpus[cpu].task = current;
++	waiting_cpus[cpu].pid = (current->pid) ? : (PID_MAX + cpu);
++	waiting_cpus[cpu].regs = regs;
++
++	spin_unlock_wait(waitlocks + cpu);
++
++	/*
++	 * log departure of this cpu
++	 */
++	waiting_cpus[cpu].task = 0;
++	waiting_cpus[cpu].pid = 0;
++	waiting_cpus[cpu].regs = 0;
++	correct_hw_break();
++      exit_in_kgdb:
++	in_kgdb_here_log[cpu] = 0;
++	kgdb_local_irq_restore(flags);
++	return 1;
++	/*
++	   spin_unlock(continuelocks + smp_processor_id());
++	 */
++}
++
++void
++smp__in_kgdb(struct pt_regs regs)
++{
++	ack_APIC_irq();
++	in_kgdb(&regs);
++}
++#else
++int
++in_kgdb(struct pt_regs *regs)
++{
++	return (kgdb_spinlock);
++}
++#endif
++
++void
++printexceptioninfo(int exceptionNo, int errorcode, char *buffer)
++{
++	unsigned dr6;
++	int i;
++	switch (exceptionNo) {
++	case 1:		/* debug exception */
++		break;
++	case 3:		/* breakpoint */
++		sprintf(buffer, "Software breakpoint");
++		return;
++	default:
++		sprintf(buffer, "Details not available");
++		return;
++	}
++	asm volatile ("movl %%db6, %0\n":"=r" (dr6)
++		      :);
++	if (dr6 & 0x4000) {
++		sprintf(buffer, "Single step");
++		return;
++	}
++	for (i = 0; i < 4; ++i) {
++		if (dr6 & (1 << i)) {
++			sprintf(buffer, "Hardware breakpoint %d", i);
++			return;
++		}
++	}
++	sprintf(buffer, "Unknown trap");
++	return;
++}
++
++/*
++ * This function does all command processing for interfacing to gdb.
++ *
++ * NOTE:  The INT nn instruction leaves the state of the interrupt
++ *	  enable flag UNCHANGED.  That means that when this routine
++ *	  is entered via a breakpoint (INT 3) instruction from code
++ *	  that has interrupts enabled, then interrupts will STILL BE
++ *	  enabled when this routine is entered.  The first thing that
++ *	  we do here is disable interrupts so as to prevent recursive
++ *	  entries and bothersome serial interrupts while we are
++ *	  trying to run the serial port in polled mode.
++ *
++ * For kernel version 2.1.xx the kgdb_cli() actually gets a spin lock so
++ * it is always necessary to do a restore_flags before returning
++ * so as to let go of that lock.
++ */
++int
++kgdb_handle_exception(int exceptionVector,
++		      int signo, int err_code, struct pt_regs *linux_regs)
++{
++	struct task_struct *usethread = NULL;
++	struct task_struct *thread_list_start = 0, *thread = NULL;
++	int addr, length;
++	int breakno, breaktype;
++	char *ptr;
++	int newPC;
++	threadref thref;
++	int threadid;
++	int thread_min = PID_MAX + MAX_NO_CPUS;
++#ifdef old_thread_list
++	int maxthreads;
++#endif
++	int nothreads;
++	unsigned long flags;
++	int gdb_regs[NUMREGBYTES / 4];
++	int dr6;
++	IF_SMP(int entry_state = 0);	/* 0, ok, 1, no nmi, 2 sync failed */
++#define NO_NMI 1
++#define NO_SYNC 2
++#define	regs	(*linux_regs)
++#define NUMREGS NUMREGBYTES/4
++	/*
++	 * If the entry is not from the kernel then return to the Linux
++	 * trap handler and let it process the interrupt normally.
++	 */
++	if ((linux_regs->eflags & VM_MASK) || (3 & linux_regs->xcs)) {
++		printk("ignoring non-kernel exception\n");
++		print_regs(&regs);
++		return (0);
++	}
++	/*
++	 * If we're using eth mode, set the 'mode' in the netdevice.
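++	 *
++	 * [Editorial aside, not part of the original patch: kgdboe is the
++	 * kgdb-over-ethernet transport; while the stub owns the machine,
++	 * netpoll must trap incoming packets instead of handing them to
++	 * the network stack.  The calls are symmetric:
++	 *
++	 *	if (kgdboe)
++	 *		netpoll_set_trap(1);	// on entry, just below
++	 *	...
++	 *	if (kgdboe)
++	 *		netpoll_set_trap(0);	// before resuming, in the 'c' path
++	 *
++	 * both appear in this function.]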
++ */ ++ ++ if (kgdboe) ++ netpoll_set_trap(1); ++ ++ kgdb_local_irq_save(flags); ++ ++ /* Get kgdb spinlock */ ++ ++ KGDB_SPIN_LOCK(&kgdb_spinlock); ++ rdtscll(kgdb_info.entry_tsc); ++ /* ++ * We depend on this spinlock and the NMI watch dog to control the ++ * other cpus. They will arrive at "in_kgdb()" as a result of the ++ * NMI and will wait there for the following spin locks to be ++ * released. ++ */ ++#ifdef CONFIG_SMP ++ ++#if 0 ++ if (cpu_callout_map & ~MAX_CPU_MASK) { ++ printk("kgdb : too many cpus, possibly not mapped" ++ " in contiguous space, change MAX_NO_CPUS" ++ " in kgdb_stub and make new kernel.\n" ++ " cpu_callout_map is %lx\n", cpu_callout_map); ++ goto exit_just_unlock; ++ } ++#endif ++ if (spinlock_count == 1) { ++ int time, end_time, dum; ++ int i; ++ int cpu_logged_in[MAX_NO_CPUS] = {[0 ... MAX_NO_CPUS - 1] = (0) ++ }; ++ if (remote_debug) { ++ printk("kgdb : cpu %d entry, syncing others\n", ++ smp_processor_id()); ++ } ++ for (i = 0; i < MAX_NO_CPUS; i++) { ++ /* ++ * Use trylock as we may already hold the lock if ++ * we are holding the cpu. Net result is all ++ * locked. ++ */ ++ spin_trylock(&waitlocks[i]); ++ } ++ for (i = 0; i < MAX_NO_CPUS; i++) ++ cpu_logged_in[i] = 0; ++ /* ++ * Wait for their arrival. We know the watch dog is active if ++ * in_kgdb() has ever been called, as it is always called on a ++ * watchdog tick. ++ */ ++ rdtsc(dum, time); ++ end_time = time + 2; /* Note: we use the High order bits! */ ++ i = 1; ++ if (num_online_cpus() > 1) { ++ int me_in_kgdb = in_kgdb_entry_log[smp_processor_id()]; ++ smp_send_nmi_allbutself(); ++ ++ while (i < num_online_cpus() && time != end_time) { ++ int j; ++ for (j = 0; j < MAX_NO_CPUS; j++) { ++ if (waiting_cpus[j].task && ++ waiting_cpus[j].task != NOCPU && ++ !cpu_logged_in[j]) { ++ i++; ++ cpu_logged_in[j] = 1; ++ if (remote_debug) { ++ printk ++ ("kgdb : cpu %d arrived at kgdb\n", ++ j); ++ } ++ break; ++ } else if (!waiting_cpus[j].task && ++ !cpu_online(j)) { ++ waiting_cpus[j].task = NOCPU; ++ cpu_logged_in[j] = 1; ++ waiting_cpus[j].hold = 1; ++ break; ++ } ++ if (!waiting_cpus[j].task && ++ in_kgdb_here_log[j]) { ++ ++ int wait = 100000; ++ while (wait--) ; ++ if (!waiting_cpus[j].task && ++ in_kgdb_here_log[j]) { ++ printk ++ ("kgdb : cpu %d stall" ++ " in in_kgdb\n", ++ j); ++ i++; ++ cpu_logged_in[j] = 1; ++ waiting_cpus[j].task = ++ (struct task_struct ++ *) 1; ++ } ++ } ++ } ++ ++ if (in_kgdb_entry_log[smp_processor_id()] > ++ (me_in_kgdb + 10)) { ++ break; ++ } ++ ++ rdtsc(dum, time); ++ } ++ if (i < num_online_cpus()) { ++ printk ++ ("kgdb : time out, proceeding without sync\n"); ++#if 0 ++ printk("kgdb : Waiting_cpus: 0 = %d, 1 = %d\n", ++ waiting_cpus[0].task != 0, ++ waiting_cpus[1].task != 0); ++ printk("kgdb : Cpu_logged in: 0 = %d, 1 = %d\n", ++ cpu_logged_in[0], cpu_logged_in[1]); ++ printk ++ ("kgdb : in_kgdb_here_log in: 0 = %d, 1 = %d\n", ++ in_kgdb_here_log[0] != 0, ++ in_kgdb_here_log[1] != 0); ++#endif ++ entry_state = NO_SYNC; ++ } else { ++#if 0 ++ int ent = ++ in_kgdb_entry_log[smp_processor_id()] - ++ me_in_kgdb; ++ printk("kgdb : sync after %d entries\n", ent); ++#endif ++ } ++ } else { ++ if (remote_debug) { ++ printk ++ ("kgdb : %d cpus, but watchdog not active\n" ++ "proceeding without locking down other cpus\n", ++ num_online_cpus()); ++ entry_state = NO_NMI; ++ } ++ } ++ } ++#endif ++ ++ if (remote_debug) { ++ unsigned long *lp = (unsigned long *) &linux_regs; ++ ++ printk("handle_exception(exceptionVector=%d, " ++ "signo=%d, err_code=%d, linux_regs=%p)\n", ++ 
exceptionVector, signo, err_code, linux_regs);
++		if (debug_regs) {
++			print_regs(&regs);
++			printk("Stk: %8lx %8lx %8lx %8lx"
++			       "  %8lx %8lx %8lx %8lx\n",
++			       lp[0], lp[1], lp[2], lp[3],
++			       lp[4], lp[5], lp[6], lp[7]);
++			printk("     %8lx %8lx %8lx %8lx"
++			       "  %8lx %8lx %8lx %8lx\n",
++			       lp[8], lp[9], lp[10], lp[11],
++			       lp[12], lp[13], lp[14], lp[15]);
++			printk("     %8lx %8lx %8lx %8lx "
++			       "%8lx %8lx %8lx %8lx\n",
++			       lp[16], lp[17], lp[18], lp[19],
++			       lp[20], lp[21], lp[22], lp[23]);
++			printk("     %8lx %8lx %8lx %8lx "
++			       "%8lx %8lx %8lx %8lx\n",
++			       lp[24], lp[25], lp[26], lp[27],
++			       lp[28], lp[29], lp[30], lp[31]);
++		}
++	}
++
++	/* Disable hardware debugging while we are in kgdb */
++	/* Get the debug register status register */
++/* *INDENT-OFF* */
++      __asm__("movl %0,%%db7"
++	      :	/* no output */
++	      :"r"(0));
++
++	asm volatile ("movl %%db6, %0\n"
++		      :"=r" (hw_breakpoint_status)
++		      :);
++
++/* *INDENT-ON* */
++	switch (exceptionVector) {
++	case 0:		/* divide error */
++	case 1:		/* debug exception */
++	case 2:		/* NMI */
++	case 3:		/* breakpoint */
++	case 4:		/* overflow */
++	case 5:		/* bounds check */
++	case 6:		/* invalid opcode */
++	case 7:		/* device not available */
++	case 8:		/* double fault (errcode) */
++	case 10:	/* invalid TSS (errcode) */
++	case 12:	/* stack fault (errcode) */
++	case 16:	/* floating point error */
++	case 17:	/* alignment check (errcode) */
++	default:	/* any undocumented */
++		break;
++	case 11:	/* segment not present (errcode) */
++	case 13:	/* general protection (errcode) */
++	case 14:	/* page fault (special errcode) */
++	case 19:	/* cache flush denied */
++		if (mem_err_expected) {
++			/*
++			 * This fault occurred because of the
++			 * get_char or set_char routines.  These
++			 * two routines use either eax or edx to
++			 * indirectly reference the location in
++			 * memory that they are working with.
++			 * For a page fault, when we return the
++			 * instruction will be retried, so we
++			 * have to make sure that these
++			 * registers point to valid memory.
++			 */
++			mem_err = 1;	/* set mem error flag */
++			mem_err_expected = 0;
++			mem_err_cnt++;	/* helps in debugging */
++			/* make valid address */
++			regs.eax = (long) &garbage_loc;
++			/* make valid address */
++			regs.edx = (long) &garbage_loc;
++			if (remote_debug)
++				printk("Return after memory error: "
++				       "mem_err_cnt=%d\n", mem_err_cnt);
++			if (debug_regs)
++				print_regs(&regs);
++			goto exit_kgdb;
++		}
++		break;
++	}
++	if (remote_debug)
++		printk("kgdb : entered kgdb on cpu %d\n", smp_processor_id());
++
++	gdb_i386vector = exceptionVector;
++	gdb_i386errcode = err_code;
++	kgdb_info.called_from = __builtin_return_address(0);
++#ifdef CONFIG_SMP
++	/*
++	 * OK, we can now communicate, let's tell gdb about the sync.
++	 * but only if we had a problem.
++	 */
++	switch (entry_state) {
++	case NO_NMI:
++		to_gdb("NMI not active, other cpus not stopped\n");
++		break;
++	case NO_SYNC:
++		to_gdb("Some cpus not stopped, see 'kgdb_info' for details\n");
++	default:;
++	}
++
++#endif
++/*
++ * Set up the gdb function call area.
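++ *
++ * [Editorial aside, not part of the original patch: the stop reply
++ * built just below is the standard gdb remote-protocol 'S' packet;
++ * signal 5 (SIGTRAP), for example, goes out as "S05".  A minimal
++ * formatter equivalent to the open-coded sequence:
++ *
++ *	static void fmt_stop_reply(char *buf, int signo)
++ *	{
++ *		buf[0] = 'S';
++ *		buf[1] = hexchars[(signo >> 4) & 0xf];
++ *		buf[2] = hexchars[signo & 0xf];	// same as signo % 16 here
++ *		buf[3] = 0;
++ *	}
++ * ]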
++ */
++	trap_cpu = smp_processor_id();
++	OLD_esp = NEW_esp = (int) (&linux_regs->esp);
++
++	IF_SMP(once_again:)
++	    /* reply to host that an exception has occurred */
++	    remcomOutBuffer[0] = 'S';
++	remcomOutBuffer[1] = hexchars[signo >> 4];
++	remcomOutBuffer[2] = hexchars[signo % 16];
++	remcomOutBuffer[3] = 0;
++
++	putpacket(remcomOutBuffer);
++
++	while (1 == 1) {
++		error = 0;
++		remcomOutBuffer[0] = 0;
++		getpacket(remcomInBuffer);
++		switch (remcomInBuffer[0]) {
++		case '?':
++			remcomOutBuffer[0] = 'S';
++			remcomOutBuffer[1] = hexchars[signo >> 4];
++			remcomOutBuffer[2] = hexchars[signo % 16];
++			remcomOutBuffer[3] = 0;
++			break;
++		case 'd':
++			remote_debug = !(remote_debug);	/* toggle debug flag */
++			printk("Remote debug %s\n",
++			       remote_debug ? "on" : "off");
++			break;
++		case 'g':	/* return the value of the CPU registers */
++			get_gdb_regs(usethread, &regs, gdb_regs);
++			mem2hex((char *) gdb_regs,
++				remcomOutBuffer, NUMREGBYTES, 0);
++			break;
++		case 'G':	/* set the value of the CPU registers - return OK */
++			hex2mem(&remcomInBuffer[1],
++				(char *) gdb_regs, NUMREGBYTES, 0);
++			if (!usethread || usethread == current) {
++				gdb_regs_to_regs(gdb_regs, &regs);
++				strcpy(remcomOutBuffer, "OK");
++			} else {
++				strcpy(remcomOutBuffer, "E00");
++			}
++			break;
++
++		case 'P':{	/* set the value of a single CPU register -
++				   return OK */
++				/*
++				 * For some reason, gdb wants to talk about pseudo
++				 * registers (greater than 15).  These may have
++				 * meaning for ptrace, but for us it is safe to
++				 * ignore them.  We do this by dumping them into
++				 * _GS which we also ignore, but do have memory for.
++				 */
++				int regno;
++
++				ptr = &remcomInBuffer[1];
++				regs_to_gdb_regs(gdb_regs, &regs);
++				if ((!usethread || usethread == current) &&
++				    hexToInt(&ptr, &regno) &&
++				    *ptr++ == '=' && (regno >= 0)) {
++					regno =
++					    (regno >= NUMREGS ? _GS : regno);
++					hex2mem(ptr, (char *) &gdb_regs[regno],
++						4, 0);
++					gdb_regs_to_regs(gdb_regs, &regs);
++					strcpy(remcomOutBuffer, "OK");
++					break;
++				}
++				strcpy(remcomOutBuffer, "E01");
++				break;
++			}
++
++			/* mAA..AA,LLLL  Read LLLL bytes at address AA..AA */
++		case 'm':
++			/* TRY TO READ %x,%x.  IF SUCCEED, SET PTR = 0 */
++			ptr = &remcomInBuffer[1];
++			if (hexToInt(&ptr, &addr) &&
++			    (*(ptr++) == ',') && (hexToInt(&ptr, &length))) {
++				ptr = 0;
++				/*
++				 * hex doubles the byte count
++				 */
++				if (length > (BUFMAX / 2))
++					length = BUFMAX / 2;
++				mem2hex((char *) addr,
++					remcomOutBuffer, length, 1);
++				if (mem_err) {
++					strcpy(remcomOutBuffer, "E03");
++					debug_error("memory fault\n", NULL);
++				}
++			}
++
++			if (ptr) {
++				strcpy(remcomOutBuffer, "E01");
++				debug_error
++				    ("malformed read memory command: %s\n",
++				     remcomInBuffer);
++			}
++			break;
++
++			/* MAA..AA,LLLL:
++			   Write LLLL bytes at address AA..AA return OK */
++		case 'M':
++			/* TRY TO READ '%x,%x:'.  IF SUCCEED, SET PTR = 0 */
++			ptr = &remcomInBuffer[1];
++			if (hexToInt(&ptr, &addr) &&
++			    (*(ptr++) == ',') &&
++			    (hexToInt(&ptr, &length)) && (*(ptr++) == ':')) {
++				hex2mem(ptr, (char *) addr, length, 1);
++
++				if (mem_err) {
++					strcpy(remcomOutBuffer, "E03");
++					debug_error("memory fault\n", NULL);
++				} else {
++					strcpy(remcomOutBuffer, "OK");
++				}
++
++				ptr = 0;
++			}
++			if (ptr) {
++				strcpy(remcomOutBuffer, "E02");
++				debug_error
++				    ("malformed write memory command: %s\n",
++				     remcomInBuffer);
++			}
++			break;
++		case 'S':
++			remcomInBuffer[0] = 's';
++		case 'C':
++			/* Csig;AA..AA where ;AA..AA is optional
++			 * continue with signal
++			 * Since signals are meaningless to us, delete that
++			 * part and then fall into the 'c' code.
++			 */
++			ptr = &remcomInBuffer[1];
++			length = 2;
++			while (*ptr && *ptr != ';') {
++				length++;
++				ptr++;
++			}
++			if (*ptr) {
++				do {
++					ptr++;
++					*(ptr - length++) = *ptr;
++				} while (*ptr);
++			} else {
++				remcomInBuffer[1] = 0;
++			}
++
++			/* cAA..AA  Continue at address AA..AA(optional) */
++			/* sAA..AA  Step one instruction from AA..AA(optional) */
++			/* D        detach, reply OK and then continue */
++		case 'c':
++		case 's':
++		case 'D':
++
++			/* try to read optional parameter,
++			   pc unchanged if no parm */
++			ptr = &remcomInBuffer[1];
++			if (hexToInt(&ptr, &addr)) {
++				if (remote_debug)
++					printk("Changing EIP to 0x%x\n", addr);
++
++				regs.eip = addr;
++			}
++
++			newPC = regs.eip;
++
++			/* clear the trace bit */
++			regs.eflags &= 0xfffffeff;
++
++			/* set the trace bit if we're stepping */
++			if (remcomInBuffer[0] == 's')
++				regs.eflags |= 0x100;
++
++			/* detach is a friendly version of continue. Note that
++			   debugging is still enabled (e.g. hit control-C)
++			 */
++			if (remcomInBuffer[0] == 'D') {
++				strcpy(remcomOutBuffer, "OK");
++				putpacket(remcomOutBuffer);
++			}
++
++			if (remote_debug) {
++				printk("Resuming execution\n");
++				print_regs(&regs);
++			}
++			asm volatile ("movl %%db6, %0\n":"=r" (dr6)
++				      :);
++			if (!(dr6 & 0x4000)) {
++				for (breakno = 0; breakno < 4; ++breakno) {
++					if (dr6 & (1 << breakno) &&
++					    (breakinfo[breakno].type == 0)) {
++						/* Set restore flag */
++						regs.eflags |= 0x10000;
++						break;
++					}
++				}
++			}
++
++			if (kgdboe)
++				netpoll_set_trap(0);
++
++			correct_hw_break();
++			asm volatile ("movl %0, %%db6\n"::"r" (0));
++			goto exit_kgdb;
++
++			/* kill the program */
++		case 'k':	/* do nothing */
++			break;
++
++			/* query */
++		case 'q':
++			nothreads = 0;
++			switch (remcomInBuffer[1]) {
++			case 'f':
++				threadid = 1;
++				thread_list = 2;
++				thread_list_start = (usethread ? : current);
++			case 's':
++				if (!cmp_str(&remcomInBuffer[2],
++					     "ThreadInfo", 10))
++					break;
++
++				remcomOutBuffer[nothreads++] = 'm';
++				for (; threadid < PID_MAX + MAX_NO_CPUS;
++				     threadid++) {
++					thread = getthread(threadid);
++					if (thread) {
++						nothreads += int_to_hex_v(
++							&remcomOutBuffer[
++								nothreads],
++							threadid);
++						if (thread_min > threadid)
++							thread_min = threadid;
++						remcomOutBuffer[
++							nothreads] = ',';
++						nothreads++;
++						if (nothreads > BUFMAX - 10)
++							break;
++					}
++				}
++				if (remcomOutBuffer[nothreads - 1] == 'm') {
++					remcomOutBuffer[nothreads - 1] = 'l';
++				} else {
++					nothreads--;
++				}
++				remcomOutBuffer[nothreads] = 0;
++				break;
++
++#ifdef old_thread_list /* Old thread info request */
++			case 'L':
++				/* List threads */
++				thread_list = 2;
++				thread_list_start = (usethread ? : current);
++				unpack_byte(remcomInBuffer + 3, &maxthreads);
++				unpack_threadid(remcomInBuffer + 5, &thref);
++				do {
++					int buf_thread_limit =
++					    (BUFMAX - 22) / BUF_THREAD_ID_SIZE;
++					if (maxthreads > buf_thread_limit) {
++						maxthreads = buf_thread_limit;
++					}
++				} while (0);
++				remcomOutBuffer[0] = 'q';
++				remcomOutBuffer[1] = 'M';
++				remcomOutBuffer[4] = '0';
++				pack_threadid(remcomOutBuffer + 5, &thref);
++
++				threadid = threadref_to_int(&thref);
++				for (nothreads = 0;
++				     nothreads < maxthreads &&
++				     threadid < PID_MAX + MAX_NO_CPUS;
++				     threadid++) {
++					thread = getthread(threadid);
++					if (thread) {
++						int_to_threadref(&thref,
++								 threadid);
++						pack_threadid(remcomOutBuffer +
++							      21 +
++							      nothreads * 16,
++							      &thref);
++						nothreads++;
++						if (thread_min > threadid)
++							thread_min = threadid;
++					}
++				}
++
++				if (threadid == PID_MAX + MAX_NO_CPUS) {
++					remcomOutBuffer[4] = '1';
++				}
++				pack_hex_byte(remcomOutBuffer + 2, nothreads);
++				remcomOutBuffer[21 + nothreads * 16] = '\0';
++				break;
++#endif
++			case 'C':
++				/* Current thread id */
++				remcomOutBuffer[0] = 'Q';
++				remcomOutBuffer[1] = 'C';
++				threadid = current->pid;
++				if (!threadid) {
++					/*
++					 * idle thread
++					 */
++					for (threadid = PID_MAX;
++					     threadid < PID_MAX + MAX_NO_CPUS;
++					     threadid++) {
++						if (current ==
++						    idle_task(threadid -
++							      PID_MAX))
++							break;
++					}
++				}
++				int_to_threadref(&thref, threadid);
++				pack_threadid(remcomOutBuffer + 2, &thref);
++				remcomOutBuffer[18] = '\0';
++				break;
++
++			case 'E':
++				/* Print exception info */
++				printexceptioninfo(exceptionVector,
++						   err_code, remcomOutBuffer);
++				break;
++			case 'T':{
++				char * nptr;
++				/* Thread extra info */
++				if (!cmp_str(&remcomInBuffer[2],
++					    "hreadExtraInfo,", 15)) {
++					break;
++				}
++				ptr = &remcomInBuffer[17];
++				hexToInt(&ptr, &threadid);
++				thread = getthread(threadid);
++				nptr = &thread->comm[0];
++				length = 0;
++				ptr = &remcomOutBuffer[0];
++				do {
++					length++;
++					ptr = pack_hex_byte(ptr, *nptr++);
++				} while (*nptr && length < 16);
++				/*
++				 * would like that 16 to be the size of
++				 * task_struct.comm but don't know the
++				 * syntax..
++				 */
++				*ptr = 0;
++			}
++			}
++			break;
++
++			/* task related */
++		case 'H':
++			switch (remcomInBuffer[1]) {
++			case 'g':
++				ptr = &remcomInBuffer[2];
++				hexToInt(&ptr, &threadid);
++				thread = getthread(threadid);
++				if (!thread) {
++					remcomOutBuffer[0] = 'E';
++					remcomOutBuffer[1] = '\0';
++					break;
++				}
++				/*
++				 * Just in case I forget what this is all about,
++				 * the "thread info" command to gdb causes it
++				 * to ask for a thread list.  It then switches
++				 * to each thread and asks for the registers.
++				 * For this (and only this) usage, we want to
++				 * fudge the registers of tasks not on the run
++				 * list (i.e. waiting) to show the routine that
++				 * called schedule. Also, gdb is a minimalist
++				 * in that if the current thread is the last
++				 * it will not re-read the info when done.
++				 * This means that in this case we must show
++				 * the real registers. So here is how we do it:
++				 * on each entry we keep track of the min
++				 * thread in the list (the last that gdb will
++				 * get info for).  We also keep track of the
++				 * starting thread.
++				 * "thread_list" is cleared when switching back
++				 * to the min thread if it was current, or
++				 * if it was not current, thread_list is set
++				 * to 1.  When the switch to current comes,
++				 * if thread_list is 1, clear it, else do
++				 * nothing.
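++				 *
++				 * [Editorial summary, not part of the original
++				 * patch -- the three thread_list states
++				 * described above, as a hypothetical enum:
++				 *
++				 *	enum {
++				 *		TL_OFF = 0,	// normal, real registers shown
++				 *		TL_RESTORE = 1,	// list done, switching back to current
++				 *		TL_ACTIVE = 2,	// qfThreadInfo walk in progress
++				 *	};
++				 *
++				 * the two if-blocks below implement the
++				 * 2->{0,1} and 1->0 steps.]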
++ */ ++ usethread = thread; ++ if ((thread_list == 1) && ++ (thread == thread_list_start)) { ++ thread_list = 0; ++ } ++ if (thread_list && (threadid == thread_min)) { ++ if (thread == thread_list_start) { ++ thread_list = 0; ++ } else { ++ thread_list = 1; ++ } ++ } ++ /* follow through */ ++ case 'c': ++ remcomOutBuffer[0] = 'O'; ++ remcomOutBuffer[1] = 'K'; ++ remcomOutBuffer[2] = '\0'; ++ break; ++ } ++ break; ++ ++ /* Query thread status */ ++ case 'T': ++ ptr = &remcomInBuffer[1]; ++ hexToInt(&ptr, &threadid); ++ thread = getthread(threadid); ++ if (thread) { ++ remcomOutBuffer[0] = 'O'; ++ remcomOutBuffer[1] = 'K'; ++ remcomOutBuffer[2] = '\0'; ++ if (thread_min > threadid) ++ thread_min = threadid; ++ } else { ++ remcomOutBuffer[0] = 'E'; ++ remcomOutBuffer[1] = '\0'; ++ } ++ break; ++ ++ case 'Y': /* set up a hardware breakpoint */ ++ ptr = &remcomInBuffer[1]; ++ hexToInt(&ptr, &breakno); ++ ptr++; ++ hexToInt(&ptr, &breaktype); ++ ptr++; ++ hexToInt(&ptr, &length); ++ ptr++; ++ hexToInt(&ptr, &addr); ++ if (set_hw_break(breakno & 0x3, ++ breaktype & 0x3, ++ length & 0x3, addr) == 0) { ++ strcpy(remcomOutBuffer, "OK"); ++ } else { ++ strcpy(remcomOutBuffer, "ERROR"); ++ } ++ break; ++ ++ /* Remove hardware breakpoint */ ++ case 'y': ++ ptr = &remcomInBuffer[1]; ++ hexToInt(&ptr, &breakno); ++ if (remove_hw_break(breakno & 0x3) == 0) { ++ strcpy(remcomOutBuffer, "OK"); ++ } else { ++ strcpy(remcomOutBuffer, "ERROR"); ++ } ++ break; ++ ++ case 'r': /* reboot */ ++ strcpy(remcomOutBuffer, "OK"); ++ putpacket(remcomOutBuffer); ++ /*to_gdb("Rebooting\n"); */ ++ /* triplefault no return from here */ ++ { ++ static long no_idt[2]; ++ __asm__ __volatile__("lidt %0"::"m"(no_idt[0])); ++ BREAKPOINT; ++ } ++ ++ } /* switch */ ++ ++ /* reply to the request */ ++ putpacket(remcomOutBuffer); ++ } /* while(1==1) */ ++ /* ++ * reached by goto only. ++ */ ++ exit_kgdb: ++ /* ++ * Here is where we set up to trap a gdb function call. NEW_esp ++ * will be changed if we are trying to do this. We handle both ++ * adding and subtracting, thus allowing gdb to put grung on ++ * the stack which it removes later. ++ */ ++ if (NEW_esp != OLD_esp) { ++ int *ptr = END_OF_LOOKASIDE; ++ if (NEW_esp < OLD_esp) ++ ptr -= (OLD_esp - NEW_esp) / sizeof (int); ++ *--ptr = linux_regs->eflags; ++ *--ptr = linux_regs->xcs; ++ *--ptr = linux_regs->eip; ++ *--ptr = linux_regs->ecx; ++ *--ptr = linux_regs->ebx; ++ *--ptr = linux_regs->eax; ++ linux_regs->ecx = NEW_esp - (sizeof (int) * 6); ++ linux_regs->ebx = (unsigned int) END_OF_LOOKASIDE; ++ if (NEW_esp < OLD_esp) { ++ linux_regs->eip = (unsigned int) fn_call_stub; ++ } else { ++ linux_regs->eip = (unsigned int) fn_rtn_stub; ++ linux_regs->eax = NEW_esp; ++ } ++ linux_regs->eflags &= ~(IF_BIT | TF_BIT); ++ } ++#ifdef CONFIG_SMP ++ /* ++ * Release gdb wait locks ++ * Sanity check time. Must have at least one cpu to run. Also single ++ * step must not be done if the current cpu is on hold. ++ */ ++ if (spinlock_count == 1) { ++ int ss_hold = (regs.eflags & 0x100) && kgdb_info.hold_on_sstep; ++ int cpu_avail = 0; ++ int i; ++ ++ for (i = 0; i < MAX_NO_CPUS; i++) { ++ if (!cpu_online(i)) ++ break; ++ if (!hold_cpu(i)) { ++ cpu_avail = 1; ++ } ++ } ++ /* ++ * Early in the bring up there will be NO cpus on line... 
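++	 *
++	 * [Editorial aside, not part of the original patch: the hold state
++	 * is driven from gdb itself.  Assuming hold_cpu() reads the
++	 * kgdb_info.hold_cpu array named in the message below, a cpu can
++	 * be parked or released from the debugger prompt:
++	 *
++	 *	(gdb) set var kgdb_info.hold_cpu[1] = 1		// park cpu 1
++	 *	(gdb) set var kgdb_info.hold_cpu[1] = 0		// release it
++	 *
++	 * which is what "see 'kgdb_info.hold_cpu'" refers to.]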
++	 */
++	if (!cpu_avail && !cpus_empty(cpu_online_map)) {
++		to_gdb("No cpus unblocked, see 'kgdb_info.hold_cpu'\n");
++		goto once_again;
++	}
++	if (hold_cpu(smp_processor_id()) && (regs.eflags & 0x100)) {
++		to_gdb
++		    ("Current cpu must be unblocked to single step\n");
++		goto once_again;
++	}
++	if (!(ss_hold)) {
++		int i;
++		for (i = 0; i < MAX_NO_CPUS; i++) {
++			if (!hold_cpu(i)) {
++				spin_unlock(&waitlocks[i]);
++			}
++		}
++	} else {
++		spin_unlock(&waitlocks[smp_processor_id()]);
++	}
++	/* Release kgdb spinlock */
++	KGDB_SPIN_UNLOCK(&kgdb_spinlock);
++	/*
++	 * If this cpu is on hold, this is where we
++	 * do it.  Note, the NMI will pull us out of here,
++	 * but will return as the above lock is not held.
++	 * We will stay here till another cpu releases the lock for us.
++	 */
++	spin_unlock_wait(waitlocks + smp_processor_id());
++	kgdb_local_irq_restore(flags);
++	return (0);
++	}
++#if 0
++exit_just_unlock:
++#endif
++#endif
++	/* Release kgdb spinlock */
++	KGDB_SPIN_UNLOCK(&kgdb_spinlock);
++	kgdb_local_irq_restore(flags);
++	return (0);
++}
++
++/* this function is used to set up exception handlers for tracing and
++ * breakpoints.
++ * This function is not needed as the above line does all that is needed.
++ * We leave it for backward compatibility...
++ */
++void
++set_debug_traps(void)
++{
++	/*
++	 * linux_debug_hook is defined in traps.c.  We store a pointer
++	 * to our own exception handler into it.
++
++	 * But really folks, ever hear of labeled common, an old Fortran
++	 * concept.  Lots of folks can reference it and it is defined if
++	 * anyone does.  Only one can initialize it at link time.  We do
++	 * this with the hook.  See the statement above.  No need for any
++	 * executable code and it is ready as soon as the kernel is
++	 * loaded.  Very desirable in kernel debugging.
++
++	 linux_debug_hook = handle_exception ;
++	 */
++
++	/* In case GDB is started before us, ack any packets (presumably
++	   "$?#xx") sitting there.
++	   putDebugChar ('+');
++
++	   initialized = 1;
++	 */
++}
++
++/* This function will generate a breakpoint exception.  It is used at the
++   beginning of a program to sync up with a debugger and can be used
++   otherwise as a quick means to stop program execution and "break" into
++   the debugger. */
++/* But really, just use the BREAKPOINT macro.  We will handle the int stuff
++ */
++
++#ifdef later
++/*
++ * possibly we should not go thru the traps.c code at all?  Someday.
++ */
++void
++do_kgdb_int3(struct pt_regs *regs, long error_code)
++{
++	kgdb_handle_exception(3, 5, error_code, regs);
++	return;
++}
++#endif
++#undef regs
++#ifdef CONFIG_TRAP_BAD_SYSCALL_EXITS
++asmlinkage void
++bad_sys_call_exit(int stuff)
++{
++	struct pt_regs *regs = (struct pt_regs *) &stuff;
++	printk("Sys call %d return with %x preempt_count\n",
++	       (int) regs->orig_eax, preempt_count());
++}
++#endif
++#ifdef CONFIG_STACK_OVERFLOW_TEST
++#include
++asmlinkage void
++stack_overflow(void)
++{
++#ifdef BREAKPOINT
++	BREAKPOINT;
++#else
++	printk("Kernel stack overflow, looping forever\n");
++#endif
++	while (1) {
++	}
++}
++#endif
++
++#if defined(CONFIG_SMP) || defined(CONFIG_KGDB_CONSOLE)
++char gdbconbuf[BUFMAX];
++
++static void
++kgdb_gdb_message(const char *s, unsigned count)
++{
++	int i;
++	int wcount;
++	char *bufptr;
++	/*
++	 * This takes care of NMI while spinning out chars to gdb
++	 */
++	IF_SMP(in_kgdb_console = 1);
++	gdbconbuf[0] = 'O';
++	bufptr = gdbconbuf + 1;
++	while (count > 0) {
++		if ((count << 1) > (BUFMAX - 2)) {
++			wcount = (BUFMAX - 2) >> 1;
++		} else {
++			wcount = count;
++		}
++		count -= wcount;
++		for (i = 0; i < wcount; i++) {
++			bufptr = pack_hex_byte(bufptr, s[i]);
++		}
++		*bufptr = '\0';
++		s += wcount;
++
++		putpacket(gdbconbuf);
++
++	}
++	IF_SMP(in_kgdb_console = 0);
++}
++#endif
++#ifdef CONFIG_SMP
++static void
++to_gdb(const char *s)
++{
++	int count = 0;
++	while (s[count] && (count++ < BUFMAX)) ;
++	kgdb_gdb_message(s, count);
++}
++#endif
++#ifdef CONFIG_KGDB_CONSOLE
++#include
++#include
++#include
++#include
++#include
++
++void
++kgdb_console_write(struct console *co, const char *s, unsigned count)
++{
++
++	if (gdb_i386vector == -1) {
++		/*
++		 * We have not yet talked to gdb.  What to do...
++		 * let's break; on continue we can do the write.
++		 * But first tell him what's up.  Uh, well no can do,
++		 * as this IS the console.  Oh well...
++		 * We do need to wait or the messages will be lost.
++		 * Other option would be to tell the above code to
++		 * ignore this breakpoint and do an auto return,
++		 * but that might confuse gdb.	Also this happens
++		 * early enough in boot up that we don't have the traps
++		 * set up yet, so...
++		 */
++		breakpoint();
++	}
++	kgdb_gdb_message(s, count);
++}
++
++/*
++ * ------------------------------------------------------------
++ *  Serial KGDB driver
++ * ------------------------------------------------------------
++ */
++
++static struct console kgdbcons = {
++	name:"kgdb",
++	write:kgdb_console_write,
++#ifdef CONFIG_KGDB_USER_CONSOLE
++	device:kgdb_console_device,
++#endif
++	flags:CON_PRINTBUFFER | CON_ENABLED,
++	index:-1,
++};
++
++/*
++ * The trick here is that this file gets linked before printk.o
++ * That means we get to peer at the console info in the command
++ * line before it does.	If we are up, we register, otherwise,
++ * do nothing.	By returning 0, we allow printk to look also.
++ */
++static int kgdb_console_enabled;
++
++int __init
++kgdb_console_init(char *str)
++{
++	if ((strncmp(str, "kgdb", 4) == 0) || (strncmp(str, "gdb", 3) == 0)) {
++		register_console(&kgdbcons);
++		kgdb_console_enabled = 1;
++	}
++	return 0;		/* let others look at the string */
++}
++
++__setup("console=", kgdb_console_init);
++
++#ifdef CONFIG_KGDB_USER_CONSOLE
++static kdev_t kgdb_console_device(struct console *c);
++/* This stuff sort of works, but it knocks out telnet devices
++ * we are leaving it here in case we (or you) find time to figure it out
++ * better..
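++ *
++ * [Editorial aside, not part of the original patch: "knocks out telnet
++ * devices" because kgdb_console_finit() below takes over the char-dev
++ * major of /dev/console's companion device, roughly:
++ *
++ *	unregister_chrdev(TTYAUX_MAJOR, old_name);	// previous owner
++ *	register_chrdev(TTYAUX_MAJOR, "kgdb", &kgdb_consdev_fops);
++ *
++ * (old_name stands for the name looked up via cdevname()), so anything
++ * else multiplexed on TTYAUX_MAJOR loses its driver.]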
++ */ ++ ++/* ++ * We need a real char device as well for when the console is opened for user ++ * space activities. ++ */ ++ ++static int ++kgdb_consdev_open(struct inode *inode, struct file *file) ++{ ++ return 0; ++} ++ ++static ssize_t ++kgdb_consdev_write(struct file *file, const char *buf, ++ size_t count, loff_t * ppos) ++{ ++ int size, ret = 0; ++ static char kbuf[128]; ++ static DECLARE_MUTEX(sem); ++ ++ /* We are not reentrant... */ ++ if (down_interruptible(&sem)) ++ return -ERESTARTSYS; ++ ++ while (count > 0) { ++ /* need to copy the data from user space */ ++ size = count; ++ if (size > sizeof (kbuf)) ++ size = sizeof (kbuf); ++ if (copy_from_user(kbuf, buf, size)) { ++ ret = -EFAULT; ++ break;; ++ } ++ kgdb_console_write(&kgdbcons, kbuf, size); ++ count -= size; ++ ret += size; ++ buf += size; ++ } ++ ++ up(&sem); ++ ++ return ret; ++} ++ ++struct file_operations kgdb_consdev_fops = { ++ open:kgdb_consdev_open, ++ write:kgdb_consdev_write ++}; ++static kdev_t ++kgdb_console_device(struct console *c) ++{ ++ return MKDEV(TTYAUX_MAJOR, 1); ++} ++ ++/* ++ * This routine gets called from the serial stub in the i386/lib ++ * This is so it is done late in bring up (just before the console open). ++ */ ++void ++kgdb_console_finit(void) ++{ ++ if (kgdb_console_enabled) { ++ char *cptr = cdevname(MKDEV(TTYAUX_MAJOR, 1)); ++ char *cp = cptr; ++ while (*cptr && *cptr != '(') ++ cptr++; ++ *cptr = 0; ++ unregister_chrdev(TTYAUX_MAJOR, cp); ++ register_chrdev(TTYAUX_MAJOR, "kgdb", &kgdb_consdev_fops); ++ } ++} ++#endif ++#endif ++#ifdef CONFIG_KGDB_TS ++#include /* time stamp code */ ++#include /* in_interrupt */ ++#ifdef CONFIG_KGDB_TS_64 ++#define DATA_POINTS 64 ++#endif ++#ifdef CONFIG_KGDB_TS_128 ++#define DATA_POINTS 128 ++#endif ++#ifdef CONFIG_KGDB_TS_256 ++#define DATA_POINTS 256 ++#endif ++#ifdef CONFIG_KGDB_TS_512 ++#define DATA_POINTS 512 ++#endif ++#ifdef CONFIG_KGDB_TS_1024 ++#define DATA_POINTS 1024 ++#endif ++#ifndef DATA_POINTS ++#define DATA_POINTS 128 /* must be a power of two */ ++#endif ++#define INDEX_MASK (DATA_POINTS - 1) ++#if (INDEX_MASK & DATA_POINTS) ++#error "CONFIG_KGDB_TS_COUNT must be a power of 2" ++#endif ++struct kgdb_and_then_struct { ++#ifdef CONFIG_SMP ++ int on_cpu; ++#endif ++ struct task_struct *task; ++ long long at_time; ++ int from_ln; ++ char *in_src; ++ void *from; ++ int *with_shpf; ++ int data0; ++ int data1; ++}; ++struct kgdb_and_then_struct2 { ++#ifdef CONFIG_SMP ++ int on_cpu; ++#endif ++ struct task_struct *task; ++ long long at_time; ++ int from_ln; ++ char *in_src; ++ void *from; ++ int *with_shpf; ++ struct task_struct *t1; ++ struct task_struct *t2; ++}; ++struct kgdb_and_then_struct kgdb_data[DATA_POINTS]; ++ ++struct kgdb_and_then_struct *kgdb_and_then = &kgdb_data[0]; ++int kgdb_and_then_count; ++ ++void ++kgdb_tstamp(int line, char *source, int data0, int data1) ++{ ++ static spinlock_t ts_spin = SPIN_LOCK_UNLOCKED; ++ int flags; ++ kgdb_local_irq_save(flags); ++ spin_lock(&ts_spin); ++ rdtscll(kgdb_and_then->at_time); ++#ifdef CONFIG_SMP ++ kgdb_and_then->on_cpu = smp_processor_id(); ++#endif ++ kgdb_and_then->task = current; ++ kgdb_and_then->from_ln = line; ++ kgdb_and_then->in_src = source; ++ kgdb_and_then->from = __builtin_return_address(0); ++ kgdb_and_then->with_shpf = (int *) (((flags & IF_BIT) >> 9) | ++ (preempt_count() << 8)); ++ kgdb_and_then->data0 = data0; ++ kgdb_and_then->data1 = data1; ++ kgdb_and_then = &kgdb_data[++kgdb_and_then_count & INDEX_MASK]; ++ spin_unlock(&ts_spin); ++ kgdb_local_irq_restore(flags); 
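++	/*
++	 * [Editorial aside, not part of the original patch: the ring
++	 * index above relies on DATA_POINTS being a power of two, so
++	 * that '& INDEX_MASK' is a cheap modulo; e.g. with
++	 * DATA_POINTS == 128, entry count 130 lands in slot
++	 * 130 & 127 == 2.]
++	 */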
++#ifdef CONFIG_PREEMPT ++ ++#endif ++ return; ++} ++#endif ++typedef int gdb_debug_hook(int exceptionVector, ++ int signo, int err_code, struct pt_regs *linux_regs); ++gdb_debug_hook *linux_debug_hook = &kgdb_handle_exception; /* histerical reasons... */ ++ ++static int kgdb_need_breakpoint[NR_CPUS]; ++ ++void kgdb_schedule_breakpoint(void) ++{ ++ kgdb_need_breakpoint[smp_processor_id()] = 1; ++} ++ ++void kgdb_process_breakpoint(void) ++{ ++ /* ++ * Handle a breakpoint queued from inside network driver code ++ * to avoid reentrancy issues ++ */ ++ if (kgdb_need_breakpoint[smp_processor_id()]) { ++ kgdb_need_breakpoint[smp_processor_id()] = 0; ++ BREAKPOINT; ++ } ++} ++ +--- linux-2.6.0/arch/i386/kernel/ldt.c 2003-11-09 16:45:04.000000000 -0800 ++++ 25/arch/i386/kernel/ldt.c 2003-12-28 23:26:36.000000000 -0800 +@@ -2,7 +2,7 @@ + * linux/kernel/ldt.c + * + * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds +- * Copyright (C) 1999 Ingo Molnar ++ * Copyright (C) 1999, 2003 Ingo Molnar + */ + + #include +@@ -18,6 +18,8 @@ + #include + #include + #include ++#include ++#include + + #ifdef CONFIG_SMP /* avoids "defined but not used" warnig */ + static void flush_ldt(void *null) +@@ -29,34 +31,31 @@ static void flush_ldt(void *null) + + static int alloc_ldt(mm_context_t *pc, int mincount, int reload) + { +- void *oldldt; +- void *newldt; +- int oldsize; ++ int oldsize, newsize, i; + + if (mincount <= pc->size) + return 0; ++ /* ++ * LDT got larger - reallocate if necessary. ++ */ + oldsize = pc->size; + mincount = (mincount+511)&(~511); +- if (mincount*LDT_ENTRY_SIZE > PAGE_SIZE) +- newldt = vmalloc(mincount*LDT_ENTRY_SIZE); +- else +- newldt = kmalloc(mincount*LDT_ENTRY_SIZE, GFP_KERNEL); +- +- if (!newldt) +- return -ENOMEM; +- +- if (oldsize) +- memcpy(newldt, pc->ldt, oldsize*LDT_ENTRY_SIZE); +- oldldt = pc->ldt; +- memset(newldt+oldsize*LDT_ENTRY_SIZE, 0, (mincount-oldsize)*LDT_ENTRY_SIZE); +- pc->ldt = newldt; +- wmb(); ++ newsize = mincount*LDT_ENTRY_SIZE; ++ for (i = 0; i < newsize; i += PAGE_SIZE) { ++ int nr = i/PAGE_SIZE; ++ BUG_ON(i >= 64*1024); ++ if (!pc->ldt_pages[nr]) { ++ pc->ldt_pages[nr] = alloc_page(GFP_HIGHUSER); ++ if (!pc->ldt_pages[nr]) ++ return -ENOMEM; ++ clear_highpage(pc->ldt_pages[nr]); ++ } ++ } + pc->size = mincount; +- wmb(); +- + if (reload) { + #ifdef CONFIG_SMP + cpumask_t mask; ++ + preempt_disable(); + load_LDT(pc); + mask = cpumask_of_cpu(smp_processor_id()); +@@ -67,21 +66,20 @@ static int alloc_ldt(mm_context_t *pc, i + load_LDT(pc); + #endif + } +- if (oldsize) { +- if (oldsize*LDT_ENTRY_SIZE > PAGE_SIZE) +- vfree(oldldt); +- else +- kfree(oldldt); +- } + return 0; + } + + static inline int copy_ldt(mm_context_t *new, mm_context_t *old) + { +- int err = alloc_ldt(new, old->size, 0); +- if (err < 0) ++ int i, err, size = old->size, nr_pages = (size*LDT_ENTRY_SIZE + PAGE_SIZE-1)/PAGE_SIZE; ++ ++ err = alloc_ldt(new, size, 0); ++ if (err < 0) { ++ new->size = 0; + return err; +- memcpy(new->ldt, old->ldt, old->size*LDT_ENTRY_SIZE); ++ } ++ for (i = 0; i < nr_pages; i++) ++ copy_user_highpage(new->ldt_pages[i], old->ldt_pages[i], 0); + return 0; + } + +@@ -96,6 +94,7 @@ int init_new_context(struct task_struct + + init_MUTEX(&mm->context.sem); + mm->context.size = 0; ++ memset(mm->context.ldt_pages, 0, sizeof(struct page *) * MAX_LDT_PAGES); + old_mm = current->mm; + if (old_mm && old_mm->context.size > 0) { + down(&old_mm->context.sem); +@@ -107,23 +106,21 @@ int init_new_context(struct task_struct + + /* + * No need to lock the MM as we are the 
last user
++ * Do not touch the ldt register, we are already
++ * in the next thread.
+ */
+ void destroy_context(struct mm_struct *mm)
+ {
+-	if (mm->context.size) {
+-		if (mm == current->active_mm)
+-			clear_LDT();
+-		if (mm->context.size*LDT_ENTRY_SIZE > PAGE_SIZE)
+-			vfree(mm->context.ldt);
+-		else
+-			kfree(mm->context.ldt);
+-		mm->context.size = 0;
+-	}
++	int i, nr_pages = (mm->context.size*LDT_ENTRY_SIZE + PAGE_SIZE-1) / PAGE_SIZE;
++
++	for (i = 0; i < nr_pages; i++)
++		__free_page(mm->context.ldt_pages[i]);
++	mm->context.size = 0;
+ }
+ 
+ static int read_ldt(void __user * ptr, unsigned long bytecount)
+ {
+-	int err;
++	int err, i;
+ 	unsigned long size;
+ 	struct mm_struct * mm = current->mm;
+ 
+@@ -138,8 +135,25 @@ static int read_ldt(void __user * ptr, u
+ 		size = bytecount;
+ 
+ 	err = 0;
+-	if (copy_to_user(ptr, mm->context.ldt, size))
+-		err = -EFAULT;
++	/*
++	 * This is necessary just in case we got here straight from a
++	 * context-switch where the ptes were set but no tlb flush
++	 * was done yet. We rather avoid doing a TLB flush in the
++	 * context-switch path and do it here instead.
++	 */
++	__flush_tlb_global();
++
++	for (i = 0; i < size; i += PAGE_SIZE) {
++		int nr = i / PAGE_SIZE, bytes;
++		char *kaddr = kmap(mm->context.ldt_pages[nr]);
++
++		bytes = size - i;
++		if (bytes > PAGE_SIZE)
++			bytes = PAGE_SIZE;
++		if (copy_to_user(ptr + i, kaddr, bytes))
++			err = -EFAULT;
++		kunmap(mm->context.ldt_pages[nr]);
++	}
+ 	up(&mm->context.sem);
+ 	if (err < 0)
+ 		return err;
+@@ -158,7 +172,7 @@ static int read_default_ldt(void __user
+ 
+ 	err = 0;
+ 	address = &default_ldt[0];
+-	size = 5*sizeof(struct desc_struct);
++	size = 5*LDT_ENTRY_SIZE;
+ 	if (size > bytecount)
+ 		size = bytecount;
+ 
+@@ -200,7 +214,15 @@ static int write_ldt(void __user * ptr, 
+ 		goto out_unlock;
+ 	}
+ 
+-	lp = (__u32 *) ((ldt_info.entry_number << 3) + (char *) mm->context.ldt);
++	/*
++	 * No rescheduling allowed from this point to the install.
++	 *
++	 * We do a TLB flush for the same reason as in the read_ldt() path.
++	 */
++	preempt_disable();
++	__flush_tlb_global();
++	lp = (__u32 *) ((ldt_info.entry_number << 3) +
++			(char *) __kmap_atomic_vaddr(KM_LDT_PAGE0));
+ 
+ 	/* Allow LDTs to be cleared by the user.
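+ *
+ * [Editorial aside, not part of the patch: userland asks for a slot to
+ * be cleared by passing a descriptor whose base and limit are both
+ * zero, which is what the test below matches; a hypothetical predicate
+ * over the same fields (assuming the 2.6 struct user_desc layout):
+ *
+ *	static int ldt_is_clear_request(const struct user_desc *u)
+ *	{
+ *		return u->base_addr == 0 && u->limit == 0;
+ *	}
+ *
+ * in which case a null descriptor is installed.]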
*/ + if (ldt_info.base_addr == 0 && ldt_info.limit == 0) { +@@ -221,6 +243,7 @@ install: + *lp = entry_1; + *(lp+1) = entry_2; + error = 0; ++ preempt_enable(); + + out_unlock: + up(&mm->context.sem); +@@ -248,3 +271,26 @@ asmlinkage int sys_modify_ldt(int func, + } + return ret; + } ++ ++/* ++ * load one particular LDT into the current CPU ++ */ ++void load_LDT_nolock(mm_context_t *pc, int cpu) ++{ ++ struct page **pages = pc->ldt_pages; ++ int count = pc->size; ++ int nr_pages, i; ++ ++ if (likely(!count)) { ++ pages = &default_ldt_page; ++ count = 5; ++ } ++ nr_pages = (count*LDT_ENTRY_SIZE + PAGE_SIZE-1) / PAGE_SIZE; ++ ++ for (i = 0; i < nr_pages; i++) { ++ __kunmap_atomic_type(KM_LDT_PAGE0 - i); ++ __kmap_atomic(pages[i], KM_LDT_PAGE0 - i); ++ } ++ set_ldt_desc(cpu, (void *)__kmap_atomic_vaddr(KM_LDT_PAGE0), count); ++ load_LDT_desc(); ++} +--- linux-2.6.0/arch/i386/kernel/Makefile 2003-11-09 16:45:04.000000000 -0800 ++++ 25/arch/i386/kernel/Makefile 2003-12-28 23:26:36.000000000 -0800 +@@ -7,13 +7,14 @@ extra-y := head.o init_task.o vmlinux.ld + obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o vm86.o \ + ptrace.o i8259.o ioport.o ldt.o setup.o time.o sys_i386.o \ + pci-dma.o i386_ksyms.o i387.o dmi_scan.o bootflag.o \ +- doublefault.o ++ doublefault.o entry_trampoline.o + + obj-y += cpu/ + obj-y += timers/ + obj-$(CONFIG_ACPI_BOOT) += acpi/ + obj-$(CONFIG_X86_BIOS_REBOOT) += reboot.o + obj-$(CONFIG_MCA) += mca.o ++obj-$(CONFIG_KGDB) += kgdb_stub.o + obj-$(CONFIG_X86_MSR) += msr.o + obj-$(CONFIG_X86_CPUID) += cpuid.o + obj-$(CONFIG_MICROCODE) += microcode.o +@@ -24,12 +25,13 @@ obj-$(CONFIG_X86_MPPARSE) += mpparse.o + obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o + obj-$(CONFIG_X86_IO_APIC) += io_apic.o + obj-$(CONFIG_X86_NUMAQ) += numaq.o +-obj-$(CONFIG_X86_SUMMIT) += summit.o ++obj-$(CONFIG_X86_SUMMIT_NUMA) += summit.o + obj-$(CONFIG_EDD) += edd.o + obj-$(CONFIG_MODULES) += module.o + obj-y += sysenter.o vsyscall.o + obj-$(CONFIG_ACPI_SRAT) += srat.o + obj-$(CONFIG_HPET_TIMER) += time_hpet.o ++obj-$(CONFIG_EFI) += efi.o efi_stub.o + + EXTRA_AFLAGS := -traditional + +--- linux-2.6.0/arch/i386/kernel/mpparse.c 2003-11-23 19:03:00.000000000 -0800 ++++ 25/arch/i386/kernel/mpparse.c 2003-12-28 23:26:36.000000000 -0800 +@@ -668,7 +668,7 @@ void __init get_smp_config (void) + * Read the physical hardware table. Anything here will + * override the defaults. + */ +- if (!smp_read_mpc((void *)mpf->mpf_physptr)) { ++ if (!smp_read_mpc((void *)phys_to_virt(mpf->mpf_physptr))) { + smp_found_config = 0; + printk(KERN_ERR "BIOS bug, MP table errors detected!...\n"); + printk(KERN_ERR "... disabling SMP support. (tell your hw vendor)\n"); +@@ -962,7 +962,8 @@ void __init mp_override_legacy_irq ( + */ + for (i = 0; i < mp_irq_entries; i++) { + if ((mp_irqs[i].mpc_dstapic == intsrc.mpc_dstapic) +- && (mp_irqs[i].mpc_srcbusirq == intsrc.mpc_srcbusirq)) { ++ && (mp_irqs[i].mpc_srcbusirq == intsrc.mpc_srcbusirq) ++ && (mp_irqs[i].mpc_irqtype == intsrc.mpc_irqtype)) { + mp_irqs[i] = intsrc; + found = 1; + break; +@@ -1081,8 +1082,14 @@ found: + + ioapic_pin = irq - mp_ioapic_routing[ioapic].irq_start; + ++ /* ++ * MPS INTI flags: ++ * trigger: 0=default, 1=edge, 3=level ++ * polarity: 0=default, 1=high, 3=low ++ * Per ACPI spec, default for SCI means level/low. ++ */ + io_apic_set_pci_routing(ioapic, ioapic_pin, irq, +- (flags.trigger >> 1) , (flags.polarity >> 1)); ++ (flags.trigger == 1 ? 0 : 1), (flags.polarity == 1 ? 
0 : 1)); + } + + #ifdef CONFIG_ACPI_PCI +@@ -1129,8 +1136,11 @@ void __init mp_parse_prt (void) + continue; + ioapic_pin = irq - mp_ioapic_routing[ioapic].irq_start; + +- if (!ioapic && (irq < 16)) +- irq += 16; ++ if (es7000_plat) { ++ if (!ioapic && (irq < 16)) ++ irq += 16; ++ } ++ + /* + * Avoid pin reprogramming. PRTs typically include entries + * with redundant pin->irq mappings (but unique PCI devices); +@@ -1147,21 +1157,29 @@ void __init mp_parse_prt (void) + if ((1<irq = irq; ++ if (use_pci_vector() && !platform_legacy_irq(irq)) ++ irq = IO_APIC_VECTOR(irq); ++ entry->irq = irq; + continue; + } + + mp_ioapic_routing[ioapic].pin_programmed[idx] |= (1<irq = irq; +- ++ if (!io_apic_set_pci_routing(ioapic, ioapic_pin, irq, edge_level, active_high_low)) { ++ if (use_pci_vector() && !platform_legacy_irq(irq)) ++ irq = IO_APIC_VECTOR(irq); ++ entry->irq = irq; ++ } + printk(KERN_DEBUG "%02x:%02x:%02x[%c] -> %d-%d -> IRQ %d\n", + entry->id.segment, entry->id.bus, + entry->id.device, ('A' + entry->pin), + mp_ioapic_routing[ioapic].apic_id, ioapic_pin, + entry->irq); + } ++ ++ print_IO_APIC(); ++ ++ return; + } + + #endif /*CONFIG_ACPI_PCI*/ +--- linux-2.6.0/arch/i386/kernel/nmi.c 2003-11-09 16:45:04.000000000 -0800 ++++ 25/arch/i386/kernel/nmi.c 2003-12-28 23:21:06.000000000 -0800 +@@ -31,7 +31,16 @@ + #include + #include + ++#ifdef CONFIG_KGDB ++#include ++#ifdef CONFIG_SMP ++unsigned int nmi_watchdog = NMI_IO_APIC; ++#else ++unsigned int nmi_watchdog = NMI_LOCAL_APIC; ++#endif ++#else + unsigned int nmi_watchdog = NMI_NONE; ++#endif + static unsigned int nmi_hz = HZ; + unsigned int nmi_perfctr_msr; /* the MSR to reset in NMI handler */ + extern void show_registers(struct pt_regs *regs); +@@ -408,6 +417,9 @@ void touch_nmi_watchdog (void) + for (i = 0; i < NR_CPUS; i++) + alert_counter[i] = 0; + } ++#ifdef CONFIG_KGDB ++int tune_watchdog = 5*HZ; ++#endif + + void nmi_watchdog_tick (struct pt_regs * regs) + { +@@ -421,12 +433,24 @@ void nmi_watchdog_tick (struct pt_regs * + + sum = irq_stat[cpu].apic_timer_irqs; + ++#ifdef CONFIG_KGDB ++ if (! in_kgdb(regs) && last_irq_sums[cpu] == sum ) { ++ ++#else + if (last_irq_sums[cpu] == sum) { ++#endif + /* + * Ayiee, looks like this CPU is stuck ... + * wait a few IRQs (5 seconds) before doing the oops ... + */ + alert_counter[cpu]++; ++#ifdef CONFIG_KGDB ++ if (alert_counter[cpu] == tune_watchdog) { ++ kgdb_handle_exception(2, SIGPWR, 0, regs); ++ last_irq_sums[cpu] = sum; ++ alert_counter[cpu] = 0; ++ } ++#endif + if (alert_counter[cpu] == 5*nmi_hz) { + spin_lock(&nmi_print_lock); + /* +--- linux-2.6.0/arch/i386/kernel/process.c 2003-11-09 16:45:04.000000000 -0800 ++++ 25/arch/i386/kernel/process.c 2003-12-28 23:26:36.000000000 -0800 +@@ -47,6 +47,7 @@ + #include + #include + #include ++#include + #ifdef CONFIG_MATH_EMULATION + #include + #endif +@@ -302,6 +303,9 @@ void flush_thread(void) + struct task_struct *tsk = current; + + memset(tsk->thread.debugreg, 0, sizeof(unsigned long)*8); ++#ifdef CONFIG_X86_HIGH_ENTRY ++ clear_thread_flag(TIF_DB7); ++#endif + memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); + /* + * Forget coprocessor state.. +@@ -315,9 +319,8 @@ void release_thread(struct task_struct * + if (dead_task->mm) { + // temporary debugging check + if (dead_task->mm->context.size) { +- printk("WARNING: dead process %8s still has LDT? <%p/%d>\n", ++ printk("WARNING: dead process %8s still has LDT? 
<%d>\n", + dead_task->comm, +- dead_task->mm->context.ldt, + dead_task->mm->context.size); + BUG(); + } +@@ -352,7 +355,17 @@ int copy_thread(int nr, unsigned long cl + p->thread.esp = (unsigned long) childregs; + p->thread.esp0 = (unsigned long) (childregs+1); + ++ /* ++ * get the two stack pages, for the virtual stack. ++ * ++ * IMPORTANT: this code relies on the fact that the task ++ * structure is an 8K aligned piece of physical memory. ++ */ ++ p->thread.stack_page0 = virt_to_page((unsigned long)p->thread_info); ++ p->thread.stack_page1 = virt_to_page((unsigned long)p->thread_info + PAGE_SIZE); ++ + p->thread.eip = (unsigned long) ret_from_fork; ++ p->thread_info->real_stack = p->thread_info; + + savesegment(fs,p->thread.fs); + savesegment(gs,p->thread.gs); +@@ -504,10 +517,41 @@ struct task_struct * __switch_to(struct + + __unlazy_fpu(prev_p); + ++#ifdef CONFIG_X86_HIGH_ENTRY ++ /* ++ * Set the ptes of the virtual stack. (NOTE: a one-page TLB flush is ++ * needed because otherwise NMIs could interrupt the ++ * user-return code with a virtual stack and stale TLBs.) ++ */ ++ __kunmap_atomic_type(KM_VSTACK0); ++ __kunmap_atomic_type(KM_VSTACK1); ++ __kmap_atomic(next->stack_page0, KM_VSTACK0); ++ __kmap_atomic(next->stack_page1, KM_VSTACK1); ++ ++ /* ++ * NOTE: here we rely on the task being the stack as well ++ */ ++ next_p->thread_info->virtual_stack = ++ (void *)__kmap_atomic_vaddr(KM_VSTACK0); ++ ++#if defined(CONFIG_PREEMPT) && defined(CONFIG_SMP) ++ /* ++ * If next was preempted on entry from userspace to kernel, ++ * and now it's on a different cpu, we need to adjust %esp. ++ * This assumes that entry.S does not copy %esp while on the ++ * virtual stack (with interrupts enabled): which is so, ++ * except within __SWITCH_KERNELSPACE itself. ++ */ ++ if (unlikely(next->esp >= TASK_SIZE)) { ++ next->esp &= THREAD_SIZE - 1; ++ next->esp |= (unsigned long) next_p->thread_info->virtual_stack; ++ } ++#endif ++#endif + /* + * Reload esp0, LDT and the page table pointer: + */ +- load_esp0(tss, next->esp0); ++ load_virtual_esp0(tss, next_p); + + /* + * Load the per-thread Thread-Local Storage descriptor. +--- linux-2.6.0/arch/i386/kernel/reboot.c 2003-11-09 16:45:04.000000000 -0800 ++++ 25/arch/i386/kernel/reboot.c 2003-12-28 23:26:36.000000000 -0800 +@@ -8,6 +8,7 @@ + #include + #include + #include ++#include + #include + #include + #include "mach_reboot.h" +@@ -154,12 +155,11 @@ void machine_real_restart(unsigned char + CMOS_WRITE(0x00, 0x8f); + spin_unlock_irqrestore(&rtc_lock, flags); + +- /* Remap the kernel at virtual address zero, as well as offset zero +- from the kernel segment. This assumes the kernel segment starts at +- virtual address PAGE_OFFSET. */ +- +- memcpy (swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS, +- sizeof (swapper_pg_dir [0]) * KERNEL_PGD_PTRS); ++ /* ++ * Remap the first 16 MB of RAM (which includes the kernel image) ++ * at virtual address zero: ++ */ ++ setup_identity_mappings(swapper_pg_dir, 0, 16*1024*1024); + + /* + * Use `swapper_pg_dir' as our page directory. 
+@@ -263,7 +263,12 @@ void machine_restart(char * __unused) + disable_IO_APIC(); + #endif + +- if(!reboot_thru_bios) { ++ if (!reboot_thru_bios) { ++ if (efi_enabled) { ++ efi.reset_system(EFI_RESET_COLD, EFI_SUCCESS, 0, 0); ++ __asm__ __volatile__("lidt %0": :"m" (no_idt)); ++ __asm__ __volatile__("int3"); ++ } + /* rebooting needs to touch the page at absolute addr 0 */ + *((unsigned short *)__va(0x472)) = reboot_mode; + for (;;) { +@@ -273,6 +278,8 @@ void machine_restart(char * __unused) + __asm__ __volatile__("int3"); + } + } ++ if (efi_enabled) ++ efi.reset_system(EFI_RESET_WARM, EFI_SUCCESS, 0, 0); + + machine_real_restart(jump_to_bios, sizeof(jump_to_bios)); + } +@@ -287,6 +294,8 @@ EXPORT_SYMBOL(machine_halt); + + void machine_power_off(void) + { ++ if (efi_enabled) ++ efi.reset_system(EFI_RESET_SHUTDOWN, EFI_SUCCESS, 0, 0); + if (pm_power_off) + pm_power_off(); + } +--- linux-2.6.0/arch/i386/kernel/setup.c 2003-11-09 16:45:04.000000000 -0800 ++++ 25/arch/i386/kernel/setup.c 2003-12-28 23:21:45.000000000 -0800 +@@ -36,6 +36,8 @@ + #include + #include + #include ++#include ++#include + #include