From 28e4672c4fbe0afa1b607b247130aa518dd86879 Mon Sep 17 00:00:00 2001 From: braam Date: Tue, 15 Jul 2003 08:33:07 +0000 Subject: [PATCH] - missing patches for b_unify from the 2.5 series --- lustre/kernel_patches/patches/dump_netdev.patch | 340 ++ .../patches/ext3-san-jdike-2.5.73.patch | 106 + lustre/kernel_patches/patches/iopen-2.5.73.patch | 403 ++ .../kernel_patches/patches/kexec-2.5.73-full.patch | 1479 ++++++ lustre/kernel_patches/patches/kgdb-ga-2.5.73.patch | 5046 ++++++++++++++++++++ .../patches/kgdb-ga-docco-fixes-2.5.73.patch | 347 ++ .../patches/kgdb-use-ggdb-2.5.73.patch | 17 + .../patches/lkcd-kernel-changes-2.5.73.patch | 608 +++ 8 files changed, 8346 insertions(+) create mode 100644 lustre/kernel_patches/patches/dump_netdev.patch create mode 100644 lustre/kernel_patches/patches/ext3-san-jdike-2.5.73.patch create mode 100644 lustre/kernel_patches/patches/iopen-2.5.73.patch create mode 100644 lustre/kernel_patches/patches/kexec-2.5.73-full.patch create mode 100644 lustre/kernel_patches/patches/kgdb-ga-2.5.73.patch create mode 100644 lustre/kernel_patches/patches/kgdb-ga-docco-fixes-2.5.73.patch create mode 100644 lustre/kernel_patches/patches/kgdb-use-ggdb-2.5.73.patch create mode 100644 lustre/kernel_patches/patches/lkcd-kernel-changes-2.5.73.patch diff --git a/lustre/kernel_patches/patches/dump_netdev.patch b/lustre/kernel_patches/patches/dump_netdev.patch new file mode 100644 index 0000000..c57ebe8 --- /dev/null +++ b/lustre/kernel_patches/patches/dump_netdev.patch @@ -0,0 +1,340 @@ + drivers/net/3c59x.c | 27 +++++++++++++++++++++++++++ + drivers/net/e100/e100_main.c | 19 +++++++++++++++++++ + drivers/net/e1000/e1000_main.c | 13 +++++++++++++ + drivers/net/eepro100.c | 21 +++++++++++++++++++++ + drivers/net/smc-ultra.c | 11 +++++++++++ + drivers/net/tlan.c | 14 +++++++++++++- + drivers/net/tulip/tulip_core.c | 22 ++++++++++++++++++++++ + include/linux/netdevice.h | 3 +++ + net/core/dev.c | 18 ++++++++++++++++-- + 9 files changed, 145 
insertions(+), 3 deletions(-) + +--- linux-2.5.73/drivers/net/3c59x.c~dump_netdev Sun Jun 22 11:32:27 2003 ++++ linux-2.5.73-n9560/drivers/net/3c59x.c Mon Jun 30 14:56:29 2003 +@@ -907,6 +907,7 @@ static void set_rx_mode(struct net_devic + static int vortex_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); + static void vortex_tx_timeout(struct net_device *dev); + static void acpi_set_WOL(struct net_device *dev); ++static void vorboom_poll(struct net_device *dev); + + /* This driver uses 'options' to pass the media type, full-duplex flag, etc. */ + /* Option count limit only -- unlimited interfaces are supported. */ +@@ -1463,6 +1464,9 @@ static int __devinit vortex_probe1(struc + dev->set_multicast_list = set_rx_mode; + dev->tx_timeout = vortex_tx_timeout; + dev->watchdog_timeo = (watchdog * HZ) / 1000; ++#ifdef HAVE_POLL_CONTROLLER ++ dev->poll_controller = &vorboom_poll; ++#endif + if (pdev && vp->enable_wol) { + vp->pm_state_valid = 1; + pci_save_state(VORTEX_PCI(vp), vp->power_state); +@@ -2453,6 +2457,29 @@ handler_exit: + return IRQ_HANDLED; + } + ++#ifdef HAVE_POLL_CONTROLLER ++ ++/* ++ * Polling 'interrupt' - used by things like netconsole to send skbs ++ * without having to re-enable interrupts. It's not called while ++ * the interrupt routine is executing. 
++ */ ++ ++static void vorboom_poll (struct net_device *dev) ++{ ++ struct vortex_private *vp = (struct vortex_private *)dev->priv; ++ ++ disable_irq(dev->irq); ++ if (vp->full_bus_master_tx) ++ boomerang_interrupt(dev->irq, dev, 0); ++ else ++ vortex_interrupt(dev->irq, dev, 0); ++ enable_irq(dev->irq); ++} ++ ++#endif ++ ++ + static int vortex_rx(struct net_device *dev) + { + struct vortex_private *vp = (struct vortex_private *)dev->priv; +--- linux-2.5.73/drivers/net/e100/e100_main.c~dump_netdev Sun Jun 22 11:32:44 2003 ++++ linux-2.5.73-n9560/drivers/net/e100/e100_main.c Mon Jun 30 14:56:29 2003 +@@ -558,6 +558,22 @@ e100_trigger_SWI(struct e100_private *bd + readw(&(bdp->scb->scb_status)); /* flushes last write, read-safe */ + } + ++#ifdef HAVE_POLL_CONTROLLER ++ ++/* ++ * Polling 'interrupt' - used by things like netconsole to send skbs ++ * without having to re-enable interrupts. It's not called while ++ * the interrupt routine is executing. ++ */ ++static void ++e100_poll(struct net_device *dev) ++{ ++ disable_irq(dev->irq); ++ e100intr(dev->irq, dev, NULL); ++ enable_irq(dev->irq); ++} ++#endif ++ + static int __devinit + e100_found1(struct pci_dev *pcid, const struct pci_device_id *ent) + { +@@ -576,6 +592,9 @@ e100_found1(struct pci_dev *pcid, const + + SET_MODULE_OWNER(dev); + ++#ifdef HAVE_POLL_CONTROLLER ++ dev->poll_controller = &e100_poll; ++#endif + if (first_time) { + first_time = false; + printk(KERN_NOTICE "%s - version %s\n", +--- linux-2.5.73/drivers/net/e1000/e1000_main.c~dump_netdev Sun Jun 22 11:32:41 2003 ++++ linux-2.5.73-n9560/drivers/net/e1000/e1000_main.c Mon Jun 30 15:28:13 2003 +@@ -145,6 +145,7 @@ static void e1000_leave_82542_rst(struct + static inline void e1000_rx_checksum(struct e1000_adapter *adapter, + struct e1000_rx_desc *rx_desc, + struct sk_buff *skb); ++static void e1000_Poll(struct net_device *dev); + static void e1000_tx_timeout(struct net_device *dev); + static void e1000_tx_timeout_task(struct net_device *dev); + 
static void e1000_smartspeed(struct e1000_adapter *adapter); +@@ -413,6 +414,9 @@ e1000_probe(struct pci_dev *pdev, + + adapter->bd_number = cards_found; + ++#ifdef HAVE_POLL_CONTROLLER ++ netdev->poll_controller = &e1000_Poll; ++#endif + /* setup the private structure */ + + if(e1000_sw_init(adapter)) +@@ -1699,6 +1703,15 @@ e1000_xmit_frame(struct sk_buff *skb, st + return 0; + } + ++#ifdef HAVE_POLL_CONTROLLER ++static void e1000_Poll(struct net_device *dev) ++{ ++ disable_irq(dev->irq); ++ e1000_intr(dev->irq, dev, NULL); ++ enable_irq(dev->irq); ++} ++#endif ++ + /** + * e1000_tx_timeout - Respond to a Tx Hang + * @netdev: network interface device structure +--- linux-2.5.73/drivers/net/eepro100.c~dump_netdev Sun Jun 22 11:32:57 2003 ++++ linux-2.5.73-n9560/drivers/net/eepro100.c Mon Jun 30 14:56:29 2003 +@@ -542,6 +542,7 @@ static void speedo_refill_rx_buffers(str + static int speedo_rx(struct net_device *dev); + static void speedo_tx_buffer_gc(struct net_device *dev); + static irqreturn_t speedo_interrupt(int irq, void *dev_instance, struct pt_regs *regs); ++static void poll_speedo (struct net_device *dev); + static int speedo_close(struct net_device *dev); + static struct net_device_stats *speedo_get_stats(struct net_device *dev); + static int speedo_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); +@@ -880,6 +881,9 @@ static int __devinit speedo_found1(struc + dev->get_stats = &speedo_get_stats; + dev->set_multicast_list = &set_rx_mode; + dev->do_ioctl = &speedo_ioctl; ++#ifdef HAVE_POLL_CONTROLLER ++ dev->poll_controller = &poll_speedo; ++#endif + + return 0; + } +@@ -1661,6 +1665,23 @@ static irqreturn_t speedo_interrupt(int + return IRQ_RETVAL(handled); + } + ++#ifdef HAVE_POLL_CONTROLLER ++ ++/* ++ * Polling 'interrupt' - used by things like netconsole to send skbs ++ * without having to re-enable interrupts. It's not called while ++ * the interrupt routine is executing. 
++ */ ++ ++static void poll_speedo (struct net_device *dev) ++{ ++ disable_irq(dev->irq); ++ speedo_interrupt (dev->irq, dev, NULL); ++ enable_irq(dev->irq); ++} ++ ++#endif ++ + static inline struct RxFD *speedo_rx_alloc(struct net_device *dev, int entry) + { + struct speedo_private *sp = (struct speedo_private *)dev->priv; +--- linux-2.5.73/drivers/net/smc-ultra.c~dump_netdev Sun Jun 22 11:32:33 2003 ++++ linux-2.5.73-n9560/drivers/net/smc-ultra.c Mon Jun 30 14:56:29 2003 +@@ -122,6 +122,14 @@ MODULE_DEVICE_TABLE(isapnp, ultra_device + #define ULTRA_IO_EXTENT 32 + #define EN0_ERWCNT 0x08 /* Early receive warning count. */ + ++ ++static void ultra_poll(struct net_device *dev) ++{ ++ disable_irq(dev->irq); ++ ei_interrupt(dev->irq, dev, NULL); ++ enable_irq(dev->irq); ++} ++ + /* Probe for the Ultra. This looks like a 8013 with the station + address PROM at I/O ports +8 to +13, with a checksum + following. +@@ -134,6 +142,9 @@ int __init ultra_probe(struct net_device + + SET_MODULE_OWNER(dev); + ++#ifdef HAVE_POLL_CONTROLLER ++ dev->poll_controller = &ultra_poll; ++#endif + if (base_addr > 0x1ff) /* Check a single specified location. */ + return ultra_probe1(dev, base_addr); + else if (base_addr != 0) /* Don't probe at all. 
*/ +--- linux-2.5.73/drivers/net/tlan.c~dump_netdev Sun Jun 22 11:32:33 2003 ++++ linux-2.5.73-n9560/drivers/net/tlan.c Mon Jun 30 14:56:29 2003 +@@ -345,6 +345,8 @@ static int TLan_EeSendByte( u16, u8, int + static void TLan_EeReceiveByte( u16, u8 *, int ); + static int TLan_EeReadByte( struct net_device *, u8, u8 * ); + ++static void TLan_Poll(struct net_device *); ++ + + static void + TLan_StoreSKB( struct tlan_list_tag *tag, struct sk_buff *skb) +@@ -890,6 +892,9 @@ static int TLan_Init( struct net_device + dev->get_stats = &TLan_GetStats; + dev->set_multicast_list = &TLan_SetMulticastList; + dev->do_ioctl = &TLan_ioctl; ++#ifdef HAVE_POLL_CONTROLLER ++ dev->poll_controller = &TLan_Poll; ++#endif + dev->tx_timeout = &TLan_tx_timeout; + dev->watchdog_timeo = TX_TIMEOUT; + +@@ -1173,7 +1178,14 @@ static irqreturn_t TLan_HandleInterrupt( + return IRQ_HANDLED; + } /* TLan_HandleInterrupts */ + +- ++#ifdef HAVE_POLL_CONTROLLER ++static void TLan_Poll(struct net_device *dev) ++{ ++ disable_irq(dev->irq); ++ TLan_HandleInterrupt(dev->irq, dev, NULL); ++ enable_irq(dev->irq); ++} ++#endif + + + /*************************************************************** +--- linux-2.5.73/drivers/net/tulip/tulip_core.c~dump_netdev Sun Jun 22 11:32:56 2003 ++++ linux-2.5.73-n9560/drivers/net/tulip/tulip_core.c Mon Jun 30 14:56:29 2003 +@@ -243,6 +243,7 @@ static void tulip_down(struct net_device + static struct net_device_stats *tulip_get_stats(struct net_device *dev); + static int private_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); + static void set_rx_mode(struct net_device *dev); ++static void poll_tulip(struct net_device *dev); + + + +@@ -1618,6 +1619,9 @@ static int __devinit tulip_init_one (str + dev->get_stats = tulip_get_stats; + dev->do_ioctl = private_ioctl; + dev->set_multicast_list = set_rx_mode; ++#ifdef HAVE_POLL_CONTROLLER ++ dev->poll_controller = &poll_tulip; ++#endif + + if (register_netdev(dev)) + goto err_out_free_ring; +@@ -1775,6 +1779,24 @@ 
static void __devexit tulip_remove_one ( + } + + ++#ifdef HAVE_POLL_CONTROLLER ++ ++/* ++ * Polling 'interrupt' - used by things like netconsole to send skbs ++ * without having to re-enable interrupts. It's not called while ++ * the interrupt routine is executing. ++ */ ++ ++static void poll_tulip (struct net_device *dev) ++{ ++ disable_irq(dev->irq); ++ tulip_interrupt (dev->irq, dev, NULL); ++ enable_irq(dev->irq); ++} ++ ++#endif ++ ++ + static struct pci_driver tulip_driver = { + .name = DRV_NAME, + .id_table = tulip_pci_tbl, +--- linux-2.5.73/include/linux/netdevice.h~dump_netdev Sun Jun 22 11:33:18 2003 ++++ linux-2.5.73-n9560/include/linux/netdevice.h Mon Jun 30 14:56:29 2003 +@@ -439,6 +439,9 @@ struct net_device + unsigned char *haddr); + int (*neigh_setup)(struct net_device *dev, struct neigh_parms *); + int (*accept_fastpath)(struct net_device *, struct dst_entry*); ++#define HAVE_POLL_CONTROLLER ++ void (*poll_controller)(struct net_device *dev); ++ int (*rx_hook)(struct sk_buff *skb); + + /* bridge stuff */ + struct net_bridge_port *br_port; +--- linux-2.5.73/net/core/dev.c~dump_netdev Sun Jun 22 11:32:46 2003 ++++ linux-2.5.73-n9560/net/core/dev.c Mon Jun 30 14:56:29 2003 +@@ -1348,8 +1348,6 @@ int netif_rx(struct sk_buff *skb) + struct softnet_data *queue; + unsigned long flags; + +- if (!skb->stamp.tv_sec) +- do_gettimeofday(&skb->stamp); + + /* + * The code is rearranged so that the path is the most +@@ -1359,6 +1357,13 @@ int netif_rx(struct sk_buff *skb) + this_cpu = smp_processor_id(); + queue = &softnet_data[this_cpu]; + ++ if (skb->dev->rx_hook) ++ goto rx_hook; ++rx_hook_continue: ++ ++ if (!skb->stamp.tv_sec ) ++ do_gettimeofday(&skb->stamp); ++ + netdev_rx_stat[this_cpu].total++; + if (queue->input_pkt_queue.qlen <= netdev_max_backlog) { + if (queue->input_pkt_queue.qlen) { +@@ -1401,6 +1406,15 @@ drop: + + kfree_skb(skb); + return NET_RX_DROP; ++rx_hook: ++ { ++ int ret; ++ ++ ret = skb->dev->rx_hook(skb); ++ if (ret == NET_RX_DROP) ++ 
goto drop; ++ goto rx_hook_continue; ++ } + } + + /* Deliver skb to an old protocol, which is not threaded well + +_ diff --git a/lustre/kernel_patches/patches/ext3-san-jdike-2.5.73.patch b/lustre/kernel_patches/patches/ext3-san-jdike-2.5.73.patch new file mode 100644 index 0000000..afda0bd --- /dev/null +++ b/lustre/kernel_patches/patches/ext3-san-jdike-2.5.73.patch @@ -0,0 +1,106 @@ + fs/ext3/inode.c | 81 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + fs/ext3/super.c | 4 ++ + 2 files changed, 85 insertions(+) + +--- linux-2.5.73/fs/ext3/inode.c~ext3-san-jdike-2.5.73 2003-06-22 12:32:58.000000000 -0600 ++++ linux-2.5.73-braam/fs/ext3/inode.c 2003-06-30 12:19:21.000000000 -0600 +@@ -2945,3 +2945,84 @@ int ext3_change_inode_journal_flag(struc + + return err; + } ++ ++/* for each block: 1 ind + 1 dind + 1 tind ++ * for each block: 3 bitmap blocks ++ * for each block: 3 group descriptor blocks ++ * 1 inode block ++ * 1 superblock ++ * 2 * EXT3_SINGLEDATA_TRANS_BLOCKS for the quota files ++ * ((1+1+1) * 3 * nblocks) + 1 + 1 + 2 * EXT3_SINGLEDATA_TRANS_BLOCKS ++ * ++ * XXX assuming: ++ * (1) fs logical block size == page size ++ * (2) ext3 in writeback mode ++ */ ++static inline int ext3_san_write_trans_blocks(int nblocks) ++{ ++ int ret; ++ ++ ret = (1 + 1 + 1) * 3 * nblocks + 1 + 1; ++ ++#ifdef CONFIG_QUOTA ++ ret += 2 * EXT3_SINGLEDATA_TRANS_BLOCKS; ++#endif ++ ++ return ret; ++} ++ ++/* Alloc blocks for an inode, but don't create any buffer/page ++ * for data I/O; set the inode size if file is extended. 
++ * ++ * @inode: target inode ++ * @blocks: array of logic block number ++ * @nblocks: how many blocks need be alloced ++ * @newsize: new filesize we should set ++ * ++ * return: 0 success, otherwise failed ++ * (*blocks) contains physical block number alloced ++ * ++ * XXX this assume the fs block size == page size ++ */ ++int ext3_prep_san_write(struct inode *inode, long *blocks, ++ int nblocks, loff_t newsize) ++{ ++ handle_t *handle; ++ struct buffer_head bh_tmp; ++ int needed_blocks; ++ int i, ret = 0, ret2; ++ ++ needed_blocks = ext3_san_write_trans_blocks(nblocks); ++ ++ lock_kernel(); ++ handle = ext3_journal_start(inode, needed_blocks); ++ if (IS_ERR(handle)) { ++ unlock_kernel(); ++ return PTR_ERR(handle); ++ } ++ unlock_kernel(); ++ ++ /* alloc blocks one by one */ ++ for (i = 0; i < nblocks; i++) { ++ ret = ext3_get_block_handle(handle, inode, blocks[i], ++ &bh_tmp, 1, 1); ++ if (ret) ++ break; ++ ++ blocks[i] = bh_tmp.b_blocknr; ++ } ++ ++ /* set inode size if needed */ ++ if (!ret && (newsize > inode->i_size)) { ++ inode->i_size = newsize; ++ ext3_mark_inode_dirty(handle, inode); ++ } ++ ++ lock_kernel(); ++ ret2 = ext3_journal_stop(handle); ++ unlock_kernel(); ++ ++ if (!ret) ++ ret = ret2; ++ return ret; ++} +--- linux-2.5.73/fs/ext3/super.c~ext3-san-jdike-2.5.73 2003-06-22 12:33:16.000000000 -0600 ++++ linux-2.5.73-braam/fs/ext3/super.c 2003-06-30 12:16:36.000000000 -0600 +@@ -2080,6 +2080,10 @@ static void __exit exit_ext3_fs(void) + exit_ext3_xattr(); + } + ++int ext3_prep_san_write(struct inode *inode, long *blocks, ++ int nblocks, loff_t newsize); ++EXPORT_SYMBOL(ext3_prep_san_write); ++ + MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); + MODULE_DESCRIPTION("Second Extended Filesystem with journaling extensions"); + MODULE_LICENSE("GPL"); + +_ diff --git a/lustre/kernel_patches/patches/iopen-2.5.73.patch b/lustre/kernel_patches/patches/iopen-2.5.73.patch new file mode 100644 index 
0000000..ab3d35f --- /dev/null +++ b/lustre/kernel_patches/patches/iopen-2.5.73.patch @@ -0,0 +1,403 @@ + Documentation/filesystems/ext2.txt | 16 ++ + fs/ext3/Makefile | 2 + fs/ext3/inode.c | 3 + fs/ext3/iopen.c | 239 +++++++++++++++++++++++++++++++++++++ + fs/ext3/iopen.h | 15 ++ + fs/ext3/namei.c | 13 ++ + fs/ext3/super.c | 11 + + include/linux/ext3_fs.h | 2 + 8 files changed, 300 insertions(+), 1 deletion(-) + +--- linux-2.5.73/Documentation/filesystems/ext2.txt~iopen-2.5.73 2003-06-22 12:32:37.000000000 -0600 ++++ linux-2.5.73-braam/Documentation/filesystems/ext2.txt 2003-06-30 12:20:17.000000000 -0600 +@@ -35,6 +35,22 @@ resgid=n The group ID which may use th + + sb=n Use alternate superblock at this location. + ++iopen Makes an invisible pseudo-directory called ++ __iopen__ available in the root directory ++ of the filesystem. Allows open-by-inode- ++ number. i.e., inode 3145 can be accessed ++ via /mntpt/__iopen__/3145 ++ ++iopen_nopriv This option makes the iopen directory be ++ world-readable. This may be safer since it ++ allows daemons to run as an unprivileged user, ++ however it significantly changes the security ++ model of a Unix filesystem, since previously ++ all files under a mode 700 directory were not ++ generally available even if the ++ permissions on the file itself are ++ world-readable. ++ + grpquota,noquota,quota,usrquota Quota options are silently ignored by ext2. 
+ + +--- linux-2.5.73/fs/ext3/Makefile~iopen-2.5.73 2003-06-22 12:32:32.000000000 -0600 ++++ linux-2.5.73-braam/fs/ext3/Makefile 2003-06-30 12:20:17.000000000 -0600 +@@ -5,7 +5,7 @@ + obj-$(CONFIG_EXT3_FS) += ext3.o + + ext3-objs := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ +- ioctl.o namei.o super.o symlink.o hash.o ++ iopen.o ioctl.o namei.o super.o symlink.o hash.o + + ifeq ($(CONFIG_EXT3_FS_XATTR),y) + ext3-objs += xattr.o xattr_user.o xattr_trusted.o +--- linux-2.5.73/fs/ext3/inode.c~iopen-2.5.73 2003-06-30 12:19:21.000000000 -0600 ++++ linux-2.5.73-braam/fs/ext3/inode.c 2003-06-30 12:20:17.000000000 -0600 +@@ -37,6 +37,7 @@ + #include + #include + #include "xattr.h" ++#include "iopen.h" + #include "acl.h" + + /* +@@ -2376,6 +2377,8 @@ void ext3_read_inode(struct inode * inod + ei->i_acl = EXT3_ACL_NOT_CACHED; + ei->i_default_acl = EXT3_ACL_NOT_CACHED; + #endif ++ if (ext3_iopen_get_inode(inode)) ++ return; + if (ext3_get_inode_loc(inode, &iloc)) + goto bad_inode; + bh = iloc.bh; +--- /dev/null 2003-01-30 03:24:37.000000000 -0700 ++++ linux-2.5.73-braam/fs/ext3/iopen.c 2003-06-30 12:20:17.000000000 -0600 +@@ -0,0 +1,239 @@ ++ ++ ++/* ++ * linux/fs/ext3/iopen.c ++ * ++ * Special support for open by inode number ++ * ++ * Copyright (C) 2001 by Theodore Ts'o (tytso@alum.mit.edu). ++ * ++ * This file may be redistributed under the terms of the GNU General ++ * Public License. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include "iopen.h" ++ ++#ifndef assert ++#define assert(test) J_ASSERT(test) ++#endif ++ ++#define IOPEN_NAME_LEN 32 ++ ++/* ++ * This implements looking up an inode by number. 
++ */ ++static struct dentry *iopen_lookup(struct inode * dir, struct dentry *dentry) ++{ ++ struct inode * inode; ++ unsigned long ino; ++ struct list_head *lp; ++ struct dentry *alternate; ++ char buf[IOPEN_NAME_LEN]; ++ ++ if (dentry->d_name.len >= IOPEN_NAME_LEN) ++ return ERR_PTR(-ENAMETOOLONG); ++ ++ memcpy(buf, dentry->d_name.name, dentry->d_name.len); ++ buf[dentry->d_name.len] = 0; ++ ++ if (strcmp(buf, ".") == 0) ++ ino = dir->i_ino; ++ else if (strcmp(buf, "..") == 0) ++ ino = EXT3_ROOT_INO; ++ else ++ ino = simple_strtoul(buf, 0, 0); ++ ++ if ((ino != EXT3_ROOT_INO && ++ //ino != EXT3_ACL_IDX_INO && ++ //ino != EXT3_ACL_DATA_INO && ++ ino < EXT3_FIRST_INO(dir->i_sb)) || ++ ino > le32_to_cpu(EXT3_SB(dir->i_sb)->s_es->s_inodes_count)) ++ return ERR_PTR(-ENOENT); ++ ++ inode = iget(dir->i_sb, ino); ++ if (!inode) ++ return ERR_PTR(-EACCES); ++ if (is_bad_inode(inode)) { ++ iput(inode); ++ return ERR_PTR(-ENOENT); ++ } ++ ++ /* preferrably return a connected dentry */ ++ spin_lock(&dcache_lock); ++ list_for_each(lp, &inode->i_dentry) { ++ alternate = list_entry(lp, struct dentry, d_alias); ++ assert(!(alternate->d_flags & DCACHE_DISCONNECTED)); ++ } ++ ++ if (!list_empty(&inode->i_dentry)) { ++ alternate = list_entry(inode->i_dentry.next, ++ struct dentry, d_alias); ++ dget_locked(alternate); ++ alternate->d_vfs_flags |= DCACHE_REFERENCED; ++ iput(inode); ++ spin_unlock(&dcache_lock); ++ return alternate; ++ } ++ dentry->d_flags |= DCACHE_DISCONNECTED; ++ spin_unlock(&dcache_lock); ++ ++ d_add(dentry, inode); ++ return NULL; ++} ++ ++#define do_switch(x,y) do { \ ++ __typeof__ (x) __tmp = x; \ ++ x = y; y = __tmp; } while (0) ++ ++static inline void switch_names(struct dentry * dentry, struct dentry * target) ++{ ++ const unsigned char *old_name, *new_name; ++ ++ memcpy(dentry->d_iname, target->d_iname, DNAME_INLINE_LEN); ++ old_name = target->d_name.name; ++ new_name = dentry->d_name.name; ++ if (old_name == target->d_iname) ++ old_name = dentry->d_iname; 
++ if (new_name == dentry->d_iname) ++ new_name = target->d_iname; ++ target->d_name.name = new_name; ++ dentry->d_name.name = old_name; ++} ++ ++ ++struct dentry *iopen_connect_dentry(struct dentry *de, struct inode *inode) ++{ ++ struct dentry *tmp, *goal = NULL; ++ struct list_head *lp; ++ ++ /* preferrably return a connected dentry */ ++ spin_lock(&dcache_lock); ++ /* verify this dentry is really new */ ++ assert(!de->d_inode); ++ assert(list_empty(&de->d_subdirs)); ++ assert(list_empty(&de->d_alias)); ++ ++ ++ list_for_each(lp, &inode->i_dentry) { ++ tmp = list_entry(lp, struct dentry, d_alias); ++ if (tmp->d_flags & DCACHE_DISCONNECTED) { ++ assert(tmp->d_alias.next == &inode->i_dentry); ++ assert(tmp->d_alias.prev == &inode->i_dentry); ++ goal = tmp; ++ dget_locked(goal); ++ break; ++ } ++ } ++ ++ if (!goal) { ++ spin_unlock(&dcache_lock); ++ return NULL; ++ } ++ ++ /* Move the goal to the de hash queue */ ++ goal->d_flags &= ~DCACHE_DISCONNECTED; ++ hlist_add_before(&goal->d_hash, &de->d_hash); ++ hlist_del(&goal->d_hash); ++ ++ list_del(&goal->d_child); ++ list_del(&de->d_child); ++ ++ /* Switch the parents and the names.. */ ++ switch_names(goal, de); ++ do_switch(goal->d_parent, de->d_parent); ++ do_switch(goal->d_name.len, de->d_name.len); ++ do_switch(goal->d_name.hash, de->d_name.hash); ++ ++ /* And add them back to the (new) parent lists */ ++ list_add(&goal->d_child, &goal->d_parent->d_subdirs); ++ list_add(&de->d_child, &de->d_parent->d_subdirs); ++ ++ spin_unlock(&dcache_lock); ++ return goal; ++} ++ ++/* ++ * These are the special structures for the iopen pseudo directory. 
++ */ ++ ++static struct inode_operations iopen_inode_operations = { ++ lookup: iopen_lookup, /* BKL held */ ++}; ++ ++static struct file_operations iopen_file_operations = { ++ read: generic_read_dir, ++}; ++ ++static int match_dentry(struct dentry *dentry, const char *name) ++{ ++ int len; ++ ++ len = strlen(name); ++ if (dentry->d_name.len != len) ++ return 0; ++ if (strncmp(dentry->d_name.name, name, len)) ++ return 0; ++ return 1; ++} ++ ++/* ++ * This function is spliced into ext3_lookup and returns 1 if the file ++ * name is __iopen__ and dentry has been filled in appropriately. ++ */ ++int ext3_check_for_iopen(struct inode * dir, struct dentry *dentry) ++{ ++ struct inode * inode; ++ ++ if (dir->i_ino != EXT3_ROOT_INO || ++ !test_opt(dir->i_sb, IOPEN) || ++ !match_dentry(dentry, "__iopen__")) ++ return 0; ++ ++ inode = iget(dir->i_sb, EXT3_BAD_INO); ++ ++ if (!inode) ++ return 0; ++ d_add(dentry, inode); ++ return 1; ++} ++ ++/* ++ * This function is spliced into read_inode; it returns 1 if inode ++ * number is the one for /__iopen__, in which case the inode is filled ++ * in appropriately. Otherwise, this function returns 0. 
++ */ ++int ext3_iopen_get_inode(struct inode * inode) ++{ ++ if (inode->i_ino != EXT3_BAD_INO) ++ return 0; ++ ++ inode->i_mode = S_IFDIR | S_IRUSR | S_IXUSR; ++ if (test_opt(inode->i_sb, IOPEN_NOPRIV)) ++ inode->i_mode |= 0777; ++ inode->i_uid = 0; ++ inode->i_gid = 0; ++ inode->i_nlink = 1; ++ inode->i_size = 4096; ++ inode->i_atime = CURRENT_TIME; ++ inode->i_ctime = CURRENT_TIME; ++ inode->i_mtime = CURRENT_TIME; ++ EXT3_I(inode)->i_dtime = 0; ++ inode->i_blksize = PAGE_SIZE; /* This is the optimal IO size ++ * (for stat), not the fs block ++ * size */ ++ inode->i_blocks = 0; ++ inode->i_version = 1; ++ inode->i_generation = 0; ++ ++ inode->i_op = &iopen_inode_operations; ++ inode->i_fop = &iopen_file_operations; ++ inode->i_mapping->a_ops = 0; ++ ++ return 1; ++} +--- /dev/null 2003-01-30 03:24:37.000000000 -0700 ++++ linux-2.5.73-braam/fs/ext3/iopen.h 2003-06-30 12:20:17.000000000 -0600 +@@ -0,0 +1,15 @@ ++/* ++ * iopen.h ++ * ++ * Special support for opening files by inode number. ++ * ++ * Copyright (C) 2001 by Theodore Ts'o (tytso@alum.mit.edu). ++ * ++ * This file may be redistributed under the terms of the GNU General ++ * Public License. 
++ */ ++ ++extern int ext3_check_for_iopen(struct inode * dir, struct dentry *dentry); ++extern int ext3_iopen_get_inode(struct inode * inode); ++ ++ +--- linux-2.5.73/fs/ext3/namei.c~iopen-2.5.73 2003-06-22 12:32:56.000000000 -0600 ++++ linux-2.5.73-braam/fs/ext3/namei.c 2003-06-30 12:22:12.000000000 -0600 +@@ -37,6 +37,7 @@ + #include + #include + #include "xattr.h" ++#include "iopen.h" + #include "acl.h" + + /* +@@ -970,15 +971,21 @@ errout: + } + #endif + ++struct dentry *iopen_connect_dentry(struct dentry *de, struct inode *inode); ++ + static struct dentry *ext3_lookup(struct inode * dir, struct dentry *dentry) + { + struct inode * inode; + struct ext3_dir_entry_2 * de; + struct buffer_head * bh; ++ struct dentry *alternate = NULL; + + if (dentry->d_name.len > EXT3_NAME_LEN) + return ERR_PTR(-ENAMETOOLONG); + ++ if (ext3_check_for_iopen(dir, dentry)) ++ return NULL; ++ + bh = ext3_find_entry(dentry, &de); + inode = NULL; + if (bh) { +@@ -991,6 +998,12 @@ static struct dentry *ext3_lookup(struct + } + if (inode) + return d_splice_alias(inode, dentry); ++ ++ if (inode && (alternate = iopen_connect_dentry(dentry, inode))) { ++ iput(inode); ++ return alternate; ++ } ++ + d_add(dentry, inode); + return NULL; + } +--- linux-2.5.73/fs/ext3/super.c~iopen-2.5.73 2003-06-30 12:16:36.000000000 -0600 ++++ linux-2.5.73-braam/fs/ext3/super.c 2003-06-30 12:20:17.000000000 -0600 +@@ -752,6 +752,17 @@ static int parse_options (char * options + || !strcmp (this_char, "quota") + || !strcmp (this_char, "usrquota")) + /* Don't do anything ;-) */ ; ++ else if (!strcmp (this_char, "iopen")) { ++ set_opt (sbi->s_mount_opt, IOPEN); ++ clear_opt (sbi->s_mount_opt, IOPEN_NOPRIV); ++ } else if (!strcmp (this_char, "noiopen")) { ++ clear_opt (sbi->s_mount_opt, IOPEN); ++ clear_opt (sbi->s_mount_opt, IOPEN_NOPRIV); ++ } ++ else if (!strcmp (this_char, "iopen_nopriv")) { ++ set_opt (sbi->s_mount_opt, IOPEN); ++ set_opt (sbi->s_mount_opt, IOPEN_NOPRIV); ++ } + else if (!strcmp (this_char, 
"journal")) { + /* @@@ FIXME */ + /* Eventually we will want to be able to create +--- linux-2.5.73/include/linux/ext3_fs.h~iopen-2.5.73 2003-06-22 12:32:56.000000000 -0600 ++++ linux-2.5.73-braam/include/linux/ext3_fs.h 2003-06-30 12:20:17.000000000 -0600 +@@ -330,6 +330,8 @@ struct ext3_inode { + #define EXT3_MOUNT_NO_UID32 0x2000 /* Disable 32-bit UIDs */ + #define EXT3_MOUNT_XATTR_USER 0x4000 /* Extended user attributes */ + #define EXT3_MOUNT_POSIX_ACL 0x8000 /* POSIX Access Control Lists */ ++#define EXT3_MOUNT_IOPEN 0x10000 /* Allow access via iopen */ ++#define EXT3_MOUNT_IOPEN_NOPRIV 0x20000 /* Make iopen world-readable */ + + /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ + #ifndef _LINUX_EXT2_FS_H + +_ diff --git a/lustre/kernel_patches/patches/kexec-2.5.73-full.patch b/lustre/kernel_patches/patches/kexec-2.5.73-full.patch new file mode 100644 index 0000000..3f45a06 --- /dev/null +++ b/lustre/kernel_patches/patches/kexec-2.5.73-full.patch @@ -0,0 +1,1479 @@ +# This is a BitKeeper generated patch for the following project: +# Project Name: Linux kernel tree +# This patch format is intended for GNU patch command version 2.5 or higher. 
+# This patch includes the following deltas: +# ChangeSet 1.1376 -> 1.1380 +# arch/i386/kernel/smp.c 1.32 -> 1.33 +# kernel/sys.c 1.47 -> 1.48 +# arch/i386/Kconfig 1.62 -> 1.63 +# arch/i386/kernel/Makefile 1.44 -> 1.45 +# kernel/Makefile 1.28 -> 1.29 +# arch/i386/kernel/entry.S 1.64 -> 1.65 +# arch/i386/kernel/reboot.c 1.8 -> 1.9 +# arch/i386/kernel/io_apic.c 1.71 -> 1.72 +# arch/i386/kernel/dmi_scan.c 1.36 -> 1.37 +# fs/aio.c 1.32 -> 1.33 +# include/asm-i386/apicdef.h 1.8 -> 1.9 +# MAINTAINERS 1.149 -> 1.150 +# include/asm-i386/unistd.h 1.26 -> 1.27 +# arch/i386/defconfig 1.96 -> 1.97 +# arch/i386/kernel/i8259.c 1.25 -> 1.26 +# include/asm-i386/apic.h 1.13 -> 1.14 +# arch/i386/kernel/apic.c 1.42 -> 1.43 +# include/linux/reboot.h 1.4 -> 1.5 +# (new) -> 1.1 include/linux/kexec.h +# (new) -> 1.1 include/asm-i386/kexec.h +# (new) -> 1.1 kernel/kexec.c +# (new) -> 1.1 arch/i386/kernel/relocate_kernel.S +# (new) -> 1.1 arch/i386/kernel/machine_kexec.c +# +# The following is the BitKeeper ChangeSet Log +# -------------------------------------------- +# 03/06/23 andyp@andyp.pdx.osdl.net 1.1377 +# kexec2-2.5.73-common.patch +# -------------------------------------------- +# 03/06/23 andyp@andyp.pdx.osdl.net 1.1378 +# kexec2-2.5.73-x86.patch +# -------------------------------------------- +# 03/06/23 andyp@andyp.pdx.osdl.net 1.1379 +# kexec2-2.5.73-syscall.patch +# -------------------------------------------- +# 03/06/23 andyp@andyp.pdx.osdl.net 1.1380 +# kexec2-2.5.73-defconfig.patch +# -------------------------------------------- +# +diff -Nru a/MAINTAINERS b/MAINTAINERS +--- a/MAINTAINERS Mon Jun 23 12:22:26 2003 ++++ b/MAINTAINERS Mon Jun 23 12:22:26 2003 +@@ -1067,6 +1067,17 @@ + W: http://www.cse.unsw.edu.au/~neilb/patches/linux-devel/ + S: Maintained + ++KEXEC ++P: Eric Biederman ++M: ebiederm@xmission.com ++M: ebiederman@lnxi.com ++W: http://www.xmission.com/~ebiederm/files/kexec/ ++P: Andy Pfiffer ++M: andyp@osdl.org ++W: 
http://www.osdl.org/archive/andyp/bloom/Code/Linux/Kexec/ ++L: linux-kernel@vger.kernel.org ++S: Maintained ++ + LANMEDIA WAN CARD DRIVER + P: Andrew Stanley-Jones + M: asj@lanmedia.com +diff -Nru a/arch/i386/Kconfig b/arch/i386/Kconfig +--- a/arch/i386/Kconfig Mon Jun 23 12:22:26 2003 ++++ b/arch/i386/Kconfig Mon Jun 23 12:22:26 2003 +@@ -794,6 +794,23 @@ + depends on ((X86_SUMMIT || X86_GENERICARCH) && NUMA) + default y + ++config KEXEC ++ bool "kexec system call (EXPERIMENTAL)" ++ depends on EXPERIMENTAL ++ help ++ kexec is a system call that implements the ability to shut down your ++ current kernel, and to start another kernel. It is like a reboot ++ but it is independent of the system firmware. And like a reboot ++ you can start any kernel with it, not just Linux. ++ ++ The name comes from the similarity to the exec system call. ++ ++ It is an ongoing process to be certain the hardware in a machine ++ is properly shut down, so do not be surprised if this code does not ++ initially work for you. It may help to enable device hotplugging ++ support. As of this writing the exact hardware interface is ++ strongly in flux, so no good recommendation can be made. 
++ + endmenu + + +diff -Nru a/arch/i386/defconfig b/arch/i386/defconfig +--- a/arch/i386/defconfig Mon Jun 23 12:22:26 2003 ++++ b/arch/i386/defconfig Mon Jun 23 12:22:26 2003 +@@ -72,6 +72,7 @@ + CONFIG_X86_LOCAL_APIC=y + CONFIG_X86_IO_APIC=y + CONFIG_NR_CPUS=32 ++CONFIG_KEXEC=y + CONFIG_X86_MCE=y + # CONFIG_X86_MCE_NONFATAL is not set + CONFIG_X86_MCE_P4THERMAL=y +diff -Nru a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile +--- a/arch/i386/kernel/Makefile Mon Jun 23 12:22:26 2003 ++++ b/arch/i386/kernel/Makefile Mon Jun 23 12:22:26 2003 +@@ -24,6 +24,7 @@ + obj-$(CONFIG_X86_MPPARSE) += mpparse.o + obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o + obj-$(CONFIG_X86_IO_APIC) += io_apic.o ++obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o + obj-$(CONFIG_SOFTWARE_SUSPEND) += suspend.o suspend_asm.o + obj-$(CONFIG_X86_NUMAQ) += numaq.o + obj-$(CONFIG_X86_SUMMIT) += summit.o +diff -Nru a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c +--- a/arch/i386/kernel/apic.c Mon Jun 23 12:22:26 2003 ++++ b/arch/i386/kernel/apic.c Mon Jun 23 12:22:26 2003 +@@ -26,6 +26,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -175,6 +176,39 @@ + outb(0x70, 0x22); + outb(0x00, 0x23); + } ++#ifdef CONFIG_KEXEC ++ else { ++ /* Go back to Virtual Wire compatibility mode */ ++ unsigned long value; ++ ++ /* For the spurious interrupt use vector F, and enable it */ ++ value = apic_read(APIC_SPIV); ++ value &= ~APIC_VECTOR_MASK; ++ value |= APIC_SPIV_APIC_ENABLED; ++ value |= 0xf; ++ apic_write_around(APIC_SPIV, value); ++ ++ /* For LVT0 make it edge triggered, active high, external and enabled */ ++ value = apic_read(APIC_LVT0); ++ value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING | ++ APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | ++ APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED ); ++ value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; ++ value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXINT); ++ apic_write_around(APIC_LVT0, value); ++ ++ /* For LVT1 make it edge 
triggered, active high, nmi and enabled */ ++ value = apic_read(APIC_LVT1); ++ value &= ~( ++ APIC_MODE_MASK | APIC_SEND_PENDING | ++ APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | ++ APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED); ++ value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; ++ value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI); ++ apic_write_around(APIC_LVT1, value); ++ } ++#endif /* CONFIG_KEXEC */ ++ + } + + void disable_local_APIC(void) +@@ -1113,6 +1147,26 @@ + printk (KERN_INFO "APIC error on CPU%d: %02lx(%02lx)\n", + smp_processor_id(), v , v1); + irq_exit(); ++} ++ ++void stop_apics(void) ++{ ++ /* By resetting the APIC's we disable the nmi watchdog */ ++#if CONFIG_SMP ++ /* ++ * Stop all CPUs and turn off local APICs and the IO-APIC, so ++ * other OSs see a clean IRQ state. ++ */ ++ smp_send_stop(); ++#else ++ disable_local_APIC(); ++#endif ++#if defined(CONFIG_X86_IO_APIC) ++ if (smp_found_config) { ++ disable_IO_APIC(); ++ } ++#endif ++ disconnect_bsp_APIC(); + } + + /* +diff -Nru a/arch/i386/kernel/dmi_scan.c b/arch/i386/kernel/dmi_scan.c +--- a/arch/i386/kernel/dmi_scan.c Mon Jun 23 12:22:26 2003 ++++ b/arch/i386/kernel/dmi_scan.c Mon Jun 23 12:22:26 2003 +@@ -222,31 +222,6 @@ + return 0; + } + +-/* +- * Some machines require the "reboot=s" commandline option, this quirk makes that automatic. +- */ +-static __init int set_smp_reboot(struct dmi_blacklist *d) +-{ +-#ifdef CONFIG_SMP +- extern int reboot_smp; +- if (reboot_smp == 0) +- { +- reboot_smp = 1; +- printk(KERN_INFO "%s series board detected. Selecting SMP-method for reboots.\n", d->ident); +- } +-#endif +- return 0; +-} +- +-/* +- * Some machines require the "reboot=b,s" commandline option, this quirk makes that automatic. 
+- */ +-static __init int set_smp_bios_reboot(struct dmi_blacklist *d) +-{ +- set_smp_reboot(d); +- set_bios_reboot(d); +- return 0; +-} + + /* + * Some bioses have a broken protected mode poweroff and need to use realmode +@@ -527,7 +502,7 @@ + MATCH(DMI_BIOS_VERSION, "4.60 PGMA"), + MATCH(DMI_BIOS_DATE, "134526184"), NO_MATCH + } }, +- { set_smp_bios_reboot, "Dell PowerEdge 1300", { /* Handle problems with rebooting on Dell 1300's */ ++ { set_bios_reboot, "Dell PowerEdge 1300", { /* Handle problems with rebooting on Dell 1300's */ + MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"), + MATCH(DMI_PRODUCT_NAME, "PowerEdge 1300/"), + NO_MATCH, NO_MATCH +diff -Nru a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S +--- a/arch/i386/kernel/entry.S Mon Jun 23 12:22:26 2003 ++++ b/arch/i386/kernel/entry.S Mon Jun 23 12:22:26 2003 +@@ -876,5 +876,6 @@ + .long sys_clock_nanosleep + .long sys_statfs64 + .long sys_fstatfs64 ++ .long sys_kexec_load /* 270 */ + + nr_syscalls=(.-sys_call_table)/4 +diff -Nru a/arch/i386/kernel/i8259.c b/arch/i386/kernel/i8259.c +--- a/arch/i386/kernel/i8259.c Mon Jun 23 12:22:26 2003 ++++ b/arch/i386/kernel/i8259.c Mon Jun 23 12:22:26 2003 +@@ -244,9 +244,21 @@ + return 0; + } + ++static int i8259A_shutdown(struct sys_device *dev) ++{ ++ /* Put the i8259A into a quiescent state that ++ * the kernel initialization code can get it ++ * out of. 
++ */ ++ outb(0xff, 0x21); /* mask all of 8259A-1 */ ++ outb(0xff, 0xA1); /* mask all of 8259A-1 */ ++ return 0; ++} ++ + static struct sysdev_class i8259_sysdev_class = { + set_kset_name("i8259"), + .resume = i8259A_resume, ++ .shutdown = i8259A_shutdown, + }; + + static struct sys_device device_i8259A = { +diff -Nru a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c +--- a/arch/i386/kernel/io_apic.c Mon Jun 23 12:22:26 2003 ++++ b/arch/i386/kernel/io_apic.c Mon Jun 23 12:22:26 2003 +@@ -1562,8 +1562,6 @@ + * Clear the IO-APIC before rebooting: + */ + clear_IO_APIC(); +- +- disconnect_bsp_APIC(); + } + + /* +diff -Nru a/arch/i386/kernel/machine_kexec.c b/arch/i386/kernel/machine_kexec.c +--- /dev/null Wed Dec 31 16:00:00 1969 ++++ b/arch/i386/kernel/machine_kexec.c Mon Jun 23 12:22:26 2003 +@@ -0,0 +1,116 @@ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++ ++/* ++ * machine_kexec ++ * ======================= ++ */ ++ ++ ++static void set_idt(void *newidt, __u16 limit) ++{ ++ unsigned char curidt[6]; ++ ++ /* ia32 supports unaliged loads & stores */ ++ (*(__u16 *)(curidt)) = limit; ++ (*(__u32 *)(curidt +2)) = (unsigned long)(newidt); ++ ++ __asm__ __volatile__ ( ++ "lidt %0\n" ++ : "=m" (curidt) ++ ); ++}; ++ ++ ++static void set_gdt(void *newgdt, __u16 limit) ++{ ++ unsigned char curgdt[6]; ++ ++ /* ia32 supports unaliged loads & stores */ ++ (*(__u16 *)(curgdt)) = limit; ++ (*(__u32 *)(curgdt +2)) = (unsigned long)(newgdt); ++ ++ __asm__ __volatile__ ( ++ "lgdt %0\n" ++ : "=m" (curgdt) ++ ); ++}; ++ ++static void load_segments(void) ++{ ++#define __STR(X) #X ++#define STR(X) __STR(X) ++ ++ __asm__ __volatile__ ( ++ "\tljmp $"STR(__KERNEL_CS)",$1f\n" ++ "\t1:\n" ++ "\tmovl $"STR(__KERNEL_DS)",%eax\n" ++ "\tmovl %eax,%ds\n" ++ "\tmovl %eax,%es\n" ++ "\tmovl %eax,%fs\n" ++ "\tmovl %eax,%gs\n" ++ "\tmovl %eax,%ss\n" ++ ); ++#undef STR ++#undef __STR ++} ++ ++typedef void 
(*relocate_new_kernel_t)( ++ unsigned long indirection_page, unsigned long reboot_code_buffer, ++ unsigned long start_address); ++ ++const extern unsigned char relocate_new_kernel[]; ++extern void relocate_new_kernel_end(void); ++const extern unsigned int relocate_new_kernel_size; ++extern void use_mm(struct mm_struct *mm); ++ ++void machine_kexec(struct kimage *image) ++{ ++ unsigned long indirection_page; ++ unsigned long reboot_code_buffer; ++ relocate_new_kernel_t rnk; ++ ++ /* switch to an mm where the reboot_code_buffer is identity mapped */ ++ use_mm(&init_mm); ++ stop_apics(); ++ ++ /* Interrupts aren't acceptable while we reboot */ ++ local_irq_disable(); ++ reboot_code_buffer = page_to_pfn(image->reboot_code_pages) << PAGE_SHIFT; ++ indirection_page = image->head & PAGE_MASK; ++ ++ /* copy it out */ ++ memcpy((void *)reboot_code_buffer, relocate_new_kernel, relocate_new_kernel_size); ++ ++ /* The segment registers are funny things, they are ++ * automatically loaded from a table, in memory wherever you ++ * set them to a specific selector, but this table is never ++ * accessed again you set the segment to a different selector. ++ * ++ * The more common model is are caches where the behide ++ * the scenes work is done, but is also dropped at arbitrary ++ * times. ++ * ++ * I take advantage of this here by force loading the ++ * segments, before I zap the gdt with an invalid value. ++ */ ++ load_segments(); ++ /* The gdt & idt are now invalid. ++ * If you want to load them you must set up your own idt & gdt. 
++ */ ++ set_gdt(phys_to_virt(0),0); ++ set_idt(phys_to_virt(0),0); ++ ++ /* now call it */ ++ rnk = (relocate_new_kernel_t) reboot_code_buffer; ++ (*rnk)(indirection_page, reboot_code_buffer, image->start); ++} +diff -Nru a/arch/i386/kernel/reboot.c b/arch/i386/kernel/reboot.c +--- a/arch/i386/kernel/reboot.c Mon Jun 23 12:22:26 2003 ++++ b/arch/i386/kernel/reboot.c Mon Jun 23 12:22:26 2003 +@@ -8,6 +8,7 @@ + #include + #include + #include ++#include + #include "mach_reboot.h" + + /* +@@ -20,8 +21,7 @@ + int reboot_thru_bios; + + #ifdef CONFIG_SMP +-int reboot_smp = 0; +-static int reboot_cpu = -1; ++int reboot_cpu = -1; /* specifies the internal linux cpu id, not the apicid */ + /* shamelessly grabbed from lib/vsprintf.c for readability */ + #define is_digit(c) ((c) >= '0' && (c) <= '9') + #endif +@@ -43,7 +43,6 @@ + break; + #ifdef CONFIG_SMP + case 's': /* "smp" reboot by executing reset on BSP or other CPU*/ +- reboot_smp = 1; + if (is_digit(*(str+1))) { + reboot_cpu = (int) (*(str+1) - '0'); + if (is_digit(*(str+2))) +@@ -215,42 +214,7 @@ + + void machine_restart(char * __unused) + { +-#ifdef CONFIG_SMP +- int cpuid; +- +- cpuid = GET_APIC_ID(apic_read(APIC_ID)); +- +- if (reboot_smp) { +- +- /* check to see if reboot_cpu is valid +- if its not, default to the BSP */ +- if ((reboot_cpu == -1) || +- (reboot_cpu > (NR_CPUS -1)) || +- !(phys_cpu_present_map & (1< ++#include ++ ++ /* Must be relocatable PIC code callable as a C function, that once ++ * it starts can not use the previous processes stack. ++ * ++ */ ++ .globl relocate_new_kernel ++relocate_new_kernel: ++ /* read the arguments and say goodbye to the stack */ ++ movl 4(%esp), %ebx /* indirection_page */ ++ movl 8(%esp), %ebp /* reboot_code_buffer */ ++ movl 12(%esp), %edx /* start address */ ++ ++ /* zero out flags, and disable interrupts */ ++ pushl $0 ++ popfl ++ ++ /* set a new stack at the bottom of our page... 
*/ ++ lea 4096(%ebp), %esp ++ ++ /* store the parameters back on the stack */ ++ pushl %edx /* store the start address */ ++ ++ /* Set cr0 to a known state: ++ * 31 0 == Paging disabled ++ * 18 0 == Alignment check disabled ++ * 16 0 == Write protect disabled ++ * 3 0 == No task switch ++ * 2 0 == Don't do FP software emulation. ++ * 0 1 == Proctected mode enabled ++ */ ++ movl %cr0, %eax ++ andl $~((1<<31)|(1<<18)|(1<<16)|(1<<3)|(1<<2)), %eax ++ orl $(1<<0), %eax ++ movl %eax, %cr0 ++ ++ /* Set cr4 to a known state: ++ * Setting everything to zero seems safe. ++ */ ++ movl %cr4, %eax ++ andl $0, %eax ++ movl %eax, %cr4 ++ ++ jmp 1f ++1: ++ ++ /* Flush the TLB (needed?) */ ++ xorl %eax, %eax ++ movl %eax, %cr3 ++ ++ /* Do the copies */ ++ cld ++0: /* top, read another word for the indirection page */ ++ movl %ebx, %ecx ++ movl (%ebx), %ecx ++ addl $4, %ebx ++ testl $0x1, %ecx /* is it a destination page */ ++ jz 1f ++ movl %ecx, %edi ++ andl $0xfffff000, %edi ++ jmp 0b ++1: ++ testl $0x2, %ecx /* is it an indirection page */ ++ jz 1f ++ movl %ecx, %ebx ++ andl $0xfffff000, %ebx ++ jmp 0b ++1: ++ testl $0x4, %ecx /* is it the done indicator */ ++ jz 1f ++ jmp 2f ++1: ++ testl $0x8, %ecx /* is it the source indicator */ ++ jz 0b /* Ignore it otherwise */ ++ movl %ecx, %esi /* For every source page do a copy */ ++ andl $0xfffff000, %esi ++ ++ movl $1024, %ecx ++ rep ; movsl ++ jmp 0b ++ ++2: ++ ++ /* To be certain of avoiding problems with self modifying code ++ * I need to execute a serializing instruction here. ++ * So I flush the TLB, it's handy, and not processor dependent. 
++ */ ++ xorl %eax, %eax ++ movl %eax, %cr3 ++ ++ /* set all of the registers to known values */ ++ /* leave %esp alone */ ++ ++ xorl %eax, %eax ++ xorl %ebx, %ebx ++ xorl %ecx, %ecx ++ xorl %edx, %edx ++ xorl %esi, %esi ++ xorl %edi, %edi ++ xorl %ebp, %ebp ++ ret ++relocate_new_kernel_end: ++ ++ .globl relocate_new_kernel_size ++relocate_new_kernel_size: ++ .long relocate_new_kernel_end - relocate_new_kernel +diff -Nru a/arch/i386/kernel/smp.c b/arch/i386/kernel/smp.c +--- a/arch/i386/kernel/smp.c Mon Jun 23 12:22:26 2003 ++++ b/arch/i386/kernel/smp.c Mon Jun 23 12:22:26 2003 +@@ -547,6 +547,30 @@ + + void smp_send_stop(void) + { ++ extern int reboot_cpu; ++ int reboot_cpu_id; ++ ++ /* The boot cpu is always logical cpu 0 */ ++ reboot_cpu_id = 0; ++ ++ /* See if there has been give a command line override . ++ */ ++ if ((reboot_cpu != -1) && !(reboot_cpu >= NR_CPUS) && ++ test_bit(reboot_cpu, &cpu_online_map)) { ++ reboot_cpu_id = reboot_cpu; ++ } ++ ++ /* Make certain the the cpu I'm rebooting on is online */ ++ if (!test_bit(reboot_cpu_id, &cpu_online_map)) { ++ reboot_cpu_id = smp_processor_id(); ++ } ++ ++ /* Make certain I only run on the appropriate processor */ ++ set_cpus_allowed(current, 1 << reboot_cpu_id); ++ ++ /* O.k. Now that I'm on the appropriate processor stop ++ * all of the others. 
++ */ + smp_call_function(stop_this_cpu, NULL, 1, 0); + + local_irq_disable(); +diff -Nru a/fs/aio.c b/fs/aio.c +--- a/fs/aio.c Mon Jun 23 12:22:26 2003 ++++ b/fs/aio.c Mon Jun 23 12:22:26 2003 +@@ -536,7 +536,7 @@ + return ioctx; + } + +-static void use_mm(struct mm_struct *mm) ++void use_mm(struct mm_struct *mm) + { + struct mm_struct *active_mm = current->active_mm; + atomic_inc(&mm->mm_count); +diff -Nru a/include/asm-i386/apic.h b/include/asm-i386/apic.h +--- a/include/asm-i386/apic.h Mon Jun 23 12:22:26 2003 ++++ b/include/asm-i386/apic.h Mon Jun 23 12:22:26 2003 +@@ -97,6 +97,9 @@ + #define NMI_LOCAL_APIC 2 + #define NMI_INVALID 3 + ++extern void stop_apics(void); ++#else ++static inline void stop_apics(void) { } + #endif /* CONFIG_X86_LOCAL_APIC */ + + #endif /* __ASM_APIC_H */ +diff -Nru a/include/asm-i386/apicdef.h b/include/asm-i386/apicdef.h +--- a/include/asm-i386/apicdef.h Mon Jun 23 12:22:26 2003 ++++ b/include/asm-i386/apicdef.h Mon Jun 23 12:22:26 2003 +@@ -86,6 +86,7 @@ + #define APIC_LVT_REMOTE_IRR (1<<14) + #define APIC_INPUT_POLARITY (1<<13) + #define APIC_SEND_PENDING (1<<12) ++#define APIC_MODE_MASK 0x700 + #define GET_APIC_DELIVERY_MODE(x) (((x)>>8)&0x7) + #define SET_APIC_DELIVERY_MODE(x,y) (((x)&~0x700)|((y)<<8)) + #define APIC_MODE_FIXED 0x0 +diff -Nru a/include/asm-i386/kexec.h b/include/asm-i386/kexec.h +--- /dev/null Wed Dec 31 16:00:00 1969 ++++ b/include/asm-i386/kexec.h Mon Jun 23 12:22:26 2003 +@@ -0,0 +1,23 @@ ++#ifndef _I386_KEXEC_H ++#define _I386_KEXEC_H ++ ++#include ++ ++/* ++ * KEXEC_SOURCE_MEMORY_LIMIT maximum page get_free_page can return. ++ * I.e. Maximum page that is mapped directly into kernel memory, ++ * and kmap is not required. ++ * ++ * Someone correct me if FIXADDR_START - PAGEOFFSET is not the correct ++ * calculation for the amount of memory directly mappable into the ++ * kernel memory space. 
++ */ ++ ++/* Maximum physical address we can use pages from */ ++#define KEXEC_SOURCE_MEMORY_LIMIT (-1UL) ++/* Maximum address we can reach in physical address mode */ ++#define KEXEC_DESTINATION_MEMORY_LIMIT (-1UL) ++ ++#define KEXEC_REBOOT_CODE_SIZE 4096 ++ ++#endif /* _I386_KEXEC_H */ +diff -Nru a/include/asm-i386/unistd.h b/include/asm-i386/unistd.h +--- a/include/asm-i386/unistd.h Mon Jun 23 12:22:26 2003 ++++ b/include/asm-i386/unistd.h Mon Jun 23 12:22:26 2003 +@@ -275,8 +275,9 @@ + #define __NR_clock_nanosleep (__NR_timer_create+8) + #define __NR_statfs64 268 + #define __NR_fstatfs64 269 ++#define __NR_sys_kexec_load 270 + +-#define NR_syscalls 270 ++#define NR_syscalls 271 + + /* user-visible error numbers are in the range -1 - -124: see */ + +diff -Nru a/include/linux/kexec.h b/include/linux/kexec.h +--- /dev/null Wed Dec 31 16:00:00 1969 ++++ b/include/linux/kexec.h Mon Jun 23 12:22:26 2003 +@@ -0,0 +1,54 @@ ++#ifndef LINUX_KEXEC_H ++#define LINUX_KEXEC_H ++ ++#if CONFIG_KEXEC ++#include ++#include ++#include ++ ++/* ++ * This structure is used to hold the arguments that are used when loading ++ * kernel binaries. 
++ */ ++ ++typedef unsigned long kimage_entry_t; ++#define IND_DESTINATION 0x1 ++#define IND_INDIRECTION 0x2 ++#define IND_DONE 0x4 ++#define IND_SOURCE 0x8 ++ ++#define KEXEC_SEGMENT_MAX 8 ++struct kexec_segment { ++ void *buf; ++ size_t bufsz; ++ void *mem; ++ size_t memsz; ++}; ++ ++struct kimage { ++ kimage_entry_t head; ++ kimage_entry_t *entry; ++ kimage_entry_t *last_entry; ++ ++ unsigned long destination; ++ unsigned long offset; ++ ++ unsigned long start; ++ struct page *reboot_code_pages; ++ ++ unsigned long nr_segments; ++ struct kexec_segment segment[KEXEC_SEGMENT_MAX+1]; ++ ++ struct list_head dest_pages; ++ struct list_head unuseable_pages; ++}; ++ ++ ++/* kexec interface functions */ ++extern void machine_kexec(struct kimage *image); ++extern asmlinkage long sys_kexec(unsigned long entry, long nr_segments, ++ struct kexec_segment *segments); ++extern struct kimage *kexec_image; ++#endif ++#endif /* LINUX_KEXEC_H */ ++ +diff -Nru a/include/linux/reboot.h b/include/linux/reboot.h +--- a/include/linux/reboot.h Mon Jun 23 12:22:26 2003 ++++ b/include/linux/reboot.h Mon Jun 23 12:22:26 2003 +@@ -22,6 +22,7 @@ + * POWER_OFF Stop OS and remove all power from system, if possible. + * RESTART2 Restart system using given command string. + * SW_SUSPEND Suspend system using Software Suspend if compiled in ++ * KEXEC Restart the system using a different kernel. 
+ */ + + #define LINUX_REBOOT_CMD_RESTART 0x01234567 +@@ -31,6 +32,7 @@ + #define LINUX_REBOOT_CMD_POWER_OFF 0x4321FEDC + #define LINUX_REBOOT_CMD_RESTART2 0xA1B2C3D4 + #define LINUX_REBOOT_CMD_SW_SUSPEND 0xD000FCE2 ++#define LINUX_REBOOT_CMD_KEXEC 0x45584543 + + + #ifdef __KERNEL__ +diff -Nru a/kernel/Makefile b/kernel/Makefile +--- a/kernel/Makefile Mon Jun 23 12:22:26 2003 ++++ b/kernel/Makefile Mon Jun 23 12:22:26 2003 +@@ -18,6 +18,7 @@ + obj-$(CONFIG_CPU_FREQ) += cpufreq.o + obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o + obj-$(CONFIG_SOFTWARE_SUSPEND) += suspend.o ++obj-$(CONFIG_KEXEC) += kexec.o + obj-$(CONFIG_COMPAT) += compat.o + + ifneq ($(CONFIG_IA64),y) +diff -Nru a/kernel/kexec.c b/kernel/kexec.c +--- /dev/null Wed Dec 31 16:00:00 1969 ++++ b/kernel/kexec.c Mon Jun 23 12:22:26 2003 +@@ -0,0 +1,629 @@ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* When kexec transitions to the new kernel there is a one to one ++ * mapping between physical and virtual addresses. On processors ++ * where you can disable the MMU this is trivial, and easy. For ++ * others it is still a simple predictable page table to setup. ++ * ++ * In that environment kexec copies the new kernel to it's final ++ * resting place. This means I can only support memory whose ++ * physical address can fit in an unsigned long. In particular ++ * addresses where (pfn << PAGE_SHIFT) > ULONG_MAX cannot be handled. ++ * If the assembly stub has more restrictive requirements ++ * KEXEC_SOURCE_MEMORY_LIMIT and KEXEC_DEST_MEMORY_LIMIT can be ++ * defined more restrictively in . ++ * ++ * The code for the transition from the current kernel to the ++ * the new kernel is placed in the reboot_code_buffer, whose size ++ * is given by KEXEC_REBOOT_CODE_SIZE. In the best case only a single ++ * page of memory is necessary, but some architectures require more. 
++ * Because this memory must be identity mapped in the transition from ++ * virtual to physical addresses it must live in the range ++ * 0 - TASK_SIZE, as only the user space mappings are arbitrarily ++ * modifyable. ++ * ++ * The assembly stub in the reboot code buffer is passed a linked list ++ * of descriptor pages detailing the source pages of the new kernel, ++ * and the destination addresses of those source pages. As this data ++ * structure is not used in the context of the current OS, it must ++ * be self contained. ++ * ++ * The code has been made to work with highmem pages and will use a ++ * destination page in it's final resting place (if it happens ++ * to allocate it). The end product of this is that most of the ++ * physical address space, and most of ram can be used. ++ * ++ * Future directions include: ++ * - allocating a page table with the reboot code buffer identity ++ * mapped, to simplify machine_kexec and make kexec_on_panic, more ++ * reliable. ++ * - allocating the pages for a page table for machines that cannot ++ * disable their MMUs. (Hammer, Alpha...) ++ */ ++ ++/* KIMAGE_NO_DEST is an impossible destination address..., for ++ * allocating pages whose destination address we do not care about. 
++ */ ++#define KIMAGE_NO_DEST (-1UL) ++ ++static int kimage_is_destination_range( ++ struct kimage *image, unsigned long start, unsigned long end); ++static struct page *kimage_alloc_reboot_code_pages(struct kimage *image); ++static struct page *kimage_alloc_page(struct kimage *image, unsigned int gfp_mask, unsigned long dest); ++ ++ ++static int kimage_alloc(struct kimage **rimage, ++ unsigned long nr_segments, struct kexec_segment *segments) ++{ ++ int result; ++ struct kimage *image; ++ size_t segment_bytes; ++ struct page *reboot_pages; ++ unsigned long i; ++ ++ /* Allocate a controlling structure */ ++ result = -ENOMEM; ++ image = kmalloc(sizeof(*image), GFP_KERNEL); ++ if (!image) { ++ goto out; ++ } ++ memset(image, 0, sizeof(*image)); ++ image->head = 0; ++ image->entry = &image->head; ++ image->last_entry = &image->head; ++ ++ /* Initialize the list of destination pages */ ++ INIT_LIST_HEAD(&image->dest_pages); ++ ++ /* Initialize the list of unuseable pages */ ++ INIT_LIST_HEAD(&image->unuseable_pages); ++ ++ /* Read in the segments */ ++ image->nr_segments = nr_segments; ++ segment_bytes = nr_segments * sizeof*segments; ++ result = copy_from_user(image->segment, segments, segment_bytes) ? -EFAULT : 0; ++ if (result) ++ goto out; ++ ++ /* Verify we have good destination addresses. The caller is ++ * responsible for making certain we don't attempt to load ++ * the new image into invalid or reserved areas of RAM. This ++ * just verifies it is an address we can use. ++ */ ++ result = -EADDRNOTAVAIL; ++ for(i = 0; i < nr_segments; i++) { ++ unsigned long mend; ++ mend = ((unsigned long)(image->segment[i].mem)) + ++ image->segment[i].memsz; ++ if (mend >= KEXEC_DESTINATION_MEMORY_LIMIT) ++ goto out; ++ } ++ ++ /* Find a location for the reboot code buffer, and add it to ++ * the vector of segments so that its pages will also be ++ * counted as destination pages. 
++ */ ++ result = -ENOMEM; ++ reboot_pages = kimage_alloc_reboot_code_pages(image); ++ if (!reboot_pages) { ++ printk(KERN_ERR "Could not allocate reboot_code_buffer\n"); ++ goto out; ++ } ++ image->reboot_code_pages = reboot_pages; ++ image->segment[nr_segments].buf = 0; ++ image->segment[nr_segments].bufsz = 0; ++ image->segment[nr_segments].mem = (void *)(page_to_pfn(reboot_pages) << PAGE_SHIFT); ++ image->segment[nr_segments].memsz = KEXEC_REBOOT_CODE_SIZE; ++ image->nr_segments++; ++ ++ result = 0; ++ out: ++ if (result == 0) { ++ *rimage = image; ++ } else { ++ kfree(image); ++ } ++ return result; ++} ++ ++static int kimage_is_destination_range( ++ struct kimage *image, unsigned long start, unsigned long end) ++{ ++ unsigned long i; ++ for(i = 0; i < image->nr_segments; i++) { ++ unsigned long mstart, mend; ++ mstart = (unsigned long)image->segment[i].mem; ++ mend = mstart + image->segment[i].memsz; ++ if ((end > mstart) && (start < mend)) { ++ return 1; ++ } ++ } ++ return 0; ++} ++ ++#ifdef CONFIG_MMU ++static int identity_map_pages(struct page *pages, int order) ++{ ++ struct mm_struct *mm; ++ struct vm_area_struct *vma; ++ int error; ++ mm = &init_mm; ++ vma = 0; ++ ++ down_write(&mm->mmap_sem); ++ error = -ENOMEM; ++ vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); ++ if (!vma) { ++ goto out; ++ } ++ ++ memset(vma, 0, sizeof(*vma)); ++ vma->vm_mm = mm; ++ vma->vm_start = page_to_pfn(pages) << PAGE_SHIFT; ++ vma->vm_end = vma->vm_start + (1 << (order + PAGE_SHIFT)); ++ vma->vm_ops = 0; ++ vma->vm_flags = VM_SHARED \ ++ | VM_READ | VM_WRITE | VM_EXEC \ ++ | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC \ ++ | VM_DONTCOPY | VM_RESERVED; ++ vma->vm_page_prot = protection_map[vma->vm_flags & 0xf]; ++ vma->vm_file = NULL; ++ vma->vm_private_data = NULL; ++ INIT_LIST_HEAD(&vma->shared); ++ insert_vm_struct(mm, vma); ++ ++ error = remap_page_range(vma, vma->vm_start, vma->vm_start, ++ vma->vm_end - vma->vm_start, vma->vm_page_prot); ++ if (error) { ++ goto out; ++ } 
++ ++ error = 0; ++ out: ++ if (error && vma) { ++ kmem_cache_free(vm_area_cachep, vma); ++ vma = 0; ++ } ++ up_write(&mm->mmap_sem); ++ ++ return error; ++} ++#else ++#define identity_map_pages(pages, order) 0 ++#endif ++ ++struct page *kimage_alloc_reboot_code_pages(struct kimage *image) ++{ ++ /* The reboot code buffer is special. It is the only set of ++ * pages that must be allocated in their final resting place, ++ * and the only set of pages whose final resting place we can ++ * pick. ++ * ++ * At worst this runs in O(N) of the image size. ++ */ ++ struct list_head extra_pages, *pos, *next; ++ struct page *pages; ++ unsigned long addr; ++ int order, count; ++ order = get_order(KEXEC_REBOOT_CODE_SIZE); ++ count = 1 << order; ++ INIT_LIST_HEAD(&extra_pages); ++ do { ++ int i; ++ pages = alloc_pages(GFP_HIGHUSER, order); ++ if (!pages) ++ break; ++ for(i = 0; i < count; i++) { ++ SetPageReserved(pages +i); ++ } ++ addr = page_to_pfn(pages) << PAGE_SHIFT; ++ if ((page_to_pfn(pages) >= (TASK_SIZE >> PAGE_SHIFT)) || ++ kimage_is_destination_range(image, addr, addr + KEXEC_REBOOT_CODE_SIZE)) { ++ list_add(&pages->list, &extra_pages); ++ pages = 0; ++ } ++ } while(!pages); ++ if (pages) { ++ int result; ++ result = identity_map_pages(pages, order); ++ if (result < 0) { ++ list_add(&pages->list, &extra_pages); ++ pages = 0; ++ } ++ } ++ /* If I could convert a multi page allocation into a buch of ++ * single page allocations I could add these pages to ++ * image->dest_pages. For now it is simpler to just free the ++ * pages again. 
++ */ ++ list_for_each_safe(pos, next, &extra_pages) { ++ struct page *page; ++ int i; ++ page = list_entry(pos, struct page, list); ++ for(i = 0; i < count; i++) { ++ ClearPageReserved(page +i); ++ } ++ list_del(&page->list); ++ __free_pages(page, order); ++ } ++ return pages; ++} ++ ++static int kimage_add_entry(struct kimage *image, kimage_entry_t entry) ++{ ++ if (image->offset != 0) { ++ image->entry++; ++ } ++ if (image->entry == image->last_entry) { ++ kimage_entry_t *ind_page; ++ struct page *page; ++ page = kimage_alloc_page(image, GFP_KERNEL, KIMAGE_NO_DEST); ++ if (!page) { ++ return -ENOMEM; ++ } ++ ind_page = page_address(page); ++ *image->entry = virt_to_phys(ind_page) | IND_INDIRECTION; ++ image->entry = ind_page; ++ image->last_entry = ++ ind_page + ((PAGE_SIZE/sizeof(kimage_entry_t)) - 1); ++ } ++ *image->entry = entry; ++ image->entry++; ++ image->offset = 0; ++ return 0; ++} ++ ++static int kimage_set_destination( ++ struct kimage *image, unsigned long destination) ++{ ++ int result; ++ destination &= PAGE_MASK; ++ result = kimage_add_entry(image, destination | IND_DESTINATION); ++ if (result == 0) { ++ image->destination = destination; ++ } ++ return result; ++} ++ ++ ++static int kimage_add_page(struct kimage *image, unsigned long page) ++{ ++ int result; ++ page &= PAGE_MASK; ++ result = kimage_add_entry(image, page | IND_SOURCE); ++ if (result == 0) { ++ image->destination += PAGE_SIZE; ++ } ++ return result; ++} ++ ++ ++static void kimage_free_extra_pages(struct kimage *image) ++{ ++ /* Walk through and free any extra destination pages I may have */ ++ struct list_head *pos, *next; ++ list_for_each_safe(pos, next, &image->dest_pages) { ++ struct page *page; ++ page = list_entry(pos, struct page, list); ++ list_del(&page->list); ++ ClearPageReserved(page); ++ __free_page(page); ++ } ++ /* Walk through and free any unuseable pages I have cached */ ++ list_for_each_safe(pos, next, &image->unuseable_pages) { ++ struct page *page; ++ page = 
list_entry(pos, struct page, list); ++ list_del(&page->list); ++ ClearPageReserved(page); ++ __free_page(page); ++ } ++ ++} ++static int kimage_terminate(struct kimage *image) ++{ ++ int result; ++ result = kimage_add_entry(image, IND_DONE); ++ if (result == 0) { ++ /* Point at the terminating element */ ++ image->entry--; ++ kimage_free_extra_pages(image); ++ } ++ return result; ++} ++ ++#define for_each_kimage_entry(image, ptr, entry) \ ++ for (ptr = &image->head; (entry = *ptr) && !(entry & IND_DONE); \ ++ ptr = (entry & IND_INDIRECTION)? \ ++ phys_to_virt((entry & PAGE_MASK)): ptr +1) ++ ++static void kimage_free(struct kimage *image) ++{ ++ kimage_entry_t *ptr, entry; ++ kimage_entry_t ind = 0; ++ int i, count, order; ++ if (!image) ++ return; ++ kimage_free_extra_pages(image); ++ for_each_kimage_entry(image, ptr, entry) { ++ if (entry & IND_INDIRECTION) { ++ /* Free the previous indirection page */ ++ if (ind & IND_INDIRECTION) { ++ free_page((unsigned long)phys_to_virt(ind & PAGE_MASK)); ++ } ++ /* Save this indirection page until we are ++ * done with it. 
++ */ ++ ind = entry; ++ } ++ else if (entry & IND_SOURCE) { ++ free_page((unsigned long)phys_to_virt(entry & PAGE_MASK)); ++ } ++ } ++ order = get_order(KEXEC_REBOOT_CODE_SIZE); ++ count = 1 << order; ++ do_munmap(&init_mm, ++ page_to_pfn(image->reboot_code_pages) << PAGE_SHIFT, ++ count << PAGE_SHIFT); ++ for(i = 0; i < count; i++) { ++ ClearPageReserved(image->reboot_code_pages + i); ++ } ++ __free_pages(image->reboot_code_pages, order); ++ kfree(image); ++} ++ ++static kimage_entry_t *kimage_dst_used(struct kimage *image, unsigned long page) ++{ ++ kimage_entry_t *ptr, entry; ++ unsigned long destination = 0; ++ for_each_kimage_entry(image, ptr, entry) { ++ if (entry & IND_DESTINATION) { ++ destination = entry & PAGE_MASK; ++ } ++ else if (entry & IND_SOURCE) { ++ if (page == destination) { ++ return ptr; ++ } ++ destination += PAGE_SIZE; ++ } ++ } ++ return 0; ++} ++ ++static struct page *kimage_alloc_page(struct kimage *image, unsigned int gfp_mask, unsigned long destination) ++{ ++ /* Here we implment safe guards to ensure that a source page ++ * is not copied to it's destination page before the data on ++ * the destination page is no longer useful. ++ * ++ * To do this we maintain the invariant that a source page is ++ * either it's own destination page, or it is not a ++ * destination page at all. ++ * ++ * That is slightly stronger than required, but the proof ++ * that no problems will not occur is trivial, and the ++ * implemenation is simply to verify. ++ * ++ * When allocating all pages normally this algorithm will run ++ * in O(N) time, but in the worst case it will run in O(N^2) ++ * time. If the runtime is a problem the data structures can ++ * be fixed. ++ */ ++ struct page *page; ++ unsigned long addr; ++ ++ /* Walk through the list of destination pages, and see if I ++ * have a match. 
++ */ ++ list_for_each_entry(page, &image->dest_pages, list) { ++ addr = page_to_pfn(page) << PAGE_SHIFT; ++ if (addr == destination) { ++ list_del(&page->list); ++ return page; ++ } ++ } ++ page = 0; ++ while(1) { ++ kimage_entry_t *old; ++ /* Allocate a page, if we run out of memory give up */ ++ page = alloc_page(gfp_mask); ++ if (!page) { ++ return 0; ++ } ++ SetPageReserved(page); ++ /* If the page cannot be used file it away */ ++ if (page_to_pfn(page) > (KEXEC_SOURCE_MEMORY_LIMIT >> PAGE_SHIFT)) { ++ list_add(&page->list, &image->unuseable_pages); ++ continue; ++ } ++ addr = page_to_pfn(page) << PAGE_SHIFT; ++ ++ /* If it is the destination page we want use it */ ++ if (addr == destination) ++ break; ++ ++ /* If the page is not a destination page use it */ ++ if (!kimage_is_destination_range(image, addr, addr + PAGE_SIZE)) ++ break; ++ ++ /* I know that the page is someones destination page. ++ * See if there is already a source page for this ++ * destination page. And if so swap the source pages. ++ */ ++ old = kimage_dst_used(image, addr); ++ if (old) { ++ /* If so move it */ ++ unsigned long old_addr; ++ struct page *old_page; ++ ++ old_addr = *old & PAGE_MASK; ++ old_page = pfn_to_page(old_addr >> PAGE_SHIFT); ++ copy_highpage(page, old_page); ++ *old = addr | (*old & ~PAGE_MASK); ++ ++ /* The old page I have found cannot be a ++ * destination page, so return it. ++ */ ++ addr = old_addr; ++ page = old_page; ++ break; ++ } ++ else { ++ /* Place the page on the destination list I ++ * will use it later. 
++ */ ++ list_add(&page->list, &image->dest_pages); ++ } ++ } ++ return page; ++} ++ ++static int kimage_load_segment(struct kimage *image, ++ struct kexec_segment *segment) ++{ ++ unsigned long mstart; ++ int result; ++ unsigned long offset; ++ unsigned long offset_end; ++ unsigned char *buf; ++ ++ result = 0; ++ buf = segment->buf; ++ mstart = (unsigned long)segment->mem; ++ ++ offset_end = segment->memsz; ++ ++ result = kimage_set_destination(image, mstart); ++ if (result < 0) { ++ goto out; ++ } ++ for(offset = 0; offset < segment->memsz; offset += PAGE_SIZE) { ++ struct page *page; ++ char *ptr; ++ size_t size, leader; ++ page = kimage_alloc_page(image, GFP_HIGHUSER, mstart + offset); ++ if (page == 0) { ++ result = -ENOMEM; ++ goto out; ++ } ++ result = kimage_add_page(image, page_to_pfn(page) << PAGE_SHIFT); ++ if (result < 0) { ++ goto out; ++ } ++ ptr = kmap(page); ++ if (segment->bufsz < offset) { ++ /* We are past the end zero the whole page */ ++ memset(ptr, 0, PAGE_SIZE); ++ kunmap(page); ++ continue; ++ } ++ size = PAGE_SIZE; ++ leader = 0; ++ if ((offset == 0)) { ++ leader = mstart & ~PAGE_MASK; ++ } ++ if (leader) { ++ /* We are on the first page zero the unused portion */ ++ memset(ptr, 0, leader); ++ size -= leader; ++ ptr += leader; ++ } ++ if (size > (segment->bufsz - offset)) { ++ size = segment->bufsz - offset; ++ } ++ if (size < (PAGE_SIZE - leader)) { ++ /* zero the trailing part of the page */ ++ memset(ptr + size, 0, (PAGE_SIZE - leader) - size); ++ } ++ result = copy_from_user(ptr, buf + offset, size); ++ kunmap(page); ++ if (result) { ++ result = (result < 0)?result : -EIO; ++ goto out; ++ } ++ } ++ out: ++ return result; ++} ++ ++/* ++ * Exec Kernel system call: for obvious reasons only root may call it. ++ * ++ * This call breaks up into three pieces. ++ * - A generic part which loads the new kernel from the current ++ * address space, and very carefully places the data in the ++ * allocated pages. 
++ * ++ * - A generic part that interacts with the kernel and tells all of ++ * the devices to shut down. Preventing on-going dmas, and placing ++ * the devices in a consistent state so a later kernel can ++ * reinitialize them. ++ * ++ * - A machine specific part that includes the syscall number ++ * and the copies the image to it's final destination. And ++ * jumps into the image at entry. ++ * ++ * kexec does not sync, or unmount filesystems so if you need ++ * that to happen you need to do that yourself. ++ */ ++struct kimage *kexec_image = 0; ++ ++asmlinkage long sys_kexec_load(unsigned long entry, unsigned long nr_segments, ++ struct kexec_segment *segments, unsigned long flags) ++{ ++ struct kimage *image; ++ int result; ++ ++ /* We only trust the superuser with rebooting the system. */ ++ if (!capable(CAP_SYS_ADMIN)) ++ return -EPERM; ++ ++ /* In case we need just a little bit of special behavior for ++ * reboot on panic ++ */ ++ if (flags != 0) ++ return -EINVAL; ++ ++ if (nr_segments > KEXEC_SEGMENT_MAX) ++ return -EINVAL; ++ image = 0; ++ ++ result = 0; ++ if (nr_segments > 0) { ++ unsigned long i; ++ result = kimage_alloc(&image, nr_segments, segments); ++ if (result) { ++ goto out; ++ } ++ image->start = entry; ++ for(i = 0; i < nr_segments; i++) { ++ result = kimage_load_segment(image, &segments[i]); ++ if (result) { ++ goto out; ++ } ++ } ++ result = kimage_terminate(image); ++ if (result) { ++ goto out; ++ } ++ } ++ ++ image = xchg(&kexec_image, image); ++ ++ out: ++ kimage_free(image); ++ return result; ++} +diff -Nru a/kernel/sys.c b/kernel/sys.c +--- a/kernel/sys.c Mon Jun 23 12:22:26 2003 ++++ b/kernel/sys.c Mon Jun 23 12:22:26 2003 +@@ -16,6 +16,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -207,6 +208,7 @@ + cond_syscall(sys_lookup_dcookie) + cond_syscall(sys_swapon) + cond_syscall(sys_swapoff) ++cond_syscall(sys_kexec_load) + cond_syscall(sys_init_module) + cond_syscall(sys_delete_module) + 
cond_syscall(sys_socketpair) +@@ -450,6 +452,27 @@ + machine_restart(buffer); + break; + ++#ifdef CONFIG_KEXEC ++ case LINUX_REBOOT_CMD_KEXEC: ++ { ++ struct kimage *image; ++ if (arg) { ++ unlock_kernel(); ++ return -EINVAL; ++ } ++ image = xchg(&kexec_image, 0); ++ if (!image) { ++ unlock_kernel(); ++ return -EINVAL; ++ } ++ notifier_call_chain(&reboot_notifier_list, SYS_RESTART, NULL); ++ system_running = 0; ++ device_shutdown(); ++ printk(KERN_EMERG "Starting new kernel\n"); ++ machine_kexec(image); ++ break; ++ } ++#endif + #ifdef CONFIG_SOFTWARE_SUSPEND + case LINUX_REBOOT_CMD_SW_SUSPEND: + if (!software_suspend_enabled) { diff --git a/lustre/kernel_patches/patches/kgdb-ga-2.5.73.patch b/lustre/kernel_patches/patches/kgdb-ga-2.5.73.patch new file mode 100644 index 0000000..b3e0bbe --- /dev/null +++ b/lustre/kernel_patches/patches/kgdb-ga-2.5.73.patch @@ -0,0 +1,5046 @@ + + +This kgdb will get called and will trap almost any kernel +fault WITHOUT BEING ARMED. + +It is entered at boot time via "kgdb" in the boot string, +not "gdb". This entry occurs when the first setup on the +boot string is called, not sometime later. You will not +find a "waiting for gdb" on your console, as the console has +not yet been enabled at this time. (Note, this early stuff +is a bit fragile as the full trap table has yet to be +loaded, something I might address, sometime... So don't try +to look at memory that can not be reached, for example. +Once the full trap table is loaded this restriction goes +away.) + +If you hard code it, you can put a breakpoint() as the FIRST +LINE OF C CODE. + +It does NOT use the serial driver, but if the serial driver +is loaded, it tells it to release the port to avoid +conflict. + +The threads stuff is not configurable, does not require +redirection of schedule() calls and does back track to the +first non schedule() caller on the info threads command. If +you switch to the thread, however, it will show it in the +switch code (as it should). 
+ +It is MUCH more aggressive and paranoid about grabbing the +other cpus on entry. It issues a "smp_send_nmi_allbutself()" +rather than depending on them to interrupt or hit an NMI +sometime in the distant future. If a cpu does not come to +the party, it will continue without it so all is not lost. + +It does not have anything to do with IOCTL calls, but does +do the control-C thing. + +There is a LOT of info in the patch which ends up in +.../Documentation/i386/kgdb/* + +There is a nifty little thing called kgdb_ts() (kgdb time +stamp) which is a function you can code calls to which puts +some useful stuff in a circular buffer which can be examined +with the supplied gdb macros. + +It also allows you to do "p foobar(...)" i.e. to call a +function from gdb, just like gdb allows in program +debugging. + +In an SMP system, you can choose to "hold" any given set of +cpus. It also defaults to holding other cpus on single step +(this can be overridden). + +This said, you can imagine my consternation when I found it +"lost it" on continues on 2.5. I found and fixed this +early this pm, a hold cpu on exit goof on my part. + +Oh, and a final point, the configure options are more +extensive (the serial port is set up here, for example, (can +not wait for a command line to do this)). There is one to +do system call exit tests. This is VERY new and causes the +kernel to hit a hard "int 3" if a system call attempts to +exit with preempt count other than zero. This is a fault, +of course, but the current 2.5 is full of them so I don't +recommend turning this on.
+ + + + + Documentation/i386/kgdb/andthen | 100 + + Documentation/i386/kgdb/debug-nmi.txt | 37 + Documentation/i386/kgdb/gdb-globals.txt | 71 + + Documentation/i386/kgdb/gdbinit | 14 + Documentation/i386/kgdb/gdbinit-modules | 146 ++ + Documentation/i386/kgdb/gdbinit.hw | 117 + + Documentation/i386/kgdb/kgdb.txt | 715 ++++++++++ + Documentation/i386/kgdb/loadmodule.sh | 78 + + MAINTAINERS | 6 + arch/i386/Kconfig | 180 ++ + arch/i386/Makefile | 3 + arch/i386/kernel/Makefile | 1 + arch/i386/kernel/entry.S | 28 + arch/i386/kernel/kgdb_stub.c | 2214 ++++++++++++++++++++++++++++++++ + arch/i386/kernel/nmi.c | 25 + arch/i386/kernel/smp.c | 12 + arch/i386/kernel/traps.c | 86 + + arch/i386/lib/Makefile | 1 + arch/i386/lib/kgdb_serial.c | 485 +++++++ + arch/i386/mm/fault.c | 6 + drivers/char/keyboard.c | 3 + drivers/char/sysrq.c | 15 + drivers/serial/8250.c | 42 + include/asm-i386/bugs.h | 21 + include/asm-i386/kgdb.h | 59 + include/asm-i386/kgdb_local.h | 102 + + include/linux/config.h | 3 + kernel/sched.c | 7 + 28 files changed, 4565 insertions(+), 12 deletions(-) + +diff -puN arch/i386/Kconfig~kgdb-ga arch/i386/Kconfig +--- 25/arch/i386/Kconfig~kgdb-ga 2003-06-25 23:14:17.000000000 -0700 ++++ 25-akpm/arch/i386/Kconfig 2003-06-25 23:14:17.000000000 -0700 +@@ -1419,14 +1419,194 @@ config DEBUG_SPINLOCK_SLEEP + If you say Y here, various routines which may sleep will become very + noisy if they are called with a spinlock held. + ++config KGDB ++ bool "Include kgdb kernel debugger" ++ depends on DEBUG_KERNEL ++ help ++ If you say Y here, the system will be compiled with the debug ++ option (-g) and a debugging stub will be included in the ++ kernel. This stub communicates with gdb on another (host) ++ computer via a serial port. The host computer should have ++ access to the kernel binary file (vmlinux) and a serial port ++ that is connected to the target machine. Gdb can be made to ++ configure the serial port or you can use stty and setserial to ++ do this. 
See the 'target' command in gdb. This option also ++ configures in the ability to request a breakpoint early in the ++ boot process. To request the breakpoint just include 'kgdb' ++ as a boot option when booting the target machine. The system ++ will then break as soon as it looks at the boot options. This ++ option also installs a breakpoint in panic and sends any ++ kernel faults to the debugger. For more information see the ++ Documentation/i386/kgdb/kgdb.txt file. ++ ++choice ++ depends on KGDB ++ prompt "Debug serial port BAUD" ++ default KGDB_115200BAUD ++ help ++ Gdb and the kernel stub need to agree on the baud rate to be ++ used. Some systems (x86 family at this writing) allow this to ++ be configured. ++ ++config KGDB_9600BAUD ++ bool "9600" ++ ++config KGDB_19200BAUD ++ bool "19200" ++ ++config KGDB_38400BAUD ++ bool "38400" ++ ++config KGDB_57600BAUD ++ bool "57600" ++ ++config KGDB_115200BAUD ++ bool "115200" ++endchoice ++ ++config KGDB_PORT ++ hex "hex I/O port address of the debug serial port" ++ depends on KGDB ++ default 3f8 ++ help ++ Some systems (x86 family at this writing) allow the port ++ address to be configured. The number entered is assumed to be ++ hex, don't put 0x in front of it. The standard addresses are: ++ COM1 3f8, irq 4 and COM2 2f8, irq 3. Setserial /dev/ttySx ++ will tell you what you have. It is good to test the serial ++ connection with a live system before trying to debug. ++ ++config KGDB_IRQ ++ int "IRQ of the debug serial port" ++ depends on KGDB ++ default 4 ++ help ++ This is the irq for the debug port. If everything is working ++ correctly and the kernel has interrupts on, a control C to the ++ port should cause a break into the kernel debug stub. ++ ++config DEBUG_INFO ++ bool ++ default y ++ ++config KGDB_MORE ++ bool "Add any additional compile options" ++ depends on KGDB ++ default n ++ help ++ Saying yes here turns on the ability to enter additional ++ compile options.
++ ++ ++config KGDB_OPTIONS ++ depends on KGDB_MORE ++ string "Additional compile arguments" ++ default "-O1" ++ help ++ This option allows you enter additional compile options for ++ the whole kernel compile. Each platform will have a default ++ that seems right for it. For example on PPC "-ggdb -O1", and ++ for i386 "-O1". Note that by configuring KGDB "-g" is already ++ turned on. In addition, on i386 platforms ++ "-fomit-frame-pointer" is deleted from the standard compile ++ options. ++ ++config NO_KGDB_CPUS ++ int "Number of CPUs" ++ depends on KGDB && SMP ++ default NR_CPUS ++ help ++ ++ This option sets the number of cpus for kgdb ONLY. It is used ++ to prune some internal structures so they look "nice" when ++ displayed with gdb. This is to overcome possibly larger ++ numbers that may have been entered above. Enter the real ++ number to get nice clean kgdb_info displays. ++ ++config KGDB_TS ++ bool "Enable kgdb time stamp macros?" ++ depends on KGDB ++ default n ++ help ++ Kgdb event macros allow you to instrument your code with calls ++ to the kgdb event recording function. The event log may be ++ examined with gdb at a break point. Turning on this ++ capability also allows you to choose how many events to ++ keep. Kgdb always keeps the lastest events. ++ ++choice ++ depends on KGDB_TS ++ prompt "Max number of time stamps to save?" ++ default KGDB_TS_128 ++ ++config KGDB_TS_64 ++ bool "64" ++ ++config KGDB_TS_128 ++ bool "128" ++ ++config KGDB_TS_256 ++ bool "256" ++ ++config KGDB_TS_512 ++ bool "512" ++ ++config KGDB_TS_1024 ++ bool "1024" ++ ++endchoice ++ ++config STACK_OVERFLOW_TEST ++ bool "Turn on kernel stack overflow testing?" ++ depends on KGDB ++ default n ++ help ++ This option enables code in the front line interrupt handlers ++ to check for kernel stack overflow on interrupts and system ++ calls. This is part of the kgdb code on x86 systems. 
++ ++config KGDB_CONSOLE ++ bool "Enable serial console thru kgdb port" ++ depends on KGDB ++ default n ++ help ++ This option enables the command line "console=kgdb" option. ++ When the system is booted with this option in the command line ++ all kernel printk output is sent to gdb (as well as to other ++ consoles). For this to work gdb must be connected. For this ++ reason, this command line option will generate a breakpoint if ++ gdb has not yet connected. After the gdb continue command is ++ given all pent up console output will be printed by gdb on the ++ host machine. Neither this option, nor KGDB require the ++ serial driver to be configured. ++ ++config KGDB_SYSRQ ++ bool "Turn on SysRq 'G' command to do a break?" ++ depends on KGDB ++ default y ++ help ++ This option includes an option in the SysRq code that allows ++ you to enter SysRq G which generates a breakpoint to the KGDB ++ stub. This will work if the keyboard is alive and can ++ interrupt the system. Because of constraints on when the ++ serial port interrupt can be enabled, this code may allow you ++ to interrupt the system before the serial port control C is ++ available. Just say yes here. ++ + config FRAME_POINTER + bool "Compile the kernel with frame pointers" ++ default KGDB + help + If you say Y here the resulting kernel image will be slightly larger + and slower, but it will give very useful debugging information. + If you don't debug the kernel, you can say N, but we may not be able + to solve problems without frame pointers. 
+ ++config MAGIC_SYSRQ ++ bool ++ depends on KGDB_SYSRQ ++ default y ++ + config X86_EXTRA_IRQS + bool + depends on X86_LOCAL_APIC || X86_VOYAGER +diff -puN arch/i386/kernel/entry.S~kgdb-ga arch/i386/kernel/entry.S +--- 25/arch/i386/kernel/entry.S~kgdb-ga 2003-06-25 23:14:17.000000000 -0700 ++++ 25-akpm/arch/i386/kernel/entry.S 2003-06-25 23:14:17.000000000 -0700 +@@ -48,6 +48,18 @@ + #include + #include + #include "irq_vectors.h" ++ /* We do not recover from a stack overflow, but at least ++ * we know it happened and should be able to track it down. ++ */ ++#ifdef CONFIG_STACK_OVERFLOW_TEST ++#define STACK_OVERFLOW_TEST \ ++ testl $7680,%esp; \ ++ jnz 10f; \ ++ call stack_overflow; \ ++10: ++#else ++#define STACK_OVERFLOW_TEST ++#endif + + EBX = 0x00 + ECX = 0x04 +@@ -98,7 +110,8 @@ TSS_ESP0_OFFSET = (4 - 0x200) + pushl %ebx; \ + movl $(__USER_DS), %edx; \ + movl %edx, %ds; \ +- movl %edx, %es; ++ movl %edx, %es; \ ++ STACK_OVERFLOW_TEST + + #define RESTORE_INT_REGS \ + popl %ebx; \ +@@ -298,6 +311,19 @@ syscall_exit: + testw $_TIF_ALLWORK_MASK, %cx # current->work + jne syscall_exit_work + restore_all: ++#ifdef CONFIG_TRAP_BAD_SYSCALL_EXITS ++ movl EFLAGS(%esp), %eax # mix EFLAGS and CS ++ movb CS(%esp), %al ++ testl $(VM_MASK | 3), %eax ++ jz resume_kernelX # returning to kernel or vm86-space ++ ++ cmpl $0,TI_PRE_COUNT(%ebx) # non-zero preempt_count ? ++ jz resume_kernelX ++ ++ int $3 ++ ++resume_kernelX: ++#endif + RESTORE_ALL + + # perform work that needs to be done immediately before resumption +diff -puN /dev/null arch/i386/kernel/kgdb_stub.c +--- /dev/null 2002-08-30 16:31:37.000000000 -0700 ++++ 25-akpm/arch/i386/kernel/kgdb_stub.c 2003-06-25 23:14:17.000000000 -0700 +@@ -0,0 +1,2214 @@ ++/* ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2, or (at your option) any ++ * later version. 
++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ */ ++ ++/* ++ * Copyright (c) 2000 VERITAS Software Corporation. ++ * ++ */ ++/**************************************************************************** ++ * Header: remcom.c,v 1.34 91/03/09 12:29:49 glenne Exp $ ++ * ++ * Module name: remcom.c $ ++ * Revision: 1.34 $ ++ * Date: 91/03/09 12:29:49 $ ++ * Contributor: Lake Stevens Instrument Division$ ++ * ++ * Description: low level support for gdb debugger. $ ++ * ++ * Considerations: only works on target hardware $ ++ * ++ * Written by: Glenn Engel $ ++ * Updated by: David Grothe ++ * ModuleState: Experimental $ ++ * ++ * NOTES: See Below $ ++ * ++ * Modified for 386 by Jim Kingdon, Cygnus Support. ++ * Compatibility with 2.1.xx kernel by David Grothe ++ * ++ * Changes to allow auto initilization. All that is needed is that it ++ * be linked with the kernel and a break point (int 3) be executed. ++ * The header file defines BREAKPOINT to allow one to do ++ * this. It should also be possible, once the interrupt system is up, to ++ * call putDebugChar("+"). Once this is done, the remote debugger should ++ * get our attention by sending a ^C in a packet. George Anzinger ++ * ++ * Integrated into 2.2.5 kernel by Tigran Aivazian ++ * Added thread support, support for multiple processors, ++ * support for ia-32(x86) hardware debugging. ++ * Amit S. Kale ( akale@veritas.com ) ++ * ++ * ++ * To enable debugger support, two things need to happen. One, a ++ * call to set_debug_traps() is necessary in order to allow any breakpoints ++ * or error conditions to be properly intercepted and reported to gdb. ++ * Two, a breakpoint needs to be generated to begin communication. This ++ * is most easily accomplished by a call to breakpoint(). 
Breakpoint() ++ * simulates a breakpoint by executing an int 3. ++ * ++ ************* ++ * ++ * The following gdb commands are supported: ++ * ++ * command function Return value ++ * ++ * g return the value of the CPU registers hex data or ENN ++ * G set the value of the CPU registers OK or ENN ++ * ++ * mAA..AA,LLLL Read LLLL bytes at address AA..AA hex data or ENN ++ * MAA..AA,LLLL: Write LLLL bytes at address AA..AA OK or ENN ++ * ++ * c Resume at current address SNN ( signal NN) ++ * cAA..AA Continue at address AA..AA SNN ++ * ++ * s Step one instruction SNN ++ * sAA..AA Step one instruction from AA..AA SNN ++ * ++ * k kill ++ * ++ * ? What was the last sigval ? SNN (signal NN) ++ * ++ * All commands and responses are sent with a packet which includes a ++ * checksum. A packet consists of ++ * ++ * $<packet info>#<checksum>. ++ * ++ * where ++ * <packet info> :: <characters representing the command or response> ++ * <checksum> :: < two hex digits computed as modulo 256 sum of <packet info> > ++ * ++ * When a packet is received, it is first acknowledged with either '+' or '-'. ++ * '+' indicates a successful transfer. '-' indicates a failed transfer.
++ * ++ * Example: ++ * ++ * Host: Reply: ++ * $m0,10#2a +$00010203040506070809101112131415#42 ++ * ++ ****************************************************************************/ ++#define KGDB_VERSION "<20030530.0126.22>" ++#include ++#include ++#include /* for strcpy */ ++#include ++#include ++#include ++#include ++#include /* for linux pt_regs struct */ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/************************************************************************ ++ * ++ * external low-level support routines ++ */ ++typedef void (*Function) (void); /* pointer to a function */ ++ ++/* Thread reference */ ++typedef unsigned char threadref[8]; ++ ++extern void putDebugChar(int); /* write a single character */ ++extern int getDebugChar(void); /* read and return a single char */ ++ ++/************************************************************************/ ++/* BUFMAX defines the maximum number of characters in inbound/outbound buffers*/ ++/* at least NUMREGBYTES*2 are needed for register packets */ ++/* Longer buffer is needed to list all threads */ ++#define BUFMAX 1024 ++ ++char *kgdb_version = KGDB_VERSION; ++ ++/* debug > 0 prints ill-formed commands in valid packets & checksum errors */ ++int debug_regs = 0; /* set to non-zero to print registers */ ++ ++/* filled in by an external module */ ++char *gdb_module_offsets; ++ ++static const char hexchars[] = "0123456789abcdef"; ++ ++/* Number of bytes of registers. */ ++#define NUMREGBYTES 64 ++/* ++ * Note that this register image is in a different order than ++ * the register image that Linux produces at interrupt time. ++ * ++ * Linux's register image is defined by struct pt_regs in ptrace.h. ++ * Just why GDB uses a different order is a historical mystery. 
++ */ ++enum regnames { _EAX, /* 0 */ ++ _ECX, /* 1 */ ++ _EDX, /* 2 */ ++ _EBX, /* 3 */ ++ _ESP, /* 4 */ ++ _EBP, /* 5 */ ++ _ESI, /* 6 */ ++ _EDI, /* 7 */ ++ _PC /* 8 also known as eip */ , ++ _PS /* 9 also known as eflags */ , ++ _CS, /* 10 */ ++ _SS, /* 11 */ ++ _DS, /* 12 */ ++ _ES, /* 13 */ ++ _FS, /* 14 */ ++ _GS /* 15 */ ++}; ++ ++/*************************** ASSEMBLY CODE MACROS *************************/ ++/* ++ * Put the error code here just in case the user cares. ++ * Likewise, the vector number here (since GDB only gets the signal ++ * number through the usual means, and that's not very specific). ++ * The called_from is the return address so he can tell how we entered kgdb. ++ * This will allow him to seperate out the various possible entries. ++ */ ++#define REMOTE_DEBUG 0 /* set != to turn on printing (also available in info) */ ++ ++#define PID_MAX PID_MAX_DEFAULT ++ ++#ifdef CONFIG_SMP ++void smp_send_nmi_allbutself(void); ++#define IF_SMP(x) x ++#undef MAX_NO_CPUS ++#ifndef CONFIG_NO_KGDB_CPUS ++#define CONFIG_NO_KGDB_CPUS 2 ++#endif ++#if CONFIG_NO_KGDB_CPUS > NR_CPUS ++#define MAX_NO_CPUS NR_CPUS ++#else ++#define MAX_NO_CPUS CONFIG_NO_KGDB_CPUS ++#endif ++#define hold_init hold_on_sstep: 1, ++#define MAX_CPU_MASK (unsigned long)((1LL << MAX_NO_CPUS) - 1LL) ++#define NUM_CPUS num_online_cpus() ++extern volatile unsigned long cpu_callout_map; ++#else ++#define IF_SMP(x) ++#define hold_init ++#undef MAX_NO_CPUS ++#define MAX_NO_CPUS 1 ++#define NUM_CPUS 1 ++#endif ++#define NOCPU (struct task_struct *)0xbad1fbad ++/* *INDENT-OFF* */ ++struct kgdb_info { ++ int used_malloc; ++ void *called_from; ++ long long entry_tsc; ++ int errcode; ++ int vector; ++ int print_debug_info; ++#ifdef CONFIG_SMP ++ int hold_on_sstep; ++ struct { ++ volatile struct task_struct *task; ++ int pid; ++ int hold; ++ struct pt_regs *regs; ++ } cpus_waiting[MAX_NO_CPUS]; ++#endif ++} kgdb_info = {hold_init print_debug_info:REMOTE_DEBUG, vector:-1}; ++ ++/* *INDENT-ON* */ 
++ ++#define used_m kgdb_info.used_malloc ++/* ++ * This is a little area we set aside to contain the stack we ++ * need to build to allow gdb to call functions. We use one ++ * per cpu to avoid locking issues. We will do all this work ++ * with interrupts off so that should take care of the protection ++ * issues. ++ */ ++#define LOOKASIDE_SIZE 200 /* should be more than enough */ ++#define MALLOC_MAX 200 /* Max malloc size */ ++struct { ++ unsigned int esp; ++ int array[LOOKASIDE_SIZE]; ++} fn_call_lookaside[MAX_NO_CPUS]; ++ ++static int trap_cpu; ++static unsigned int OLD_esp; ++ ++#define END_OF_LOOKASIDE &fn_call_lookaside[trap_cpu].array[LOOKASIDE_SIZE] ++#define IF_BIT 0x200 ++#define TF_BIT 0x100 ++ ++#define MALLOC_ROUND (8-1) /* parenthesized: it is expanded under '~' and '+' in malloc() */ ++ ++static char malloc_array[MALLOC_MAX]; ++IF_SMP(static void to_gdb(const char *mess)); ++void * ++malloc(int size) ++{ ++ ++ if (size <= (MALLOC_MAX - used_m)) { ++ int old_used = used_m; ++ used_m += ((size + MALLOC_ROUND) & (~MALLOC_ROUND)); ++ return &malloc_array[old_used]; ++ } else { ++ return NULL; ++ } ++} ++ ++/* ++ * Gdb calls functions by pushing arguments, including a return address ++ * on the stack and the adjusting EIP to point to the function. The ++ * whole assumption in GDB is that we are on a different stack than the ++ * one the "user" i.e. code that hit the break point, is on. This, of ++ * course is not true in the kernel. Thus various dodges are needed to ++ * do the call without directly messing with EIP (which we can not change ++ * as it is just a location and not a register. To adjust it would then ++ * require that we move every thing below EIP up or down as needed. This ++ * will not work as we may well have stack relative pointer on the stack ++ * (such as the pointer to regs, for example).
++ ++ * So here is what we do: ++ * We detect gdb attempting to store into the stack area and instead, store ++ * into the fn_call_lookaside.array at the same relative location as if it ++ * were the area ESP pointed at. We also trap ESP modifications ++ * and uses these to adjust fn_call_lookaside.esp. On entry ++ * fn_call_lookaside.esp will be set to point at the last entry in ++ * fn_call_lookaside.array. This allows us to check if it has changed, and ++ * if so, on exit, we add the registers we will use to do the move and a ++ * trap/ interrupt return exit sequence. We then adjust the eflags in the ++ * regs array (remember we now have a copy in the fn_call_lookaside.array) to ++ * kill the interrupt bit, AND we change EIP to point at our set up stub. ++ * As part of the register set up we preset the registers to point at the ++ * begining and end of the fn_call_lookaside.array, so all the stub needs to ++ * do is move words from the array to the stack until ESP= the desired value ++ * then do the rti. This will then transfer to the desired function with ++ * all the correct registers. Nifty huh? ++ */ ++extern asmlinkage void fn_call_stub(void); ++extern asmlinkage void fn_rtn_stub(void); ++/* *INDENT-OFF* */ ++__asm__("fn_rtn_stub:\n\t" ++ "movl %eax,%esp\n\t" ++ "fn_call_stub:\n\t" ++ "1:\n\t" ++ "addl $-4,%ebx\n\t" ++ "movl (%ebx), %eax\n\t" ++ "pushl %eax\n\t" ++ "cmpl %esp,%ecx\n\t" ++ "jne 1b\n\t" ++ "popl %eax\n\t" ++ "popl %ebx\n\t" ++ "popl %ecx\n\t" ++ "iret \n\t"); ++/* *INDENT-ON* */ ++#define gdb_i386vector kgdb_info.vector ++#define gdb_i386errcode kgdb_info.errcode ++#define waiting_cpus kgdb_info.cpus_waiting ++#define remote_debug kgdb_info.print_debug_info ++#define hold_cpu(cpu) kgdb_info.cpus_waiting[cpu].hold ++/* gdb locks */ ++ ++#ifdef CONFIG_SMP ++static int in_kgdb_called; ++static spinlock_t waitlocks[MAX_NO_CPUS] = ++ {[0 ... 
MAX_NO_CPUS - 1] = SPIN_LOCK_UNLOCKED }; ++/* ++ * The following array has the thread pointer of each of the "other" ++ * cpus. We make it global so it can be seen by gdb. ++ */ ++volatile int in_kgdb_entry_log[MAX_NO_CPUS]; ++volatile struct pt_regs *in_kgdb_here_log[MAX_NO_CPUS]; ++/* ++static spinlock_t continuelocks[MAX_NO_CPUS]; ++*/ ++spinlock_t kgdb_spinlock = SPIN_LOCK_UNLOCKED; ++/* waiters on our spinlock plus us */ ++static atomic_t spinlock_waiters = ATOMIC_INIT(1); ++static int spinlock_count = 0; ++static int spinlock_cpu = 0; ++/* ++ * Note we use nested spin locks to account for the case where a break ++ * point is encountered when calling a function by user direction from ++ * kgdb. Also there is the memory exception recursion to account for. ++ * Well, yes, but this lets other cpus thru too. Lets add a ++ * cpu id to the lock. ++ */ ++#define KGDB_SPIN_LOCK(x) if( spinlock_count == 0 || \ ++ spinlock_cpu != smp_processor_id()){\ ++ atomic_inc(&spinlock_waiters); \ ++ while (! 
spin_trylock(x)) {\ ++ in_kgdb(&regs);\ ++ }\ ++ atomic_dec(&spinlock_waiters); \ ++ spinlock_count = 1; \ ++ spinlock_cpu = smp_processor_id(); \ ++ }else{ \ ++ spinlock_count++; \ ++ } ++#define KGDB_SPIN_UNLOCK(x) if( --spinlock_count == 0) spin_unlock(x) ++extern volatile unsigned long cpu_callout_map; ++#else ++unsigned kgdb_spinlock = 0; ++#define KGDB_SPIN_LOCK(x) --*x ++#define KGDB_SPIN_UNLOCK(x) ++*x ++#endif ++ ++int ++hex(char ch) ++{ ++ if ((ch >= 'a') && (ch <= 'f')) ++ return (ch - 'a' + 10); ++ if ((ch >= '0') && (ch <= '9')) ++ return (ch - '0'); ++ if ((ch >= 'A') && (ch <= 'F')) ++ return (ch - 'A' + 10); ++ return (-1); ++} ++ ++/* scan for the sequence $<data>#<checksum> */ ++void ++getpacket(char *buffer) ++{ ++ unsigned char checksum; ++ unsigned char xmitcsum; ++ int i; ++ int count; ++ char ch; ++ ++ do { ++ /* wait around for the start character, ignore all other characters */ ++ while ((ch = (getDebugChar() & 0x7f)) != '$') ; ++ checksum = 0; ++ xmitcsum = -1; ++ ++ count = 0; ++ ++ /* now, read until a # or end of buffer is found (one byte is kept for the NUL) */ ++ while (count < BUFMAX - 1) { ++ ch = getDebugChar() & 0x7f; ++ if (ch == '#') ++ break; ++ checksum = checksum + ch; ++ buffer[count] = ch; ++ count = count + 1; ++ } ++ buffer[count] = 0; ++ ++ if (ch == '#') { ++ xmitcsum = hex(getDebugChar() & 0x7f) << 4; ++ xmitcsum += hex(getDebugChar() & 0x7f); ++ if ((remote_debug) && (checksum != xmitcsum)) { ++ printk ++ ("bad checksum. My count = 0x%x, sent=0x%x.
buf=%s\n", ++ checksum, xmitcsum, buffer); ++ } ++ ++ if (checksum != xmitcsum) ++ putDebugChar('-'); /* failed checksum */ ++ else { ++ putDebugChar('+'); /* successful transfer */ ++ /* if a sequence char is present, reply the sequence ID */ ++ if (buffer[2] == ':') { ++ putDebugChar(buffer[0]); ++ putDebugChar(buffer[1]); ++ /* remove sequence chars from buffer */ ++ count = strlen(buffer); ++ for (i = 3; i <= count; i++) ++ buffer[i - 3] = buffer[i]; ++ } ++ } ++ } ++ } while (checksum != xmitcsum); ++ ++ if (remote_debug) ++ printk("R:%s\n", buffer); ++} ++ ++/* send the packet in buffer. */ ++ ++void ++putpacket(char *buffer) ++{ ++ unsigned char checksum; ++ int count; ++ char ch; ++ ++ /* $<packet info>#<checksum>. */ ++ do { ++ if (remote_debug) ++ printk("T:%s\n", buffer); ++ putDebugChar('$'); ++ checksum = 0; ++ count = 0; ++ ++ while ((ch = buffer[count])) { ++ putDebugChar(ch); ++ checksum += ch; ++ count += 1; ++ } ++ ++ putDebugChar('#'); ++ putDebugChar(hexchars[checksum >> 4]); ++ putDebugChar(hexchars[checksum % 16]); ++ ++ } while ((getDebugChar() & 0x7f) != '+'); ++ ++} ++ ++static char remcomInBuffer[BUFMAX]; ++static char remcomOutBuffer[BUFMAX]; ++static short error; ++ ++void ++debug_error(char *format, char *parm) ++{ ++ if (remote_debug) ++ printk(format, parm); ++} ++ ++static void ++print_regs(struct pt_regs *regs) ++{ ++ printk("EAX=%08lx ", regs->eax); ++ printk("EBX=%08lx ", regs->ebx); ++ printk("ECX=%08lx ", regs->ecx); ++ printk("EDX=%08lx ", regs->edx); ++ printk("\n"); ++ printk("ESI=%08lx ", regs->esi); ++ printk("EDI=%08lx ", regs->edi); ++ printk("EBP=%08lx ", regs->ebp); ++ printk("ESP=%08lx ", (long) &regs->esp); ++ printk("\n"); ++ printk(" DS=%08x ", regs->xds); ++ printk(" ES=%08x ", regs->xes); ++ printk(" SS=%08x ", __KERNEL_DS); ++ printk(" FL=%08lx ", regs->eflags); ++ printk("\n"); ++ printk(" CS=%08x ", regs->xcs); ++ printk(" IP=%08lx ", regs->eip); ++#if 0 ++ printk(" FS=%08x ", regs->fs); ++ printk(" GS=%08x ", regs->gs); ++#endif ++
printk("\n"); ++ ++} /* print_regs */ ++ ++#define NEW_esp fn_call_lookaside[trap_cpu].esp ++ ++static void ++regs_to_gdb_regs(int *gdb_regs, struct pt_regs *regs) ++{ ++ gdb_regs[_EAX] = regs->eax; ++ gdb_regs[_EBX] = regs->ebx; ++ gdb_regs[_ECX] = regs->ecx; ++ gdb_regs[_EDX] = regs->edx; ++ gdb_regs[_ESI] = regs->esi; ++ gdb_regs[_EDI] = regs->edi; ++ gdb_regs[_EBP] = regs->ebp; ++ gdb_regs[_DS] = regs->xds; ++ gdb_regs[_ES] = regs->xes; ++ gdb_regs[_PS] = regs->eflags; ++ gdb_regs[_CS] = regs->xcs; ++ gdb_regs[_PC] = regs->eip; ++ /* Note, as we are a debugging the kernel, we will always ++ * trap in kernel code, this means no priviledge change, ++ * and so the pt_regs structure is not completely valid. In a non ++ * privilege change trap, only EFLAGS, CS and EIP are put on the stack, ++ * SS and ESP are not stacked, this means that the last 2 elements of ++ * pt_regs is not valid (they would normally refer to the user stack) ++ * also, using regs+1 is no good because you end up will a value that is ++ * 2 longs (8) too high. This used to cause stepping over functions ++ * to fail, so my fix is to use the address of regs->esp, which ++ * should point at the end of the stack frame. Note I have ignored ++ * completely exceptions that cause an error code to be stacked, such ++ * as double fault. Stuart Hughes, Zentropix. ++ * original code: gdb_regs[_ESP] = (int) (regs + 1) ; ++ ++ * this is now done on entry and moved to OLD_esp (as well as NEW_esp). 
++ */ ++ gdb_regs[_ESP] = NEW_esp; ++ gdb_regs[_SS] = __KERNEL_DS; ++ gdb_regs[_FS] = 0xFFFF; ++ gdb_regs[_GS] = 0xFFFF; ++} /* regs_to_gdb_regs */ ++ ++static void ++gdb_regs_to_regs(int *gdb_regs, struct pt_regs *regs) ++{ ++ regs->eax = gdb_regs[_EAX]; ++ regs->ebx = gdb_regs[_EBX]; ++ regs->ecx = gdb_regs[_ECX]; ++ regs->edx = gdb_regs[_EDX]; ++ regs->esi = gdb_regs[_ESI]; ++ regs->edi = gdb_regs[_EDI]; ++ regs->ebp = gdb_regs[_EBP]; ++ regs->xds = gdb_regs[_DS]; ++ regs->xes = gdb_regs[_ES]; ++ regs->eflags = gdb_regs[_PS]; ++ regs->xcs = gdb_regs[_CS]; ++ regs->eip = gdb_regs[_PC]; ++ NEW_esp = gdb_regs[_ESP]; /* keep the value */ ++#if 0 /* can't change these */ ++ regs->esp = gdb_regs[_ESP]; ++ regs->xss = gdb_regs[_SS]; ++ regs->fs = gdb_regs[_FS]; ++ regs->gs = gdb_regs[_GS]; ++#endif ++ ++} /* gdb_regs_to_regs */ ++extern void scheduling_functions_start_here(void); ++extern void scheduling_functions_end_here(void); ++#define first_sched ((unsigned long) scheduling_functions_start_here) ++#define last_sched ((unsigned long) scheduling_functions_end_here) ++ ++int thread_list = 0; ++ ++void ++get_gdb_regs(struct task_struct *p, struct pt_regs *regs, int *gdb_regs) ++{ ++ unsigned long stack_page; ++ int count = 0; ++ IF_SMP(int i); ++ if (!p || p == current) { ++ regs_to_gdb_regs(gdb_regs, regs); ++ return; ++ } ++#ifdef CONFIG_SMP ++ for (i = 0; i < MAX_NO_CPUS; i++) { ++ if (p == kgdb_info.cpus_waiting[i].task) { ++ regs_to_gdb_regs(gdb_regs, ++ kgdb_info.cpus_waiting[i].regs); ++ gdb_regs[_ESP] = ++ (int) &kgdb_info.cpus_waiting[i].regs->esp; ++ ++ return; ++ } ++ } ++#endif ++ memset(gdb_regs, 0, NUMREGBYTES); ++ gdb_regs[_ESP] = p->thread.esp; ++ gdb_regs[_PC] = p->thread.eip; ++ gdb_regs[_EBP] = *(int *) gdb_regs[_ESP]; ++ gdb_regs[_EDI] = *(int *) (gdb_regs[_ESP] + 4); ++ gdb_regs[_ESI] = *(int *) (gdb_regs[_ESP] + 8); ++ ++/* ++ * This code is to give a more informative notion of where a process ++ * is waiting. 
It is used only when the user asks for a thread info ++ * list. If he then switches to the thread, s/he will find the task ++ * is in schedule, but a back trace should show the same info we come ++ * up with. This code was shamelessly purloined from process.c. It was ++ * then enhanced to provide more registers than simply the program ++ * counter. ++ */ ++ ++ if (!thread_list) { ++ return; ++ } ++ ++ if (p->state == TASK_RUNNING) ++ return; ++ stack_page = (unsigned long) p->thread_info; ++ if (gdb_regs[_ESP] < stack_page || gdb_regs[_ESP] > 8188 + stack_page) ++ return; ++ /* include/asm-i386/system.h:switch_to() pushes ebp last. */ ++ do { ++ if (gdb_regs[_EBP] < stack_page || ++ gdb_regs[_EBP] > 8184 + stack_page) ++ return; ++ gdb_regs[_PC] = *(unsigned long *) (gdb_regs[_EBP] + 4); ++ gdb_regs[_ESP] = gdb_regs[_EBP] + 8; ++ gdb_regs[_EBP] = *(unsigned long *) gdb_regs[_EBP]; ++ if (gdb_regs[_PC] < first_sched || gdb_regs[_PC] >= last_sched) ++ return; ++ } while (count++ < 16); ++ return; ++} ++ ++/* Indicate to caller of mem2hex or hex2mem that there has been an ++ error. 
*/ ++static volatile int mem_err = 0; ++static volatile int mem_err_expected = 0; ++static volatile int mem_err_cnt = 0; ++static int garbage_loc = -1; ++ ++int ++get_char(char *addr) ++{ ++ return *addr; ++} ++ ++void ++set_char(char *addr, int val, int may_fault) ++{ ++ /* ++ * This code traps references to the area mapped to the kernel ++ * stack as given by the regs and, instead, stores to the ++ * fn_call_lookaside[cpu].array ++ */ ++ if (may_fault && ++ (unsigned int) addr < OLD_esp && ++ ((unsigned int) addr > (OLD_esp - (unsigned int) LOOKASIDE_SIZE))) { ++ addr = (char *) END_OF_LOOKASIDE - ((char *) OLD_esp - addr); ++ } ++ *addr = val; ++} ++ ++/* convert the memory pointed to by mem into hex, placing result in buf */ ++/* return a pointer to the last char put in buf (null) */ ++/* If MAY_FAULT is non-zero, then we should set mem_err in response to ++ a fault; if zero treat a fault like any other fault in the stub. */ ++char * ++mem2hex(char *mem, char *buf, int count, int may_fault) ++{ ++ int i; ++ unsigned char ch; ++ ++ if (may_fault) { ++ mem_err_expected = 1; ++ mem_err = 0; ++ } ++ for (i = 0; i < count; i++) { ++ /* printk("%lx = ", mem) ; */ ++ ++ ch = get_char(mem++); ++ ++ /* printk("%02x\n", ch & 0xFF) ; */ ++ if (may_fault && mem_err) { ++ if (remote_debug) ++ printk("Mem fault fetching from addr %lx\n", ++ (long) (mem - 1)); ++ *buf = 0; /* truncate buffer */ ++ return (buf); ++ } ++ *buf++ = hexchars[ch >> 4]; ++ *buf++ = hexchars[ch % 16]; ++ } ++ *buf = 0; ++ if (may_fault) ++ mem_err_expected = 0; ++ return (buf); ++} ++ ++/* convert the hex array pointed to by buf into binary to be placed in mem */ ++/* return a pointer to the character AFTER the last byte written */ ++/* NOTE: We use the may fault flag to also indicate if the write is to ++ * the registers (0) or "other" memory (!=0) ++ */ ++char * ++hex2mem(char *buf, char *mem, int count, int may_fault) ++{ ++ int i; ++ unsigned char ch; ++ ++ if (may_fault) { ++ mem_err_expected = 
1; ++ mem_err = 0; ++ } ++ for (i = 0; i < count; i++) { ++ ch = hex(*buf++) << 4; ++ ch = ch + hex(*buf++); ++ set_char(mem++, ch, may_fault); ++ ++ if (may_fault && mem_err) { ++ if (remote_debug) ++ printk("Mem fault storing to addr %lx\n", ++ (long) (mem - 1)); ++ return (mem); ++ } ++ } ++ if (may_fault) ++ mem_err_expected = 0; ++ return (mem); ++} ++ ++/**********************************************/ ++/* WHILE WE FIND NICE HEX CHARS, BUILD AN INT */ ++/* RETURN NUMBER OF CHARS PROCESSED */ ++/**********************************************/ ++int ++hexToInt(char **ptr, int *intValue) ++{ ++ int numChars = 0; ++ int hexValue; ++ ++ *intValue = 0; ++ ++ while (**ptr) { ++ hexValue = hex(**ptr); ++ if (hexValue >= 0) { ++ *intValue = (*intValue << 4) | hexValue; ++ numChars++; ++ } else ++ break; ++ ++ (*ptr)++; ++ } ++ ++ return (numChars); ++} ++ ++#define stubhex(h) hex(h) ++ ++static int ++stub_unpack_int(char *buff, int fieldlength) ++{ ++ int nibble; ++ int retval = 0; ++ ++ while (fieldlength) { ++ nibble = stubhex(*buff++); ++ retval |= nibble; ++ fieldlength--; ++ if (fieldlength) ++ retval = retval << 4; ++ } ++ return retval; ++} ++ ++static char * ++pack_hex_byte(char *pkt, int byte) ++{ ++ *pkt++ = hexchars[(byte >> 4) & 0xf]; ++ *pkt++ = hexchars[(byte & 0xf)]; ++ return pkt; ++} ++ ++#define BUF_THREAD_ID_SIZE 16 ++ ++static char * ++pack_threadid(char *pkt, threadref * id) ++{ ++ char *limit; ++ unsigned char *altid; ++ ++ altid = (unsigned char *) id; ++ limit = pkt + BUF_THREAD_ID_SIZE; ++ while (pkt < limit) ++ pkt = pack_hex_byte(pkt, *altid++); ++ return pkt; ++} ++ ++static char * ++unpack_byte(char *buf, int *value) ++{ ++ *value = stub_unpack_int(buf, 2); ++ return buf + 2; ++} ++ ++static char * ++unpack_threadid(char *inbuf, threadref * id) ++{ ++ char *altref; ++ char *limit = inbuf + BUF_THREAD_ID_SIZE; ++ int x, y; ++ ++ altref = (char *) id; ++ ++ while (inbuf < limit) { ++ x = stubhex(*inbuf++); ++ y = stubhex(*inbuf++); ++ 
*altref++ = (x << 4) | y; ++ } ++ return inbuf; ++} ++ ++void ++int_to_threadref(threadref * id, int value) ++{ ++ unsigned char *scan; ++ ++ scan = (unsigned char *) id; ++ { ++ int i = 4; ++ while (i--) ++ *scan++ = 0; ++ } ++ *scan++ = (value >> 24) & 0xff; ++ *scan++ = (value >> 16) & 0xff; ++ *scan++ = (value >> 8) & 0xff; ++ *scan++ = (value & 0xff); ++} ++ ++static int ++threadref_to_int(threadref * ref) ++{ ++ int i, value = 0; ++ unsigned char *scan; ++ ++ scan = (char *) ref; ++ scan += 4; ++ i = 4; ++ while (i-- > 0) ++ value = (value << 8) | ((*scan++) & 0xff); ++ return value; ++} ++ ++#if 1 /* this is a hold over from 2.4 where O(1) was "sometimes" */ ++extern struct task_struct *kgdb_get_idle(int cpu); ++#define idle_task(cpu) kgdb_get_idle(cpu) ++#else ++#define idle_task(cpu) init_tasks[cpu] ++#endif ++ ++struct task_struct * ++getthread(int pid) ++{ ++ struct task_struct *thread; ++ if (pid >= PID_MAX && pid <= (PID_MAX + MAX_NO_CPUS)) { ++ ++ return idle_task(pid - PID_MAX); ++ } else { ++ /* ++ * find_task_by_pid is relatively safe all the time ++ * Other pid functions require lock downs which imply ++ * that we may be interrupting them (as we get here ++ * in the middle of most any lock down) ++ */ ++ thread = find_task_by_pid(pid); ++ if (thread) { ++ return thread; ++ } ++ } ++ return NULL; ++} ++/* *INDENT-OFF* */ ++struct hw_breakpoint { ++ unsigned enabled; ++ unsigned type; ++ unsigned len; ++ unsigned addr; ++} breakinfo[4] = { {enabled:0}, ++ {enabled:0}, ++ {enabled:0}, ++ {enabled:0}}; ++/* *INDENT-ON* */ ++unsigned hw_breakpoint_status; ++void ++correct_hw_break(void) ++{ ++ int breakno; ++ int correctit; ++ int breakbit; ++ unsigned dr7; ++ ++ asm volatile ("movl %%db7, %0\n":"=r" (dr7) ++ :); ++ /* *INDENT-OFF* */ ++ do { ++ unsigned addr0, addr1, addr2, addr3; ++ asm volatile ("movl %%db0, %0\n" ++ "movl %%db1, %1\n" ++ "movl %%db2, %2\n" ++ "movl %%db3, %3\n" ++ :"=r" (addr0), "=r"(addr1), ++ "=r"(addr2), "=r"(addr3) ++ :); ++ } 
while (0); ++ /* *INDENT-ON* */ ++ correctit = 0; ++ for (breakno = 0; breakno < 3; breakno++) { ++ breakbit = 2 << (breakno << 1); ++ if (!(dr7 & breakbit) && breakinfo[breakno].enabled) { ++ correctit = 1; ++ dr7 |= breakbit; ++ dr7 &= ~(0xf0000 << (breakno << 2)); ++ dr7 |= (((breakinfo[breakno].len << 2) | ++ breakinfo[breakno].type) << 16) << ++ (breakno << 2); ++ switch (breakno) { ++ case 0: ++ asm volatile ("movl %0, %%dr0\n"::"r" ++ (breakinfo[breakno].addr)); ++ break; ++ ++ case 1: ++ asm volatile ("movl %0, %%dr1\n"::"r" ++ (breakinfo[breakno].addr)); ++ break; ++ ++ case 2: ++ asm volatile ("movl %0, %%dr2\n"::"r" ++ (breakinfo[breakno].addr)); ++ break; ++ ++ case 3: ++ asm volatile ("movl %0, %%dr3\n"::"r" ++ (breakinfo[breakno].addr)); ++ break; ++ } ++ } else if ((dr7 & breakbit) && !breakinfo[breakno].enabled) { ++ correctit = 1; ++ dr7 &= ~breakbit; ++ dr7 &= ~(0xf0000 << (breakno << 2)); ++ } ++ } ++ if (correctit) { ++ asm volatile ("movl %0, %%db7\n"::"r" (dr7)); ++ } ++} ++ ++int ++remove_hw_break(unsigned breakno) ++{ ++ if (!breakinfo[breakno].enabled) { ++ return -1; ++ } ++ breakinfo[breakno].enabled = 0; ++ return 0; ++} ++ ++int ++set_hw_break(unsigned breakno, unsigned type, unsigned len, unsigned addr) ++{ ++ if (breakinfo[breakno].enabled) { ++ return -1; ++ } ++ breakinfo[breakno].enabled = 1; ++ breakinfo[breakno].type = type; ++ breakinfo[breakno].len = len; ++ breakinfo[breakno].addr = addr; ++ return 0; ++} ++ ++#ifdef CONFIG_SMP ++static int in_kgdb_console = 0; ++ ++int ++in_kgdb(struct pt_regs *regs) ++{ ++ unsigned flags; ++ int cpu = smp_processor_id(); ++ in_kgdb_called = 1; ++ if (!spin_is_locked(&kgdb_spinlock)) { ++ if (in_kgdb_here_log[cpu] || /* we are holding this cpu */ ++ in_kgdb_console) { /* or we are doing slow i/o */ ++ return 1; ++ } ++ return 0; ++ } ++ ++ /* As I see it the only reason not to let all cpus spin on ++ * the same spin_lock is to allow selected ones to proceed. 
++ * This would be a good thing, so we leave it this way. ++ * Maybe someday.... Done ! ++ ++ * in_kgdb() is called from an NMI so we don't pretend ++ * to have any resources, like printk() for example. ++ */ ++ ++ kgdb_local_irq_save(flags); /* only local here, to avoid hanging */ ++ /* ++ * log arival of this cpu ++ * The NMI keeps on ticking. Protect against recurring more ++ * than once, and ignor the cpu that has the kgdb lock ++ */ ++ in_kgdb_entry_log[cpu]++; ++ in_kgdb_here_log[cpu] = regs; ++ if (cpu == spinlock_cpu || waiting_cpus[cpu].task) { ++ goto exit_in_kgdb; ++ } ++ /* ++ * For protection of the initilization of the spin locks by kgdb ++ * it locks the kgdb spinlock before it gets the wait locks set ++ * up. We wait here for the wait lock to be taken. If the ++ * kgdb lock goes away first?? Well, it could be a slow exit ++ * sequence where the wait lock is removed prior to the kgdb lock ++ * so if kgdb gets unlocked, we just exit. ++ */ ++ while (spin_is_locked(&kgdb_spinlock) && ++ !spin_is_locked(waitlocks + cpu)) ; ++ if (!spin_is_locked(&kgdb_spinlock)) { ++ goto exit_in_kgdb; ++ } ++ waiting_cpus[cpu].task = current; ++ waiting_cpus[cpu].pid = (current->pid) ? 
: (PID_MAX + cpu); ++ waiting_cpus[cpu].regs = regs; ++ ++ spin_unlock_wait(waitlocks + cpu); ++ /* ++ * log departure of this cpu ++ */ ++ waiting_cpus[cpu].task = 0; ++ waiting_cpus[cpu].pid = 0; ++ waiting_cpus[cpu].regs = 0; ++ correct_hw_break(); ++ exit_in_kgdb: ++ in_kgdb_here_log[cpu] = 0; ++ kgdb_local_irq_restore(flags); ++ return 1; ++ /* ++ spin_unlock(continuelocks + smp_processor_id()); ++ */ ++} ++ ++void ++smp__in_kgdb(struct pt_regs regs) ++{ ++ ack_APIC_irq(); ++ in_kgdb(&regs); ++} ++#else ++int ++in_kgdb(struct pt_regs *regs) ++{ ++ return (kgdb_spinlock); ++} ++#endif ++ ++void ++printexceptioninfo(int exceptionNo, int errorcode, char *buffer) ++{ ++ unsigned dr6; ++ int i; ++ switch (exceptionNo) { ++ case 1: /* debug exception */ ++ break; ++ case 3: /* breakpoint */ ++ sprintf(buffer, "Software breakpoint"); ++ return; ++ default: ++ sprintf(buffer, "Details not available"); ++ return; ++ } ++ asm volatile ("movl %%db6, %0\n":"=r" (dr6) ++ :); ++ if (dr6 & 0x4000) { ++ sprintf(buffer, "Single step"); ++ return; ++ } ++ for (i = 0; i < 4; ++i) { ++ if (dr6 & (1 << i)) { ++ sprintf(buffer, "Hardware breakpoint %d", i); ++ return; ++ } ++ } ++ sprintf(buffer, "Unknown trap"); ++ return; ++} ++ ++/* ++ * This function does all command processing for interfacing to gdb. ++ * ++ * NOTE: The INT nn instruction leaves the state of the interrupt ++ * enable flag UNCHANGED. That means that when this routine ++ * is entered via a breakpoint (INT 3) instruction from code ++ * that has interrupts enabled, then interrupts will STILL BE ++ * enabled when this routine is entered. The first thing that ++ * we do here is disable interrupts so as to prevent recursive ++ * entries and bothersome serial interrupts while we are ++ * trying to run the serial port in polled mode. ++ * ++ * For kernel version 2.1.xx the kgdb_cli() actually gets a spin lock so ++ * it is always necessary to do a restore_flags before returning ++ * so as to let go of that lock. 
++ */ ++int ++kgdb_handle_exception(int exceptionVector, ++ int signo, int err_code, struct pt_regs *linux_regs) ++{ ++ struct task_struct *usethread = NULL; ++ struct task_struct *thread_list_start = 0, *thread = NULL; ++ int addr, length; ++ int breakno, breaktype; ++ char *ptr; ++ int newPC; ++ threadref thref; ++ int threadid; ++ int thread_min = PID_MAX + MAX_NO_CPUS; ++ int maxthreads; ++ int nothreads; ++ unsigned long flags; ++ int gdb_regs[NUMREGBYTES / 4]; ++ int dr6; ++ IF_SMP(int entry_state = 0); /* 0, ok, 1, no nmi, 2 sync failed */ ++#define NO_NMI 1 ++#define NO_SYNC 2 ++#define regs (*linux_regs) ++#define NUMREGS NUMREGBYTES/4 ++ /* ++ * If the entry is not from the kernel then return to the Linux ++ * trap handler and let it process the interrupt normally. ++ */ ++ if ((linux_regs->eflags & VM_MASK) || (3 & linux_regs->xcs)) { ++ printk("ignoring non-kernel exception\n"); ++ print_regs(&regs); ++ return (0); ++ } ++ ++ kgdb_local_irq_save(flags); ++ ++ /* Get kgdb spinlock */ ++ ++ KGDB_SPIN_LOCK(&kgdb_spinlock); ++ rdtscll(kgdb_info.entry_tsc); ++ /* ++ * We depend on this spinlock and the NMI watch dog to control the ++ * other cpus. They will arrive at "in_kgdb()" as a result of the ++ * NMI and will wait there for the following spin locks to be ++ * released. ++ */ ++#ifdef CONFIG_SMP ++ ++ if (cpu_callout_map & ~MAX_CPU_MASK) { ++ printk("kgdb : too many cpus, possibly not mapped" ++ " in contiguous space, change MAX_NO_CPUS" ++ " in kgdb_stub and make new kernel.\n" ++ " cpu_callout_map is %lx\n", cpu_callout_map); ++ goto exit_just_unlock; ++ } ++ ++ if (spinlock_count == 1) { ++ int time, end_time, dum; ++ int i; ++ int cpu_logged_in[MAX_NO_CPUS] = {[0 ... MAX_NO_CPUS - 1] = (0) ++ }; ++ if (remote_debug) { ++ printk("kgdb : cpu %d entry, syncing others\n", ++ smp_processor_id()); ++ } ++ for (i = 0; i < MAX_NO_CPUS; i++) { ++ /* ++ * Use trylock as we may already hold the lock if ++ * we are holding the cpu. Net result is all ++ * locked. 
++ */ ++ spin_trylock(&waitlocks[i]); ++ } ++ for (i = 0; i < MAX_NO_CPUS; i++) ++ cpu_logged_in[i] = 0; ++ /* ++ * Wait for their arrival. We know the watch dog is active if ++ * in_kgdb() has ever been called, as it is always called on a ++ * watchdog tick. ++ */ ++ rdtsc(dum, time); ++ end_time = time + 2; /* Note: we use the High order bits! */ ++ i = 1; ++ if (num_online_cpus() > 1) { ++ int me_in_kgdb = in_kgdb_entry_log[smp_processor_id()]; ++ smp_send_nmi_allbutself(); ++ while (i < num_online_cpus() && time != end_time) { ++ int j; ++ for (j = 0; j < MAX_NO_CPUS; j++) { ++ if (waiting_cpus[j].task && ++ !cpu_logged_in[j]) { ++ i++; ++ cpu_logged_in[j] = 1; ++ if (remote_debug) { ++ printk ++ ("kgdb : cpu %d arrived at kgdb\n", ++ j); ++ } ++ break; ++ } else if (!waiting_cpus[j].task && ++ !cpu_online(j)) { ++ waiting_cpus[j].task = NOCPU; ++ cpu_logged_in[j] = 1; ++ waiting_cpus[j].hold = 1; ++ break; ++ } ++ if (!waiting_cpus[j].task && ++ in_kgdb_here_log[j]) { ++ ++ int wait = 100000; ++ while (wait--) ; ++ if (!waiting_cpus[j].task && ++ in_kgdb_here_log[j]) { ++ printk ++ ("kgdb : cpu %d stall" ++ " in in_kgdb\n", ++ j); ++ i++; ++ cpu_logged_in[j] = 1; ++ waiting_cpus[j].task = ++ (struct task_struct ++ *) 1; ++ } ++ } ++ } ++ ++ if (in_kgdb_entry_log[smp_processor_id()] > ++ (me_in_kgdb + 10)) { ++ break; ++ } ++ ++ rdtsc(dum, time); ++ } ++ if (i < num_online_cpus()) { ++ printk ++ ("kgdb : time out, proceeding without sync\n"); ++#if 0 ++ printk("kgdb : Waiting_cpus: 0 = %d, 1 = %d\n", ++ waiting_cpus[0].task != 0, ++ waiting_cpus[1].task != 0); ++ printk("kgdb : Cpu_logged in: 0 = %d, 1 = %d\n", ++ cpu_logged_in[0], cpu_logged_in[1]); ++ printk ++ ("kgdb : in_kgdb_here_log in: 0 = %d, 1 = %d\n", ++ in_kgdb_here_log[0] != 0, ++ in_kgdb_here_log[1] != 0); ++#endif ++ entry_state = NO_SYNC; ++ } else { ++#if 0 ++ int ent = ++ in_kgdb_entry_log[smp_processor_id()] - ++ me_in_kgdb; ++ printk("kgdb : sync after %d entries\n", ent); ++#endif ++ } ++ } 
else { ++ if (remote_debug) { ++ printk ++ ("kgdb : %d cpus, but watchdog not active\n" ++ "proceeding without locking down other cpus\n", ++ num_online_cpus()); ++ entry_state = NO_NMI; ++ } ++ } ++ } ++#endif ++ ++ if (remote_debug) { ++ unsigned long *lp = (unsigned long *) &linux_regs; ++ ++ printk("handle_exception(exceptionVector=%d, " ++ "signo=%d, err_code=%d, linux_regs=%p)\n", ++ exceptionVector, signo, err_code, linux_regs); ++ if (debug_regs) { ++ print_regs(&regs); ++ printk("Stk: %8lx %8lx %8lx %8lx" ++ " %8lx %8lx %8lx %8lx\n", ++ lp[0], lp[1], lp[2], lp[3], ++ lp[4], lp[5], lp[6], lp[7]); ++ printk(" %8lx %8lx %8lx %8lx" ++ " %8lx %8lx %8lx %8lx\n", ++ lp[8], lp[9], lp[10], lp[11], ++ lp[12], lp[13], lp[14], lp[15]); ++ printk(" %8lx %8lx %8lx %8lx " ++ "%8lx %8lx %8lx %8lx\n", ++ lp[16], lp[17], lp[18], lp[19], ++ lp[20], lp[21], lp[22], lp[23]); ++ printk(" %8lx %8lx %8lx %8lx " ++ "%8lx %8lx %8lx %8lx\n", ++ lp[24], lp[25], lp[26], lp[27], ++ lp[28], lp[29], lp[30], lp[31]); ++ } ++ } ++ ++ /* Disable hardware debugging while we are in kgdb */ ++ /* Get the debug register status register */ ++/* *INDENT-OFF* */ ++ __asm__("movl %0,%%db7" ++ : /* no output */ ++ :"r"(0)); ++ ++ asm volatile ("movl %%db6, %0\n" ++ :"=r" (hw_breakpoint_status) ++ :); ++ ++/* *INDENT-ON* */ ++ switch (exceptionVector) { ++ case 0: /* divide error */ ++ case 1: /* debug exception */ ++ case 2: /* NMI */ ++ case 3: /* breakpoint */ ++ case 4: /* overflow */ ++ case 5: /* bounds check */ ++ case 6: /* invalid opcode */ ++ case 7: /* device not available */ ++ case 8: /* double fault (errcode) */ ++ case 10: /* invalid TSS (errcode) */ ++ case 12: /* stack fault (errcode) */ ++ case 16: /* floating point error */ ++ case 17: /* alignment check (errcode) */ ++ default: /* any undocumented */ ++ break; ++ case 11: /* segment not present (errcode) */ ++ case 13: /* general protection (errcode) */ ++ case 14: /* page fault (special errcode) */ ++ case 19: /* cache flush denied 
*/ ++ if (mem_err_expected) { ++ /* ++ * This fault occurred because of the ++ * get_char or set_char routines. These ++ * two routines use either eax or edx to ++ * indirectly reference the location in ++ * memory that they are working with. ++ * For a page fault, when we return the ++ * instruction will be retried, so we ++ * have to make sure that these ++ * registers point to valid memory. ++ */ ++ mem_err = 1; /* set mem error flag */ ++ mem_err_expected = 0; ++ mem_err_cnt++; /* helps in debugging */ ++ /* make valid address */ ++ regs.eax = (long) &garbage_loc; ++ /* make valid address */ ++ regs.edx = (long) &garbage_loc; ++ if (remote_debug) ++ printk("Return after memory error: " ++ "mem_err_cnt=%d\n", mem_err_cnt); ++ if (debug_regs) ++ print_regs(&regs); ++ goto exit_kgdb; ++ } ++ break; ++ } ++ if (remote_debug) ++ printk("kgdb : entered kgdb on cpu %d\n", smp_processor_id()); ++ ++ gdb_i386vector = exceptionVector; ++ gdb_i386errcode = err_code; ++ kgdb_info.called_from = __builtin_return_address(0); ++#ifdef CONFIG_SMP ++ /* ++ * OK, we can now communicate, let's tell gdb about the sync. ++ * but only if we had a problem. ++ */ ++ switch (entry_state) { ++ case NO_NMI: ++ to_gdb("NMI not active, other cpus not stopped\n"); ++ break; ++ case NO_SYNC: ++ to_gdb("Some cpus not stopped, see 'kgdb_info' for details\n"); ++ default:; ++ } ++ ++#endif ++/* ++ * Set up the gdb function call area. 
++ */ ++ trap_cpu = smp_processor_id(); ++ OLD_esp = NEW_esp = (int) (&linux_regs->esp); ++ ++ IF_SMP(once_again:) ++ /* reply to host that an exception has occurred */ ++ remcomOutBuffer[0] = 'S'; ++ remcomOutBuffer[1] = hexchars[signo >> 4]; ++ remcomOutBuffer[2] = hexchars[signo % 16]; ++ remcomOutBuffer[3] = 0; ++ ++ putpacket(remcomOutBuffer); ++ ++ while (1 == 1) { ++ error = 0; ++ remcomOutBuffer[0] = 0; ++ getpacket(remcomInBuffer); ++ switch (remcomInBuffer[0]) { ++ case '?': ++ remcomOutBuffer[0] = 'S'; ++ remcomOutBuffer[1] = hexchars[signo >> 4]; ++ remcomOutBuffer[2] = hexchars[signo % 16]; ++ remcomOutBuffer[3] = 0; ++ break; ++ case 'd': ++ remote_debug = !(remote_debug); /* toggle debug flag */ ++ printk("Remote debug %s\n", ++ remote_debug ? "on" : "off"); ++ break; ++ case 'g': /* return the value of the CPU registers */ ++ get_gdb_regs(usethread, &regs, gdb_regs); ++ mem2hex((char *) gdb_regs, ++ remcomOutBuffer, NUMREGBYTES, 0); ++ break; ++ case 'G': /* set the value of the CPU registers - return OK */ ++ hex2mem(&remcomInBuffer[1], ++ (char *) gdb_regs, NUMREGBYTES, 0); ++ if (!usethread || usethread == current) { ++ gdb_regs_to_regs(gdb_regs, &regs); ++ strcpy(remcomOutBuffer, "OK"); ++ } else { ++ strcpy(remcomOutBuffer, "E00"); ++ } ++ break; ++ ++ case 'P':{ /* set the value of a single CPU register - ++ return OK */ ++ /* ++ * For some reason, gdb wants to talk about pseudo ++ * registers (greater than 15). These may have ++ * meaning for ptrace, but for us it is safe to ++ * ignore them. We do this by dumping them into ++ * _GS which we also ignore, but do have memory for. ++ */ ++ int regno; ++ ++ ptr = &remcomInBuffer[1]; ++ regs_to_gdb_regs(gdb_regs, &regs); ++ if ((!usethread || usethread == current) && ++ hexToInt(&ptr, &regno) && ++ *ptr++ == '=' && (regno >= 0)) { ++ regno = ++ (regno >= NUMREGS ? 
_GS : regno); ++ hex2mem(ptr, (char *) &gdb_regs[regno], ++ 4, 0); ++ gdb_regs_to_regs(gdb_regs, &regs); ++ strcpy(remcomOutBuffer, "OK"); ++ break; ++ } ++ strcpy(remcomOutBuffer, "E01"); ++ break; ++ } ++ ++ /* mAA..AA,LLLL Read LLLL bytes at address AA..AA */ ++ case 'm': ++ /* TRY TO READ %x,%x. IF SUCCEED, SET PTR = 0 */ ++ ptr = &remcomInBuffer[1]; ++ if (hexToInt(&ptr, &addr) && ++ (*(ptr++) == ',') && (hexToInt(&ptr, &length))) { ++ ptr = 0; ++ /* ++ * hex doubles the byte count ++ */ ++ if (length > (BUFMAX / 2)) ++ length = BUFMAX / 2; ++ mem2hex((char *) addr, ++ remcomOutBuffer, length, 1); ++ if (mem_err) { ++ strcpy(remcomOutBuffer, "E03"); ++ debug_error("memory fault\n", NULL); ++ } ++ } ++ ++ if (ptr) { ++ strcpy(remcomOutBuffer, "E01"); ++ debug_error ++ ("malformed read memory command: %s\n", ++ remcomInBuffer); ++ } ++ break; ++ ++ /* MAA..AA,LLLL: ++ Write LLLL bytes at address AA.AA return OK */ ++ case 'M': ++ /* TRY TO READ '%x,%x:'. IF SUCCEED, SET PTR = 0 */ ++ ptr = &remcomInBuffer[1]; ++ if (hexToInt(&ptr, &addr) && ++ (*(ptr++) == ',') && ++ (hexToInt(&ptr, &length)) && (*(ptr++) == ':')) { ++ hex2mem(ptr, (char *) addr, length, 1); ++ ++ if (mem_err) { ++ strcpy(remcomOutBuffer, "E03"); ++ debug_error("memory fault\n", NULL); ++ } else { ++ strcpy(remcomOutBuffer, "OK"); ++ } ++ ++ ptr = 0; ++ } ++ if (ptr) { ++ strcpy(remcomOutBuffer, "E02"); ++ debug_error ++ ("malformed write memory command: %s\n", ++ remcomInBuffer); ++ } ++ break; ++ ++ /* cAA..AA Continue at address AA..AA(optional) */ ++ /* sAA..AA Step one instruction from AA..AA(optional) */ ++ /* D detach, reply OK and then continue */ ++ case 'c': ++ case 's': ++ case 'D': ++ ++ /* try to read optional parameter, ++ pc unchanged if no parm */ ++ ptr = &remcomInBuffer[1]; ++ if (hexToInt(&ptr, &addr)) { ++ if (remote_debug) ++ printk("Changing EIP to 0x%x\n", addr); ++ ++ regs.eip = addr; ++ } ++ ++ newPC = regs.eip; ++ ++ /* clear the trace bit */ ++ regs.eflags &= 0xfffffeff; 
++ ++ /* set the trace bit if we're stepping */ ++ if (remcomInBuffer[0] == 's') ++ regs.eflags |= 0x100; ++ ++ /* detach is a friendly version of continue. Note that ++ debugging is still enabled (e.g. hit control C) ++ until the process that issued an ioctl TIOCGDB ++ terminates ++ */ ++ if (remcomInBuffer[0] == 'D') { ++ strcpy(remcomOutBuffer, "OK"); ++ putpacket(remcomOutBuffer); ++ } ++ ++ if (remote_debug) { ++ printk("Resuming execution\n"); ++ print_regs(&regs); ++ } ++ asm volatile ("movl %%db6, %0\n":"=r" (dr6) ++ :); ++ if (!(dr6 & 0x4000)) { ++ for (breakno = 0; breakno < 4; ++breakno) { ++ if (dr6 & (1 << breakno) && ++ (breakinfo[breakno].type == 0)) { ++ /* Set restore flag */ ++ regs.eflags |= 0x10000; ++ break; ++ } ++ } ++ } ++ correct_hw_break(); ++ asm volatile ("movl %0, %%db6\n"::"r" (0)); ++ goto exit_kgdb; ++ ++ /* kill the program */ ++ case 'k': /* do nothing */ ++ break; ++ ++ /* query */ ++ case 'q': ++ switch (remcomInBuffer[1]) { ++ case 'L': ++ /* List threads */ ++ thread_list = 2; ++ thread_list_start = (usethread ? 
: current); ++ unpack_byte(remcomInBuffer + 3, &maxthreads); ++ unpack_threadid(remcomInBuffer + 5, &thref); ++ do { ++ int buf_thread_limit = ++ (BUFMAX - 22) / BUF_THREAD_ID_SIZE; ++ if (maxthreads > buf_thread_limit) { ++ maxthreads = buf_thread_limit; ++ } ++ } while (0); ++ remcomOutBuffer[0] = 'q'; ++ remcomOutBuffer[1] = 'M'; ++ remcomOutBuffer[4] = '0'; ++ pack_threadid(remcomOutBuffer + 5, &thref); ++ ++ threadid = threadref_to_int(&thref); ++ for (nothreads = 0; ++ nothreads < maxthreads && ++ threadid < PID_MAX + MAX_NO_CPUS; ++ threadid++) { ++ thread = getthread(threadid); ++ if (thread) { ++ int_to_threadref(&thref, ++ threadid); ++ pack_threadid(remcomOutBuffer + ++ 21 + ++ nothreads * 16, ++ &thref); ++ nothreads++; ++ if (thread_min > threadid) ++ thread_min = threadid; ++ } ++ } ++ ++ if (threadid == PID_MAX + MAX_NO_CPUS) { ++ remcomOutBuffer[4] = '1'; ++ } ++ pack_hex_byte(remcomOutBuffer + 2, nothreads); ++ remcomOutBuffer[21 + nothreads * 16] = '\0'; ++ break; ++ ++ case 'C': ++ /* Current thread id */ ++ remcomOutBuffer[0] = 'Q'; ++ remcomOutBuffer[1] = 'C'; ++ threadid = current->pid; ++ if (!threadid) { ++ /* ++ * idle thread ++ */ ++ for (threadid = PID_MAX; ++ threadid < PID_MAX + MAX_NO_CPUS; ++ threadid++) { ++ if (current == ++ idle_task(threadid - ++ PID_MAX)) ++ break; ++ } ++ } ++ int_to_threadref(&thref, threadid); ++ pack_threadid(remcomOutBuffer + 2, &thref); ++ remcomOutBuffer[18] = '\0'; ++ break; ++ ++ case 'E': ++ /* Print exception info */ ++ printexceptioninfo(exceptionVector, ++ err_code, remcomOutBuffer); ++ break; ++ } ++ break; ++ ++ /* task related */ ++ case 'H': ++ switch (remcomInBuffer[1]) { ++ case 'g': ++ ptr = &remcomInBuffer[2]; ++ hexToInt(&ptr, &threadid); ++ thread = getthread(threadid); ++ if (!thread) { ++ remcomOutBuffer[0] = 'E'; ++ remcomOutBuffer[1] = '\0'; ++ break; ++ } ++ /* ++ * Just in case I forget what this is all about, ++ * the "thread info" command to gdb causes it ++ * to ask for a thread 
list. It then switches ++ * to each thread and asks for the registers. ++ * For this (and only this) usage, we want to ++ * fudge the registers of tasks not on the run ++ * list (i.e. waiting) to show the routine that ++ * called schedule. Also, gdb, is a minimalist ++ * in that if the current thread is the last ++ * it will not re-read the info when done. ++ * This means that in this case we must show ++ * the real registers. So here is how we do it: ++ * Each entry we keep track of the min ++ * thread in the list (the last that gdb will) ++ * get info for. We also keep track of the ++ * starting thread. ++ * "thread_list" is cleared when switching back ++ * to the min thread if it is was current, or ++ * if it was not current, thread_list is set ++ * to 1. When the switch to current comes, ++ * if thread_list is 1, clear it, else do ++ * nothing. ++ */ ++ usethread = thread; ++ if ((thread_list == 1) && ++ (thread == thread_list_start)) { ++ thread_list = 0; ++ } ++ if (thread_list && (threadid == thread_min)) { ++ if (thread == thread_list_start) { ++ thread_list = 0; ++ } else { ++ thread_list = 1; ++ } ++ } ++ /* follow through */ ++ case 'c': ++ remcomOutBuffer[0] = 'O'; ++ remcomOutBuffer[1] = 'K'; ++ remcomOutBuffer[2] = '\0'; ++ break; ++ } ++ break; ++ ++ /* Query thread status */ ++ case 'T': ++ ptr = &remcomInBuffer[1]; ++ hexToInt(&ptr, &threadid); ++ thread = getthread(threadid); ++ if (thread) { ++ remcomOutBuffer[0] = 'O'; ++ remcomOutBuffer[1] = 'K'; ++ remcomOutBuffer[2] = '\0'; ++ if (thread_min > threadid) ++ thread_min = threadid; ++ } else { ++ remcomOutBuffer[0] = 'E'; ++ remcomOutBuffer[1] = '\0'; ++ } ++ break; ++ ++ case 'Y': ++ ptr = &remcomInBuffer[1]; ++ hexToInt(&ptr, &breakno); ++ ptr++; ++ hexToInt(&ptr, &breaktype); ++ ptr++; ++ hexToInt(&ptr, &length); ++ ptr++; ++ hexToInt(&ptr, &addr); ++ if (set_hw_break(breakno & 0x3, ++ breaktype & 0x3, ++ length & 0x3, addr) == 0) { ++ strcpy(remcomOutBuffer, "OK"); ++ } else { ++ 
strcpy(remcomOutBuffer, "ERROR"); ++ } ++ break; ++ ++ /* Remove hardware breakpoint */ ++ case 'y': ++ ptr = &remcomInBuffer[1]; ++ hexToInt(&ptr, &breakno); ++ if (remove_hw_break(breakno & 0x3) == 0) { ++ strcpy(remcomOutBuffer, "OK"); ++ } else { ++ strcpy(remcomOutBuffer, "ERROR"); ++ } ++ break; ++ ++ case 'r': /* reboot */ ++ strcpy(remcomOutBuffer, "OK"); ++ putpacket(remcomOutBuffer); ++ /*to_gdb("Rebooting\n"); */ ++ /* triplefault no return from here */ ++ { ++ static long no_idt[2]; ++ __asm__ __volatile__("lidt %0"::"m"(no_idt)); ++ BREAKPOINT; ++ } ++ ++ } /* switch */ ++ ++ /* reply to the request */ ++ putpacket(remcomOutBuffer); ++ } /* while(1==1) */ ++ /* ++ * reached by goto only. ++ */ ++ exit_kgdb: ++ /* ++ * Here is where we set up to trap a gdb function call. NEW_esp ++ * will be changed if we are trying to do this. We handle both ++ * adding and subtracting, thus allowing gdb to put grung on ++ * the stack which it removes later. ++ */ ++ if (NEW_esp != OLD_esp) { ++ int *ptr = END_OF_LOOKASIDE; ++ if (NEW_esp < OLD_esp) ++ ptr -= (OLD_esp - NEW_esp) / sizeof (int); ++ *--ptr = linux_regs->eflags; ++ *--ptr = linux_regs->xcs; ++ *--ptr = linux_regs->eip; ++ *--ptr = linux_regs->ecx; ++ *--ptr = linux_regs->ebx; ++ *--ptr = linux_regs->eax; ++ linux_regs->ecx = NEW_esp - (sizeof (int) * 6); ++ linux_regs->ebx = (unsigned int) END_OF_LOOKASIDE; ++ if (NEW_esp < OLD_esp) { ++ linux_regs->eip = (unsigned int) fn_call_stub; ++ } else { ++ linux_regs->eip = (unsigned int) fn_rtn_stub; ++ linux_regs->eax = NEW_esp; ++ } ++ linux_regs->eflags &= ~(IF_BIT | TF_BIT); ++ } ++#ifdef CONFIG_SMP ++ /* ++ * Release gdb wait locks ++ * Sanity check time. Must have at least one cpu to run. Also single ++ * step must not be done if the current cpu is on hold. 
++ */ ++ if (spinlock_count == 1) { ++ int ss_hold = (regs.eflags & 0x100) && kgdb_info.hold_on_sstep; ++ int cpu_avail = 0; ++ int i; ++ ++ for (i = 0; i < MAX_NO_CPUS; i++) { ++ if (!cpu_online(i)) ++ break; ++ if (!hold_cpu(i)) { ++ cpu_avail = 1; ++ } ++ } ++ /* ++ * Early in the bring up there will be NO cpus on line... ++ */ ++ if (!cpu_avail && cpu_online_map) { ++ to_gdb("No cpus unblocked, see 'kgdb_info.hold_cpu'\n"); ++ goto once_again; ++ } ++ if (hold_cpu(smp_processor_id()) && (regs.eflags & 0x100)) { ++ to_gdb ++ ("Current cpu must be unblocked to single step\n"); ++ goto once_again; ++ } ++ if (!(ss_hold)) { ++ int i; ++ for (i = 0; i < MAX_NO_CPUS; i++) { ++ if (!hold_cpu(i)) { ++ spin_unlock(&waitlocks[i]); ++ } ++ } ++ } else { ++ spin_unlock(&waitlocks[smp_processor_id()]); ++ } ++ /* Release kgdb spinlock */ ++ KGDB_SPIN_UNLOCK(&kgdb_spinlock); ++ /* ++ * If this cpu is on hold, this is where we ++ * do it. Note, the NMI will pull us out of here, ++ * but will return as the above lock is not held. ++ * We will stay here till another cpu releases the lock for us. ++ */ ++ spin_unlock_wait(waitlocks + smp_processor_id()); ++ kgdb_local_irq_restore(flags); ++ return (0); ++ } ++ exit_just_unlock: ++#endif ++ /* Release kgdb spinlock */ ++ KGDB_SPIN_UNLOCK(&kgdb_spinlock); ++ kgdb_local_irq_restore(flags); ++ return (0); ++} ++ ++/* this function is used to set up exception handlers for tracing and ++ * breakpoints. ++ * This function is not needed as the above line does all that is needed. ++ * We leave it for backward compatitability... ++ */ ++void ++set_debug_traps(void) ++{ ++ /* ++ * linux_debug_hook is defined in traps.c. We store a pointer ++ * to our own exception handler into it. ++ ++ * But really folks, every hear of labeled common, an old Fortran ++ * concept. Lots of folks can reference it and it is define if ++ * anyone does. Only one can initialize it at link time. We do ++ * this with the hook. See the statement above. 
No need for any ++ * executable code and it is ready as soon as the kernel is ++ * loaded. Very desirable in kernel debugging. ++ ++ linux_debug_hook = handle_exception ; ++ */ ++ ++ /* In case GDB is started before us, ack any packets (presumably ++ "$?#xx") sitting there. ++ putDebugChar ('+'); ++ ++ initialized = 1; ++ */ ++} ++ ++/* This function will generate a breakpoint exception. It is used at the ++ beginning of a program to sync up with a debugger and can be used ++ otherwise as a quick means to stop program execution and "break" into ++ the debugger. */ ++/* But really, just use the BREAKPOINT macro. We will handle the int stuff ++ */ ++ ++#ifdef later ++/* ++ * possibly we should not go thru the traps.c code at all? Someday. ++ */ ++void ++do_kgdb_int3(struct pt_regs *regs, long error_code) ++{ ++ kgdb_handle_exception(3, 5, error_code, regs); ++ return; ++} ++#endif ++#undef regs ++#ifdef CONFIG_TRAP_BAD_SYSCALL_EXITS ++asmlinkage void ++bad_sys_call_exit(int stuff) ++{ ++ struct pt_regs *regs = (struct pt_regs *) &stuff; ++ printk("Sys call %d return with %x preempt_count\n", ++ (int) regs->orig_eax, preempt_count()); ++} ++#endif ++#ifdef CONFIG_STACK_OVERFLOW_TEST ++#include ++asmlinkage void ++stack_overflow(void) ++{ ++#ifdef BREAKPOINT ++ BREAKPOINT; ++#else ++ printk("Kernel stack overflow, looping forever\n"); ++#endif ++ while (1) { ++ } ++} ++#endif ++ ++#if defined(CONFIG_SMP) || defined(CONFIG_KGDB_CONSOLE) ++char gdbconbuf[BUFMAX]; ++ ++static void ++kgdb_gdb_message(const char *s, unsigned count) ++{ ++ int i; ++ int wcount; ++ char *bufptr; ++ /* ++ * This takes care of NMI while spining out chars to gdb ++ */ ++ IF_SMP(in_kgdb_console = 1); ++ gdbconbuf[0] = 'O'; ++ bufptr = gdbconbuf + 1; ++ while (count > 0) { ++ if ((count << 1) > (BUFMAX - 2)) { ++ wcount = (BUFMAX - 2) >> 1; ++ } else { ++ wcount = count; ++ } ++ count -= wcount; ++ for (i = 0; i < wcount; i++) { ++ bufptr = pack_hex_byte(bufptr, s[i]); ++ } ++ *bufptr = '\0'; ++ s 
+= wcount; ++ ++ putpacket(gdbconbuf); ++ ++ } ++ IF_SMP(in_kgdb_console = 0); ++} ++#endif ++#ifdef CONFIG_SMP ++static void ++to_gdb(const char *s) ++{ ++ int count = 0; ++ while (s[count] && (count++ < BUFMAX)) ; ++ kgdb_gdb_message(s, count); ++} ++#endif ++#ifdef CONFIG_KGDB_CONSOLE ++#include ++#include ++#include ++#include ++#include ++ ++void ++kgdb_console_write(struct console *co, const char *s, unsigned count) ++{ ++ ++ if (gdb_i386vector == -1) { ++ /* ++ * We have not yet talked to gdb. What to do... ++ * lets break, on continue we can do the write. ++ * But first tell him whats up. Uh, well no can do, ++ * as this IS the console. Oh well... ++ * We do need to wait or the messages will be lost. ++ * Other option would be to tell the above code to ++ * ignore this breakpoint and do an auto return, ++ * but that might confuse gdb. Also this happens ++ * early enough in boot up that we don't have the traps ++ * set up yet, so... ++ */ ++ breakpoint(); ++ } ++ kgdb_gdb_message(s, count); ++} ++ ++/* ++ * ------------------------------------------------------------ ++ * Serial KGDB driver ++ * ------------------------------------------------------------ ++ */ ++ ++static struct console kgdbcons = { ++ name:"kgdb", ++ write:kgdb_console_write, ++#ifdef CONFIG_KGDB_USER_CONSOLE ++ device:kgdb_console_device, ++#endif ++ flags:CON_PRINTBUFFER | CON_ENABLED, ++ index:-1, ++}; ++ ++/* ++ * The trick here is that this file gets linked before printk.o ++ * That means we get to peer at the console info in the command ++ * line before it does. If we are up, we register, otherwise, ++ * do nothing. By returning 0, we allow printk to look also. 
++ */ ++static int kgdb_console_enabled; ++ ++int __init ++kgdb_console_init(char *str) ++{ ++ if ((strncmp(str, "kgdb", 4) == 0) || (strncmp(str, "gdb", 3) == 0)) { ++ register_console(&kgdbcons); ++ kgdb_console_enabled = 1; ++ } ++ return 0; /* let others look at the string */ ++} ++ ++__setup("console=", kgdb_console_init); ++ ++#ifdef CONFIG_KGDB_USER_CONSOLE ++static kdev_t kgdb_console_device(struct console *c); ++/* This stuff sort of works, but it knocks out telnet devices ++ * we are leaving it here in case we (or you) find time to figure it out ++ * better.. ++ */ ++ ++/* ++ * We need a real char device as well for when the console is opened for user ++ * space activities. ++ */ ++ ++static int ++kgdb_consdev_open(struct inode *inode, struct file *file) ++{ ++ return 0; ++} ++ ++static ssize_t ++kgdb_consdev_write(struct file *file, const char *buf, ++ size_t count, loff_t * ppos) ++{ ++ int size, ret = 0; ++ static char kbuf[128]; ++ static DECLARE_MUTEX(sem); ++ ++ /* We are not reentrant... */ ++ if (down_interruptible(&sem)) ++ return -ERESTARTSYS; ++ ++ while (count > 0) { ++ /* need to copy the data from user space */ ++ size = count; ++ if (size > sizeof (kbuf)) ++ size = sizeof (kbuf); ++ if (copy_from_user(kbuf, buf, size)) { ++ ret = -EFAULT; ++ break;; ++ } ++ kgdb_console_write(&kgdbcons, kbuf, size); ++ count -= size; ++ ret += size; ++ buf += size; ++ } ++ ++ up(&sem); ++ ++ return ret; ++} ++ ++struct file_operations kgdb_consdev_fops = { ++ open:kgdb_consdev_open, ++ write:kgdb_consdev_write ++}; ++static kdev_t ++kgdb_console_device(struct console *c) ++{ ++ return MKDEV(TTYAUX_MAJOR, 1); ++} ++ ++/* ++ * This routine gets called from the serial stub in the i386/lib ++ * This is so it is done late in bring up (just before the console open). 
++ */ ++void ++kgdb_console_finit(void) ++{ ++ if (kgdb_console_enabled) { ++ char *cptr = cdevname(MKDEV(TTYAUX_MAJOR, 1)); ++ char *cp = cptr; ++ while (*cptr && *cptr != '(') ++ cptr++; ++ *cptr = 0; ++ unregister_chrdev(TTYAUX_MAJOR, cp); ++ register_chrdev(TTYAUX_MAJOR, "kgdb", &kgdb_consdev_fops); ++ } ++} ++#endif ++#endif ++#ifdef CONFIG_KGDB_TS ++#include /* time stamp code */ ++#include /* in_interrupt */ ++#ifdef CONFIG_KGDB_TS_64 ++#define DATA_POINTS 64 ++#endif ++#ifdef CONFIG_KGDB_TS_128 ++#define DATA_POINTS 128 ++#endif ++#ifdef CONFIG_KGDB_TS_256 ++#define DATA_POINTS 256 ++#endif ++#ifdef CONFIG_KGDB_TS_512 ++#define DATA_POINTS 512 ++#endif ++#ifdef CONFIG_KGDB_TS_1024 ++#define DATA_POINTS 1024 ++#endif ++#ifndef DATA_POINTS ++#define DATA_POINTS 128 /* must be a power of two */ ++#endif ++#define INDEX_MASK (DATA_POINTS - 1) ++#if (INDEX_MASK & DATA_POINTS) ++#error "CONFIG_KGDB_TS_COUNT must be a power of 2" ++#endif ++struct kgdb_and_then_struct { ++#ifdef CONFIG_SMP ++ int on_cpu; ++#endif ++ struct task_struct *task; ++ long long at_time; ++ int from_ln; ++ char *in_src; ++ void *from; ++ int *with_shpf; ++ int data0; ++ int data1; ++}; ++struct kgdb_and_then_struct2 { ++#ifdef CONFIG_SMP ++ int on_cpu; ++#endif ++ struct task_struct *task; ++ long long at_time; ++ int from_ln; ++ char *in_src; ++ void *from; ++ int *with_shpf; ++ struct task_struct *t1; ++ struct task_struct *t2; ++}; ++struct kgdb_and_then_struct kgdb_data[DATA_POINTS]; ++ ++struct kgdb_and_then_struct *kgdb_and_then = &kgdb_data[0]; ++int kgdb_and_then_count; ++ ++void ++kgdb_tstamp(int line, char *source, int data0, int data1) ++{ ++ static spinlock_t ts_spin = SPIN_LOCK_UNLOCKED; ++ int flags; ++ kgdb_local_irq_save(flags); ++ spin_lock(&ts_spin); ++ rdtscll(kgdb_and_then->at_time); ++#ifdef CONFIG_SMP ++ kgdb_and_then->on_cpu = smp_processor_id(); ++#endif ++ kgdb_and_then->task = current; ++ kgdb_and_then->from_ln = line; ++ kgdb_and_then->in_src = source; ++ 
kgdb_and_then->from = __builtin_return_address(0); ++ kgdb_and_then->with_shpf = (int *) (((flags & IF_BIT) >> 9) | ++ (preempt_count() << 8)); ++ kgdb_and_then->data0 = data0; ++ kgdb_and_then->data1 = data1; ++ kgdb_and_then = &kgdb_data[++kgdb_and_then_count & INDEX_MASK]; ++ spin_unlock(&ts_spin); ++ kgdb_local_irq_restore(flags); ++#ifdef CONFIG_PREEMPT ++ ++#endif ++ return; ++} ++#endif ++typedef int gdb_debug_hook(int exceptionVector, ++ int signo, int err_code, struct pt_regs *linux_regs); ++gdb_debug_hook *linux_debug_hook = &kgdb_handle_exception; /* histerical reasons... */ +diff -puN arch/i386/kernel/Makefile~kgdb-ga arch/i386/kernel/Makefile +--- 25/arch/i386/kernel/Makefile~kgdb-ga 2003-06-25 23:14:17.000000000 -0700 ++++ 25-akpm/arch/i386/kernel/Makefile 2003-06-25 23:14:17.000000000 -0700 +@@ -14,6 +14,7 @@ obj-y += timers/ + obj-$(CONFIG_ACPI) += acpi/ + obj-$(CONFIG_X86_BIOS_REBOOT) += reboot.o + obj-$(CONFIG_MCA) += mca.o ++obj-$(CONFIG_KGDB) += kgdb_stub.o + obj-$(CONFIG_X86_MSR) += msr.o + obj-$(CONFIG_X86_CPUID) += cpuid.o + obj-$(CONFIG_MICROCODE) += microcode.o +diff -puN arch/i386/kernel/nmi.c~kgdb-ga arch/i386/kernel/nmi.c +--- 25/arch/i386/kernel/nmi.c~kgdb-ga 2003-06-25 23:14:17.000000000 -0700 ++++ 25-akpm/arch/i386/kernel/nmi.c 2003-06-25 23:14:17.000000000 -0700 +@@ -31,7 +31,17 @@ + #include + #include + ++#ifdef CONFIG_KGDB ++#include ++#ifdef CONFIG_SMP ++unsigned int nmi_watchdog = NMI_IO_APIC; ++#else ++unsigned int nmi_watchdog = NMI_LOCAL_APIC; ++#endif ++#else + unsigned int nmi_watchdog = NMI_NONE; ++#endif ++ + static unsigned int nmi_hz = HZ; + unsigned int nmi_perfctr_msr; /* the MSR to reset in NMI handler */ + extern void show_registers(struct pt_regs *regs); +@@ -398,6 +408,9 @@ void touch_nmi_watchdog (void) + for (i = 0; i < NR_CPUS; i++) + alert_counter[i] = 0; + } ++#ifdef CONFIG_KGDB ++int tune_watchdog = 5*HZ; ++#endif + + void nmi_watchdog_tick (struct pt_regs * regs) + { +@@ -411,12 +424,24 @@ void 
nmi_watchdog_tick (struct pt_regs * + + sum = irq_stat[cpu].apic_timer_irqs; + ++#ifdef CONFIG_KGDB ++ if (! in_kgdb(regs) && last_irq_sums[cpu] == sum ) { ++ ++#else + if (last_irq_sums[cpu] == sum) { ++#endif + /* + * Ayiee, looks like this CPU is stuck ... + * wait a few IRQs (5 seconds) before doing the oops ... + */ + alert_counter[cpu]++; ++#ifdef CONFIG_KGDB ++ if (alert_counter[cpu] == tune_watchdog) { ++ kgdb_handle_exception(2, SIGPWR, 0, regs); ++ last_irq_sums[cpu] = sum; ++ alert_counter[cpu] = 0; ++ } ++#endif + if (alert_counter[cpu] == 5*nmi_hz) { + spin_lock(&nmi_print_lock); + /* +diff -puN arch/i386/kernel/smp.c~kgdb-ga arch/i386/kernel/smp.c +--- 25/arch/i386/kernel/smp.c~kgdb-ga 2003-06-25 23:14:17.000000000 -0700 ++++ 25-akpm/arch/i386/kernel/smp.c 2003-06-25 23:14:17.000000000 -0700 +@@ -459,7 +459,17 @@ void smp_send_reschedule(int cpu) + { + send_IPI_mask(1 << cpu, RESCHEDULE_VECTOR); + } +- ++#ifdef CONFIG_KGDB ++/* ++ * By using the NMI code instead of a vector we just sneak thru the ++ * word generator coming out with just what we want. AND it does ++ * not matter if clustered_apic_mode is set or not. ++ */ ++void smp_send_nmi_allbutself(void) ++{ ++ send_IPI_allbutself(APIC_DM_NMI); ++} ++#endif + /* + * Structure and data for smp_call_function(). This is designed to minimise + * static memory requirements. It also looks cleaner. 
+diff -puN arch/i386/kernel/traps.c~kgdb-ga arch/i386/kernel/traps.c +--- 25/arch/i386/kernel/traps.c~kgdb-ga 2003-06-25 23:14:17.000000000 -0700 ++++ 25-akpm/arch/i386/kernel/traps.c 2003-06-25 23:14:17.000000000 -0700 +@@ -90,6 +90,42 @@ asmlinkage void alignment_check(void); + asmlinkage void spurious_interrupt_bug(void); + asmlinkage void machine_check(void); + ++#ifdef CONFIG_KGDB ++extern void sysenter_entry(void); ++#include ++#include ++extern void int3(void); ++extern void debug(void); ++void set_intr_gate(unsigned int n, void *addr); ++static void set_intr_usr_gate(unsigned int n, void *addr); ++/* ++ * Should be able to call this breakpoint() very early in ++ * bring up. Just hard code the call where needed. ++ * The breakpoint() code is here because set_?_gate() functions ++ * are local (static) to trap.c. They need be done only once, ++ * but it does not hurt to do them over. ++ */ ++void breakpoint(void) ++{ ++ set_intr_usr_gate(3,&int3); /* disable ints on trap */ ++ set_intr_gate(1,&debug); ++ set_intr_gate(14,&page_fault); ++ ++ BREAKPOINT; ++} ++#define CHK_REMOTE_DEBUG(trapnr,signr,error_code,regs,after) \ ++ { \ ++ if (!user_mode(regs) ) \ ++ { \ ++ kgdb_handle_exception(trapnr, signr, error_code, regs); \ ++ after; \ ++ } else if ((trapnr == 3) && (regs->eflags &0x200)) local_irq_enable(); \ ++ } ++#else ++#define CHK_REMOTE_DEBUG(trapnr,signr,error_code,regs,after) ++#endif ++ ++ + static int kstack_depth_to_print = 24; + + void show_trace(struct task_struct *task, unsigned long * stack) +@@ -258,6 +294,15 @@ void die(const char * str, struct pt_reg + bust_spinlocks(1); + handle_BUG(regs); + printk("%s: %04lx [#%d]\n", str, err & 0xffff, ++die_counter); ++#ifdef CONFIG_KGDB ++ /* This is about the only place we want to go to kgdb even if in ++ * user mode. But we must go in via a trap so within kgdb we will ++ * always be in kernel mode. 
++ */ ++ if (user_mode(regs)) ++ BREAKPOINT; ++#endif ++ CHK_REMOTE_DEBUG(0,SIGTRAP,err,regs,) + show_registers(regs); + bust_spinlocks(0); + spin_unlock_irq(&die_lock); +@@ -327,6 +372,7 @@ static inline void do_trap(int trapnr, i + #define DO_ERROR(trapnr, signr, str, name) \ + asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ + { \ ++ CHK_REMOTE_DEBUG(trapnr,signr,error_code,regs,)\ + do_trap(trapnr, signr, str, 0, regs, error_code, NULL); \ + } + +@@ -344,7 +390,9 @@ asmlinkage void do_##name(struct pt_regs + #define DO_VM86_ERROR(trapnr, signr, str, name) \ + asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ + { \ ++ CHK_REMOTE_DEBUG(trapnr, signr, error_code,regs, return)\ + do_trap(trapnr, signr, str, 1, regs, error_code, NULL); \ ++ return; \ + } + + #define DO_VM86_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \ +@@ -387,8 +435,10 @@ gp_in_vm86: + return; + + gp_in_kernel: +- if (!fixup_exception(regs)) ++ if (!fixup_exception(regs)){ ++ CHK_REMOTE_DEBUG(13,SIGSEGV,error_code,regs,) + die("general protection fault", regs, error_code); ++ } + } + + static void mem_parity_error(unsigned char reason, struct pt_regs * regs) +@@ -550,8 +600,18 @@ asmlinkage void do_debug(struct pt_regs + * allowing programs to debug themselves without the ptrace() + * interface. + */ +- if ((regs->xcs & 3) == 0) ++#ifdef CONFIG_KGDB ++ /* ++ * I think this is the only "real" case of a TF in the kernel ++ * that really belongs to user space. Others are ++ * "Ours all ours!" 
++ */ ++ if (((regs->xcs & 3) == 0) && ((void *)regs->eip == sysenter_entry)) + goto clear_TF_reenable; ++#else ++ if ((regs->xcs & 3) == 0) ++ goto clear_TF_reenable; ++#endif + if ((tsk->ptrace & (PT_DTRACE|PT_PTRACED)) == PT_DTRACE) + goto clear_TF; + } +@@ -563,6 +623,17 @@ asmlinkage void do_debug(struct pt_regs + info.si_errno = 0; + info.si_code = TRAP_BRKPT; + ++#ifdef CONFIG_KGDB ++ /* ++ * If this is a kernel mode trap, we need to reset db7 to allow us ++ * to continue sanely ALSO skip the signal delivery ++ */ ++ if ((regs->xcs & 3) == 0) ++ goto clear_dr7; ++ ++ /* if not kernel, allow ints but only if they were on */ ++ if ( regs->eflags & 0x200) local_irq_enable(); ++#endif + /* If this is a kernel mode trap, save the user PC on entry to + * the kernel, that's what the debugger can make sense of. + */ +@@ -577,6 +648,7 @@ clear_dr7: + __asm__("movl %0,%%db7" + : /* no output */ + : "r" (0)); ++ CHK_REMOTE_DEBUG(1,SIGTRAP,error_code,regs,) + return; + + debug_vm86: +@@ -823,6 +895,12 @@ static void __init set_call_gate(void *a + { + _set_gate(a,12,3,addr,__KERNEL_CS); + } ++#ifdef CONFIG_KGDB ++void set_intr_usr_gate(unsigned int n, void *addr) ++{ ++ _set_gate(idt_table+n,14,3,addr,__KERNEL_CS); ++} ++#endif + + static void __init set_task_gate(unsigned int n, unsigned int gdt_entry) + { +@@ -849,7 +927,11 @@ void __init trap_init(void) + set_trap_gate(0,÷_error); + set_intr_gate(1,&debug); + set_intr_gate(2,&nmi); ++#ifndef CONFIG_KGDB + set_system_gate(3,&int3); /* int3-5 can be called from all */ ++#else ++ set_intr_usr_gate(3,&int3); /* int3-5 can be called from all */ ++#endif + set_system_gate(4,&overflow); + set_system_gate(5,&bounds); + set_trap_gate(6,&invalid_op); +diff -puN /dev/null arch/i386/lib/kgdb_serial.c +--- /dev/null 2002-08-30 16:31:37.000000000 -0700 ++++ 25-akpm/arch/i386/lib/kgdb_serial.c 2003-06-25 23:14:17.000000000 -0700 +@@ -0,0 +1,485 @@ ++/* ++ * Serial interface GDB stub ++ * ++ * Written (hacked together) by David 
Grothe (dave@gcom.com) ++ * Modified to allow invokation early in boot see also ++ * kgdb.h for instructions by George Anzinger(george@mvista.com) ++ * ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#ifdef CONFIG_KGDB_USER_CONSOLE ++extern void kgdb_console_finit(void); ++#endif ++#define PRNT_off ++#define TEST_EXISTANCE ++#ifdef PRNT ++#define dbprintk(s) printk s ++#else ++#define dbprintk(s) ++#endif ++#define TEST_INTERRUPT_off ++#ifdef TEST_INTERRUPT ++#define intprintk(s) printk s ++#else ++#define intprintk(s) ++#endif ++ ++#define IRQ_T(info) ((info->flags & ASYNC_SHARE_IRQ) ? SA_SHIRQ : SA_INTERRUPT) ++ ++#define GDB_BUF_SIZE 512 /* power of 2, please */ ++ ++static char gdb_buf[GDB_BUF_SIZE]; ++static int gdb_buf_in_inx; ++static atomic_t gdb_buf_in_cnt; ++static int gdb_buf_out_inx; ++ ++struct async_struct *gdb_async_info; ++static int gdb_async_irq; ++ ++#define outb_px(a,b) outb_p(b,a) ++ ++static void program_uart(struct async_struct *info); ++static void write_char(struct async_struct *info, int chr); ++/* ++ * Get a byte from the hardware data buffer and return it ++ */ ++static int ++read_data_bfr(struct async_struct *info) ++{ ++ char it = inb_p(info->port + UART_LSR); ++ ++ if (it & UART_LSR_DR) ++ return (inb_p(info->port + UART_RX)); ++ /* ++ * If we have a framing error assume somebody messed with ++ * our uart. Reprogram it and send '-' both ways... ++ */ ++ if (it & 0xc) { ++ program_uart(info); ++ write_char(info, '-'); ++ return ('-'); ++ } ++ return (-1); ++ ++} /* read_data_bfr */ ++ ++/* ++ * Get a char if available, return -1 if nothing available. ++ * Empty the receive buffer first, then look at the interface hardware. ++ ++ * Locking here is a bit of a problem. 
We MUST not lock out communication ++ * if we are trying to talk to gdb about a kgdb entry. ON the other hand ++ * we can loose chars in the console pass thru if we don't lock. It is also ++ * possible that we could hold the lock or be waiting for it when kgdb ++ * NEEDS to talk. Since kgdb locks down the world, it does not need locks. ++ * We do, of course have possible issues with interrupting a uart operation, ++ * but we will just depend on the uart status to help keep that straight. ++ ++ */ ++static spinlock_t uart_interrupt_lock = SPIN_LOCK_UNLOCKED; ++#ifdef CONFIG_SMP ++extern spinlock_t kgdb_spinlock; ++#endif ++ ++static int ++read_char(struct async_struct *info) ++{ ++ int chr; ++ unsigned long flags; ++ local_irq_save(flags); ++#ifdef CONFIG_SMP ++ if (!spin_is_locked(&kgdb_spinlock)) { ++ spin_lock(&uart_interrupt_lock); ++ } ++#endif ++ if (atomic_read(&gdb_buf_in_cnt) != 0) { /* intr routine has q'd chars */ ++ chr = gdb_buf[gdb_buf_out_inx++]; ++ gdb_buf_out_inx &= (GDB_BUF_SIZE - 1); ++ atomic_dec(&gdb_buf_in_cnt); ++ } else { ++ chr = read_data_bfr(info); ++ } ++#ifdef CONFIG_SMP ++ if (!spin_is_locked(&kgdb_spinlock)) { ++ spin_unlock(&uart_interrupt_lock); ++ } ++#endif ++ local_irq_restore(flags); ++ return (chr); ++} ++ ++/* ++ * Wait until the interface can accept a char, then write it. ++ */ ++static void ++write_char(struct async_struct *info, int chr) ++{ ++ while (!(inb_p(info->port + UART_LSR) & UART_LSR_THRE)) ; ++ ++ outb_p(chr, info->port + UART_TX); ++ ++} /* write_char */ ++ ++/* ++ * Mostly we don't need a spinlock, but since the console goes ++ * thru here with interrutps on, well, we need to catch those ++ * chars. ++ */ ++/* ++ * This is the receiver interrupt routine for the GDB stub. ++ * It will receive a limited number of characters of input ++ * from the gdb host machine and save them up in a buffer. 
++ * ++ * When the gdb stub routine getDebugChar() is called it ++ * draws characters out of the buffer until it is empty and ++ * then reads directly from the serial port. ++ * ++ * We do not attempt to write chars from the interrupt routine ++ * since the stubs do all of that via putDebugChar() which ++ * writes one byte after waiting for the interface to become ++ * ready. ++ * ++ * The debug stubs like to run with interrupts disabled since, ++ * after all, they run as a consequence of a breakpoint in ++ * the kernel. ++ * ++ * Perhaps someone who knows more about the tty driver than I ++ * care to learn can make this work for any low level serial ++ * driver. ++ */ ++static irqreturn_t ++gdb_interrupt(int irq, void *dev_id, struct pt_regs *regs) ++{ ++ struct async_struct *info; ++ unsigned long flags; ++ ++ info = gdb_async_info; ++ if (!info || !info->tty || irq != gdb_async_irq) ++ return IRQ_NONE; ++ ++ local_irq_save(flags); ++ spin_lock(&uart_interrupt_lock); ++ do { ++ int chr = read_data_bfr(info); ++ intprintk(("Debug char on int: %x hex\n", chr)); ++ if (chr < 0) ++ continue; ++ ++ if (chr == 3) { /* Ctrl-C means remote interrupt */ ++ BREAKPOINT; ++ continue; ++ } ++ ++ if (atomic_read(&gdb_buf_in_cnt) >= GDB_BUF_SIZE) { ++ /* buffer overflow tosses early char */ ++ read_char(info); ++ } ++ gdb_buf[gdb_buf_in_inx++] = chr; ++ gdb_buf_in_inx &= (GDB_BUF_SIZE - 1); ++ } while (inb_p(info->port + UART_IIR) & UART_IIR_RDI); ++ spin_unlock(&uart_interrupt_lock); ++ local_irq_restore(flags); ++ return IRQ_HANDLED; ++} /* gdb_interrupt */ ++ ++/* ++ * Just a NULL routine for testing. ++ */ ++void ++gdb_null(void) ++{ ++} /* gdb_null */ ++ ++/* These structure are filled in with values defined in asm/kgdb_local.h ++ */ ++static struct serial_state state = SB_STATE; ++static struct async_struct local_info = SB_INFO; ++static int ok_to_enable_ints = 0; ++static void kgdb_enable_ints_now(void); ++ ++extern char *kgdb_version; ++/* ++ * Hook an IRQ for KGDB. 
++ * ++ * This routine is called from putDebugChar, below. ++ */ ++static int ints_disabled = 1; ++int ++gdb_hook_interrupt(struct async_struct *info, int verb) ++{ ++ struct serial_state *state = info->state; ++ unsigned long flags; ++ int port; ++#ifdef TEST_EXISTANCE ++ int scratch, scratch2; ++#endif ++ ++ /* The above fails if memory managment is not set up yet. ++ * Rather than fail the set up, just keep track of the fact ++ * and pick up the interrupt thing later. ++ */ ++ gdb_async_info = info; ++ port = gdb_async_info->port; ++ gdb_async_irq = state->irq; ++ if (verb) { ++ printk("kgdb %s : port =%x, IRQ=%d, divisor =%d\n", ++ kgdb_version, ++ port, ++ gdb_async_irq, gdb_async_info->state->custom_divisor); ++ } ++ local_irq_save(flags); ++#ifdef TEST_EXISTANCE ++ /* Existance test */ ++ /* Should not need all this, but just in case.... */ ++ ++ scratch = inb_p(port + UART_IER); ++ outb_px(port + UART_IER, 0); ++ outb_px(0xff, 0x080); ++ scratch2 = inb_p(port + UART_IER); ++ outb_px(port + UART_IER, scratch); ++ if (scratch2) { ++ printk ++ ("gdb_hook_interrupt: Could not clear IER, not a UART!\n"); ++ local_irq_restore(flags); ++ return 1; /* We failed; there's nothing here */ ++ } ++ scratch2 = inb_p(port + UART_LCR); ++ outb_px(port + UART_LCR, 0xBF); /* set up for StarTech test */ ++ outb_px(port + UART_EFR, 0); /* EFR is the same as FCR */ ++ outb_px(port + UART_LCR, 0); ++ outb_px(port + UART_FCR, UART_FCR_ENABLE_FIFO); ++ scratch = inb_p(port + UART_IIR) >> 6; ++ if (scratch == 1) { ++ printk("gdb_hook_interrupt: Undefined UART type!" ++ " Not a UART! 
\n"); ++ local_irq_restore(flags); ++ return 1; ++ } else { ++ dbprintk(("gdb_hook_interrupt: UART type " ++ "is %d where 0=16450, 2=16550 3=16550A\n", scratch)); ++ } ++ scratch = inb_p(port + UART_MCR); ++ outb_px(port + UART_MCR, UART_MCR_LOOP | scratch); ++ outb_px(port + UART_MCR, UART_MCR_LOOP | 0x0A); ++ scratch2 = inb_p(port + UART_MSR) & 0xF0; ++ outb_px(port + UART_MCR, scratch); ++ if (scratch2 != 0x90) { ++ printk("gdb_hook_interrupt: " ++ "Loop back test failed! Not a UART!\n"); ++ local_irq_restore(flags); ++ return scratch2 + 1000; /* force 0 to fail */ ++ } ++#endif /* test existance */ ++ program_uart(info); ++ local_irq_restore(flags); ++ ++ return (0); ++ ++} /* gdb_hook_interrupt */ ++ ++static void ++program_uart(struct async_struct *info) ++{ ++ int port = info->port; ++ ++ (void) inb_p(port + UART_RX); ++ outb_px(port + UART_IER, 0); ++ ++ (void) inb_p(port + UART_RX); /* serial driver comments say */ ++ (void) inb_p(port + UART_IIR); /* this clears the interrupt regs */ ++ (void) inb_p(port + UART_MSR); ++ outb_px(port + UART_LCR, UART_LCR_WLEN8 | UART_LCR_DLAB); ++ outb_px(port + UART_DLL, info->state->custom_divisor & 0xff); /* LS */ ++ outb_px(port + UART_DLM, info->state->custom_divisor >> 8); /* MS */ ++ outb_px(port + UART_MCR, info->MCR); ++ ++ outb_px(port + UART_FCR, UART_FCR_ENABLE_FIFO | UART_FCR_TRIGGER_1 | UART_FCR_CLEAR_XMIT | UART_FCR_CLEAR_RCVR); /* set fcr */ ++ outb_px(port + UART_LCR, UART_LCR_WLEN8); /* reset DLAB */ ++ outb_px(port + UART_FCR, UART_FCR_ENABLE_FIFO | UART_FCR_TRIGGER_1); /* set fcr */ ++ if (!ints_disabled) { ++ intprintk(("KGDB: Sending %d to port %x offset %d\n", ++ gdb_async_info->IER, ++ (int) gdb_async_info->port, UART_IER)); ++ outb_px(gdb_async_info->port + UART_IER, gdb_async_info->IER); ++ } ++ return; ++} ++ ++/* ++ * getDebugChar ++ * ++ * This is a GDB stub routine. It waits for a character from the ++ * serial interface and then returns it. 
If there is no serial ++ * interface connection then it returns a bogus value which will ++ * almost certainly cause the system to hang. In the ++ */ ++int kgdb_in_isr = 0; ++int kgdb_in_lsr = 0; ++extern spinlock_t kgdb_spinlock; ++ ++/* Caller takes needed protections */ ++ ++int ++getDebugChar(void) ++{ ++ volatile int chr, dum, time, end_time; ++ ++ dbprintk(("getDebugChar(port %x): ", gdb_async_info->port)); ++ ++ if (gdb_async_info == NULL) { ++ gdb_hook_interrupt(&local_info, 0); ++ } ++ /* ++ * This trick says if we wait a very long time and get ++ * no char, return the -1 and let the upper level deal ++ * with it. ++ */ ++ rdtsc(dum, time); ++ end_time = time + 2; ++ while (((chr = read_char(gdb_async_info)) == -1) && ++ (end_time - time) > 0) { ++ rdtsc(dum, time); ++ }; ++ /* ++ * This covers our butts if some other code messes with ++ * our uart, hay, it happens :o) ++ */ ++ if (chr == -1) ++ program_uart(gdb_async_info); ++ ++ dbprintk(("%c\n", chr > ' ' && chr < 0x7F ? chr : ' ')); ++ return (chr); ++ ++} /* getDebugChar */ ++ ++static int count = 3; ++static spinlock_t one_at_atime = SPIN_LOCK_UNLOCKED; ++ ++static int __init ++kgdb_enable_ints(void) ++{ ++ if (gdb_async_info == NULL) { ++ gdb_hook_interrupt(&local_info, 1); ++ } ++ ok_to_enable_ints = 1; ++ kgdb_enable_ints_now(); ++#ifdef CONFIG_KGDB_USER_CONSOLE ++ kgdb_console_finit(); ++#endif ++ return 0; ++} ++ ++#ifdef CONFIG_SERIAL_8250 ++void shutdown_for_kgdb(struct async_struct *gdb_async_info); ++#endif ++ ++#ifdef CONFIG_DISCONTIGMEM ++static inline int kgdb_mem_init_done(void) ++{ ++ return highmem_start_page != NULL; ++} ++#else ++static inline int kgdb_mem_init_done(void) ++{ ++ return max_mapnr != 0; ++} ++#endif ++ ++static void ++kgdb_enable_ints_now(void) ++{ ++ if (!spin_trylock(&one_at_atime)) ++ return; ++ if (!ints_disabled) ++ goto exit; ++ if (kgdb_mem_init_done() && ++ ints_disabled) { /* don't try till mem init */ ++#ifdef CONFIG_SERIAL_8250 ++ /* ++ * The ifdef here 
allows the system to be configured ++ * without the serial driver. ++ * Don't make it a module, however, it will steal the port ++ */ ++ shutdown_for_kgdb(gdb_async_info); ++#endif ++ ints_disabled = request_irq(gdb_async_info->state->irq, ++ gdb_interrupt, ++ IRQ_T(gdb_async_info), ++ "KGDB-stub", NULL); ++ intprintk(("KGDB: request_irq returned %d\n", ints_disabled)); ++ } ++ if (!ints_disabled) { ++ intprintk(("KGDB: Sending %d to port %x offset %d\n", ++ gdb_async_info->IER, ++ (int) gdb_async_info->port, UART_IER)); ++ outb_px(gdb_async_info->port + UART_IER, gdb_async_info->IER); ++ } ++ exit: ++ spin_unlock(&one_at_atime); ++} ++ ++/* ++ * putDebugChar ++ * ++ * This is a GDB stub routine. It waits until the interface is ready ++ * to transmit a char and then sends it. If there is no serial ++ * interface connection then it simply returns to its caller, having ++ * pretended to send the char. Caller takes needed protections. ++ */ ++void ++putDebugChar(int chr) ++{ ++ dbprintk(("putDebugChar(port %x): chr=%02x '%c', ints_on=%d\n", ++ gdb_async_info->port, ++ chr, ++ chr > ' ' && chr < 0x7F ? chr : ' ', ints_disabled ? 0 : 1)); ++ ++ if (gdb_async_info == NULL) { ++ gdb_hook_interrupt(&local_info, 0); ++ } ++ ++ write_char(gdb_async_info, chr); /* this routine will wait */ ++ count = (chr == '#') ? 0 : count + 1; ++ if ((count == 2)) { /* try to enable after */ ++ if (ints_disabled & ok_to_enable_ints) ++ kgdb_enable_ints_now(); /* try to enable after */ ++ ++ /* We do this a lot because, well we really want to get these ++ * interrupts. The serial driver will clear these bits when it ++ * initializes the chip. Every thing else it does is ok, ++ * but this. 
++ */ ++ if (!ints_disabled) { ++ outb_px(gdb_async_info->port + UART_IER, ++ gdb_async_info->IER); ++ } ++ } ++ ++} /* putDebugChar */ ++ ++module_init(kgdb_enable_ints); +diff -puN arch/i386/lib/Makefile~kgdb-ga arch/i386/lib/Makefile +--- 25/arch/i386/lib/Makefile~kgdb-ga 2003-06-25 23:14:17.000000000 -0700 ++++ 25-akpm/arch/i386/lib/Makefile 2003-06-25 23:14:17.000000000 -0700 +@@ -9,4 +9,5 @@ lib-y = checksum.o delay.o \ + + lib-$(CONFIG_X86_USE_3DNOW) += mmx.o + lib-$(CONFIG_HAVE_DEC_LOCK) += dec_and_lock.o ++lib-$(CONFIG_KGDB) += kgdb_serial.o + lib-$(CONFIG_DEBUG_IOVIRT) += iodebug.o +diff -puN arch/i386/Makefile~kgdb-ga arch/i386/Makefile +--- 25/arch/i386/Makefile~kgdb-ga 2003-06-25 23:14:17.000000000 -0700 ++++ 25-akpm/arch/i386/Makefile 2003-06-25 23:14:17.000000000 -0700 +@@ -85,6 +85,9 @@ mcore-$(CONFIG_X86_ES7000) := mach-es700 + # default subarch .h files + mflags-y += -Iinclude/asm-i386/mach-default + ++mflags-$(CONFIG_KGDB) += -g ++mflags-$(CONFIG_KGDB_MORE) += $(shell echo $(CONFIG_KGDB_OPTIONS) | sed -e 's/"//g') ++ + head-y := arch/i386/kernel/head.o arch/i386/kernel/init_task.o + + libs-y += arch/i386/lib/ +diff -puN arch/i386/mm/fault.c~kgdb-ga arch/i386/mm/fault.c +--- 25/arch/i386/mm/fault.c~kgdb-ga 2003-06-25 23:14:17.000000000 -0700 ++++ 25-akpm/arch/i386/mm/fault.c 2003-06-25 23:14:17.000000000 -0700 +@@ -236,6 +236,12 @@ no_context: + * Oops. The kernel tried to access some bad page. We'll have to + * terminate things with extreme prejudice. 
+ */ ++#ifdef CONFIG_KGDB ++ if (!user_mode(regs)){ ++ kgdb_handle_exception(14,SIGBUS, error_code, regs); ++ return; ++ } ++#endif + + bust_spinlocks(1); + +diff -puN /dev/null Documentation/i386/kgdb/andthen +--- /dev/null 2002-08-30 16:31:37.000000000 -0700 ++++ 25-akpm/Documentation/i386/kgdb/andthen 2003-06-25 23:14:17.000000000 -0700 +@@ -0,0 +1,100 @@ ++ ++define set_andthen ++ set var $thp=0 ++ set var $thp=(struct kgdb_and_then_struct *)&kgdb_data[0] ++ set var $at_size = (sizeof kgdb_data)/(sizeof *$thp) ++ set var $at_oc=kgdb_and_then_count ++ set var $at_cc=$at_oc ++end ++ ++define andthen_next ++ set var $at_cc=$arg0 ++end ++ ++define andthen ++ andthen_set_edge ++ if ($at_cc >= $at_oc) ++ printf "Outside window. Window size is %d\n",($at_oc-$at_low) ++ else ++ printf "%d: ",$at_cc ++ output *($thp+($at_cc++ % $at_size )) ++ printf "\n" ++ end ++end ++define andthen_set_edge ++ set var $at_oc=kgdb_and_then_count ++ set var $at_low = $at_oc - $at_size ++ if ($at_low < 0 ) ++ set var $at_low = 0 ++ end ++ if (( $at_cc > $at_oc) || ($at_cc < $at_low)) ++ printf "Count outside of window, setting count to " ++ if ($at_cc >= $at_oc) ++ set var $at_cc = $at_oc ++ else ++ set var $at_cc = $at_low ++ end ++ printf "%d\n",$at_cc ++ end ++end ++ ++define beforethat ++ andthen_set_edge ++ if ($at_cc <= $at_low) ++ printf "Outside window. Window size is %d\n",($at_oc-$at_low) ++ else ++ printf "%d: ",$at_cc-1 ++ output *($thp+(--$at_cc % $at_size )) ++ printf "\n" ++ end ++end ++ ++document andthen_next ++ andthen_next ++ . sets the number of the event to display next. If this event ++ . is not in the event pool, either andthen or beforethat will ++ . correct it to the nearest event pool edge. The event pool ++ . ends at the last event recorded and begins ++ . prior to that. If beforethat is used next, it will display ++ . event -1. ++. ++ andthen commands are: set_andthen, andthen_next, andthen and beforethat ++end ++ ++ ++document andthen ++ andthen ++. 
displays the next event in the list. sets up to display ++. the oldest saved event first. ++. (optional) count of the event to display. ++. note the number of events saved is specified at configure time. ++. if events are saved between calls to andthen the index will change ++. but the displayed event will be the next one (unless the event buffer ++. is overrun). ++. ++. andthen commands are: set_andthen, andthen_next, andthen and beforethat ++end ++ ++document set_andthen ++ set_andthen ++. sets up to use the and commands. ++. if you have defined your own struct, use the above and ++. then enter the following: ++. p $thp=(struct kgdb_and_then_structX *)&kgdb_data[0] ++. where is the name of your structure. ++. ++. andthen commands are: set_andthen, andthen_next, andthen and beforethat ++end ++ ++document beforethat ++ beforethat ++. displays the next prior event in the list. sets up to ++. display the last occurring event first. ++. ++. note the number of events saved is specified at configure time. ++. if events are saved between calls to beforethat the index will change ++. but the displayed event will be the next one (unless the event buffer ++. is overrun). ++. ++. andthen commands are: set_andthen, andthen_next, andthen and beforethat ++end +diff -puN /dev/null Documentation/i386/kgdb/debug-nmi.txt +--- /dev/null 2002-08-30 16:31:37.000000000 -0700 ++++ 25-akpm/Documentation/i386/kgdb/debug-nmi.txt 2003-06-25 23:14:17.000000000 -0700 +@@ -0,0 +1,37 @@ ++Subject: Debugging with NMI ++Date: Mon, 12 Jul 1999 11:28:31 -0500 ++From: David Grothe ++Organization: Gcom, Inc ++To: David Grothe ++ ++Kernel hackers: ++ ++Maybe this is old hat, but it is new to me -- ++ ++On an ISA bus machine, if you short out the A1 and B1 pins of an ISA ++slot you will generate an NMI to the CPU. This interrupts even a ++machine that is hung in a loop with interrupts disabled. 
Used in ++conjunction with kgdb < ++ftp://ftp.gcom.com/pub/linux/src/kgdb-2.3.35/kgdb-2.3.35.tgz > you can ++gain debugger control of a machine that is hung in the kernel! Even ++without kgdb the kernel will print a stack trace so you can find out ++where it was hung. ++ ++The A1/B1 pins are directly opposite one another and the farthest pins ++towards the bracket end of the ISA bus socket. You can stick a paper ++clip or multi-meter probe between them to short them out. ++ ++I had a spare ISA bus to PC104 bus adapter around. The PC104 end of the ++board consists of two rows of wire wrap pins. So I wired a push button ++between the A1/B1 pins and now have an ISA board that I can stick into ++any ISA bus slot for debugger entry. ++ ++Microsoft has a circuit diagram of a PCI card at ++http://www.microsoft.com/hwdev/DEBUGGING/DMPSW.HTM. If you want to ++build one you will have to mail them and ask for the PAL equations. ++Nobody makes one comercially. ++ ++[THIS TIP COMES WITH NO WARRANTY WHATSOEVER. It works for me, but if ++your machine catches fire, it is your problem, not mine.] ++ ++-- Dave (the kgdb guy) +diff -puN /dev/null Documentation/i386/kgdb/gdb-globals.txt +--- /dev/null 2002-08-30 16:31:37.000000000 -0700 ++++ 25-akpm/Documentation/i386/kgdb/gdb-globals.txt 2003-06-25 23:14:17.000000000 -0700 +@@ -0,0 +1,71 @@ ++Sender: akale@veritas.com ++Date: Fri, 23 Jun 2000 19:26:35 +0530 ++From: "Amit S. Kale" ++Organization: Veritas Software (India) ++To: Dave Grothe , linux-kernel@vger.rutgers.edu ++CC: David Milburn , ++ "Edouard G. Parmelan" , ++ ezannoni@cygnus.com, Keith Owens ++Subject: Re: Module debugging using kgdb ++ ++Dave Grothe wrote: ++> ++> Amit: ++> ++> There is a 2.4.0 version of kgdb on our ftp site: ++> ftp://ftp.gcom.com/pub/linux/src/kgdb. I mirrored your version of gdb ++> and loadmodule.sh there. ++> ++> Have a look at the README file and see if I go it right. If not, send ++> me some corrections and I will update it. 
++> ++> Does your version of gdb solve the global variable problem? ++ ++Yes. ++Thanks to Elena Zanoni, gdb (developement version) can now calculate ++correctly addresses of dynamically loaded object files. I have not been ++following gdb developement for sometime and am not sure when symbol ++address calculation fix is going to appear in a gdb stable version. ++ ++Elena, any idea when the fix will make it to a prebuilt gdb from a ++redhat release? ++ ++For the time being I have built a gdb developement version. It can be ++used for module debugging with loadmodule.sh script. ++ ++The problem with calculating of module addresses with previous versions ++of gdb was as follows: ++gdb did not use base address of a section while calculating address of ++a symbol in the section in an object file loaded via 'add-symbol-file'. ++It used address of .text segment instead. Due to this addresses of ++symbols in .data, .bss etc. (e.g. global variables) were calculated incorrectly. ++ ++Above mentioned fix allow gdb to use base address of a segment while ++calculating address of a symbol in it. It adds a parameter '-s' to ++'add-symbol-file' command for specifying base address of a segment. ++ ++loadmodule.sh script works as follows. ++ ++1. Copy a module file to target machine. ++2. Load the module on the target machine using insmod with -m parameter. ++insmod produces a module load map which contains base addresses of all ++sections in the module and addresses of symbols in the module file. ++3. Find all sections and their base addresses in the module from ++the module map. ++4. Generate a script that loads the module file. The script uses ++'add-symbol-file' and specifies address of text segment followed by ++addresses of all segments in the module. ++ ++Here is an example gdb script produced by loadmodule.sh script. 
++ ++add-symbol-file foo 0xd082c060 -s .text.lock 0xd08cbfb5 ++-s .fixup 0xd08cfbdf -s .rodata 0xd08cfde0 -s __ex_table 0xd08e3b38 ++-s .data 0xd08e3d00 -s .bss 0xd08ec8c0 -s __ksymtab 0xd08ee838 ++ ++With this command gdb can calculate addresses of symbols in ANY segment ++in a module file. ++ ++Regards. ++-- ++Amit Kale ++Veritas Software ( http://www.veritas.com ) +diff -puN /dev/null Documentation/i386/kgdb/gdbinit +--- /dev/null 2002-08-30 16:31:37.000000000 -0700 ++++ 25-akpm/Documentation/i386/kgdb/gdbinit 2003-06-25 23:14:17.000000000 -0700 +@@ -0,0 +1,14 @@ ++shell echo -e "\003" >/dev/ttyS0 ++set remotebaud 38400 ++target remote /dev/ttyS0 ++define si ++stepi ++printf "EAX=%08x EBX=%08x ECX=%08x EDX=%08x\n", $eax, $ebx, $ecx, $edx ++printf "ESI=%08x EDI=%08x EBP=%08x ESP=%08x\n", $esi, $edi, $ebp, $esp ++x/i $eip ++end ++define ni ++nexti ++printf "EAX=%08x EBX=%08x ECX=%08x EDX=%08x\n", $eax, $ebx, $ecx, $edx ++printf "ESI=%08x EDI=%08x EBP=%08x ESP=%08x\n", $esi, $edi, $ebp, $esp ++x/i $eip +diff -puN /dev/null Documentation/i386/kgdb/gdbinit.hw +--- /dev/null 2002-08-30 16:31:37.000000000 -0700 ++++ 25-akpm/Documentation/i386/kgdb/gdbinit.hw 2003-06-25 23:14:17.000000000 -0700 +@@ -0,0 +1,117 @@ ++ ++#Using ia-32 hardware breakpoints. ++# ++#4 hardware breakpoints are available in ia-32 processors. These breakpoints ++#do not need code modification. They are set using debug registers. ++# ++#Each hardware breakpoint can be of one of the ++#three types: execution, write, access. ++#1. An Execution breakpoint is triggered when code at the breakpoint address is ++#executed. ++#2. A write breakpoint ( aka watchpoints ) is triggered when memory location ++#at the breakpoint address is written. ++#3. An access breakpoint is triggered when memory location at the breakpoint ++#address is either read or written. 
++# ++#As hardware breakpoints are available in limited number, use software ++#breakpoints ( br command in gdb ) instead of execution hardware breakpoints. ++# ++#Length of an access or a write breakpoint defines length of the datatype to ++#be watched. Length is 1 for char, 2 short , 3 int. ++# ++#For placing execution, write and access breakpoints, use commands ++#hwebrk, hwwbrk, hwabrk ++#To remove a breakpoint use hwrmbrk command. ++# ++#These commands take following types of arguments. For arguments associated ++#with each command, use help command. ++#1. breakpointno: 0 to 3 ++#2. length: 1 to 3 ++#3. address: Memory location in hex ( without 0x ) e.g c015e9bc ++# ++#Use the command exinfo to find which hardware breakpoint occurred. ++ ++#hwebrk breakpointno address ++define hwebrk ++ maintenance packet Y$arg0,0,0,$arg1 ++end ++document hwebrk ++ hwebrk
++ Places a hardware execution breakpoint ++ = 0 - 3 ++
= Hex digits without leading "0x". ++end ++ ++#hwwbrk breakpointno length address ++define hwwbrk ++ maintenance packet Y$arg0,1,$arg1,$arg2 ++end ++document hwwbrk ++ hwwbrk
++ Places a hardware write breakpoint ++ = 0 - 3 ++ = 1 (1 byte), 2 (2 byte), 3 (4 byte) ++
= Hex digits without leading "0x". ++end ++ ++#hwabrk breakpointno length address ++define hwabrk ++ maintenance packet Y$arg0,1,$arg1,$arg2 ++end ++document hwabrk ++ hwabrk
++ Places a hardware access breakpoint ++ = 0 - 3 ++ = 1 (1 byte), 2 (2 byte), 3 (4 byte) ++
= Hex digits without leading "0x". ++end ++ ++#hwrmbrk breakpointno ++define hwrmbrk ++ maintenance packet y$arg0 ++end ++document hwrmbrk ++ hwrmbrk ++ = 0 - 3 ++ Removes a hardware breakpoint ++end ++ ++define reboot ++ maintenance packet r ++end ++#exinfo ++define exinfo ++ maintenance packet qE ++end ++document exinfo ++ exinfo ++ Gives information about a breakpoint. ++end ++define get_th ++ p $th=(struct thread_info *)((int)$esp & ~8191) ++end ++document get_th ++ get_th ++ Gets and prints the current thread_info pointer, Defines $th to be it. ++end ++define get_cu ++ p $cu=(struct thread_info *)((int)$esp & ~8191)->task ++end ++document get_cu ++ get_cu ++ Gets and prints the "current" value. Defines $cu to be it. ++end ++define int_off ++ set var $flags=$eflags ++ set $eflags=$eflags&~0x200 ++ end ++define int_on ++ set var $eflags|=$flags&0x200 ++ end ++document int_off ++ saves the current interrupt state and clears the processor interrupt ++ flag. Use int_on to restore the saved flag. ++end ++document int_on ++ Restores the interrupt flag saved by int_off. ++end +diff -puN /dev/null Documentation/i386/kgdb/gdbinit-modules +--- /dev/null 2002-08-30 16:31:37.000000000 -0700 ++++ 25-akpm/Documentation/i386/kgdb/gdbinit-modules 2003-06-25 23:14:17.000000000 -0700 +@@ -0,0 +1,146 @@ ++# ++# Useful GDB user-command to debug Linux Kernel Modules with gdbstub. ++# ++# This doesn't work for Linux-2.0 or older. ++# ++# Author Edouard G. Parmelan ++# ++# ++# Fri Apr 30 20:33:29 CEST 1999 ++# First public release. ++# ++# Major cleanup after experiment Linux-2.0 kernel without success. ++# Symbols of a module are not in the correct order, I can't explain ++# why :( ++# ++# Fri Mar 19 15:41:40 CET 1999 ++# Initial version. 
++# ++# Thu Jan 6 16:29:03 CST 2000 ++# A little fixing by Dave Grothe ++# ++# Mon Jun 19 09:33:13 CDT 2000 ++# Alignment changes from Edouard Parmelan ++# ++# The basic idea is to find where insmod load the module and inform ++# GDB to load the symbol table of the module with the GDB command ++# ``add-symbol-file
''. ++# ++# The Linux kernel holds the list of all loaded modules in module_list, ++# this list end with &kernel_module (exactly with module->next == NULL, ++# but the last module is not a real module). ++# ++# Insmod allocates the struct module before the object file. Since ++# Linux-2.1, this structure contain his size. The real address of ++# the object file is then (char*)module + module->size_of_struct. ++# ++# You can use three user functions ``mod-list'', ``mod-print-symbols'' ++# and ``add-module-symbols''. ++# ++# mod-list list all loaded modules with the format: ++# ++# ++# As soon as you have found the address of your module, you can ++# print its exported symbols (mod-print-symbols) or inform GDB to add ++# symbols from your module file (mod-add-symbols). ++# ++# The argument that you give to mod-print-symbols or mod-add-symbols ++# is the from the mod-list command. ++# ++# When using the mod-add-symbols command you must also give the full ++# pathname of the modules object code file. ++# ++# The command mod-add-lis is an example of how to make this easier. ++# You can edit this macro to contain the path name of your own ++# favorite module and then use it as a shorthand to load it. You ++# still need the module-address, however. ++# ++# The internal function ``mod-validate'' set the GDB variable $mod ++# as a ``struct module*'' if the kernel known the module otherwise ++# $mod is set to NULL. This ensure to not add symbols for a wrong ++# address. ++# ++# Have a nice hacking day ! ++# ++# ++define mod-list ++ set $mod = (struct module*)module_list ++ # the last module is the kernel, ignore it ++ while $mod != &kernel_module ++ printf "%p\t%s\n", (long)$mod, ($mod)->name ++ set $mod = $mod->next ++ end ++end ++document mod-list ++List all modules in the form: ++Use the as the argument for the other ++mod-commands: mod-print-symbols, mod-add-symbols. 
++end ++ ++define mod-validate ++ set $mod = (struct module*)module_list ++ while ($mod != $arg0) && ($mod != &kernel_module) ++ set $mod = $mod->next ++ end ++ if $mod == &kernel_module ++ set $mod = 0 ++ printf "%p is not a module\n", $arg0 ++ end ++end ++document mod-validate ++mod-validate ++Internal user-command used to validate the module parameter. ++If is a real loaded module, set $mod to it otherwise set $mod to 0. ++end ++ ++ ++define mod-print-symbols ++ mod-validate $arg0 ++ if $mod != 0 ++ set $i = 0 ++ while $i < $mod->nsyms ++ set $sym = $mod->syms[$i] ++ printf "%p\t%s\n", $sym->value, $sym->name ++ set $i = $i + 1 ++ end ++ end ++end ++document mod-print-symbols ++mod-print-symbols ++Print all exported symbols of the module. see mod-list ++end ++ ++ ++define mod-add-symbols-align ++ mod-validate $arg0 ++ if $mod != 0 ++ set $mod_base = ($mod->size_of_struct + (long)$mod) ++ if ($arg2 != 0) && (($mod_base & ($arg2 - 1)) != 0) ++ set $mod_base = ($mod_base | ($arg2 - 1)) + 1 ++ end ++ add-symbol-file $arg1 $mod_base ++ end ++end ++document mod-add-symbols-align ++mod-add-symbols-align ++Load the symbols table of the module from the object file where ++first section alignment is . ++To retrieve alignment, use `objdump -h '. ++end ++ ++define mod-add-symbols ++ mod-add-symbols-align $arg0 $arg1 sizeof(long) ++end ++document mod-add-symbols ++mod-add-symbols ++Load the symbols table of the module from the object file. ++Default alignment is 4. See mod-add-symbols-align. ++end ++ ++define mod-add-lis ++ mod-add-symbols-align $arg0 /usr/src/LiS/streams.o 16 ++end ++document mod-add-lis ++mod-add-lis ++Does mod-add-symbols /usr/src/LiS/streams.o ++end +diff -puN /dev/null Documentation/i386/kgdb/kgdb.txt +--- /dev/null 2002-08-30 16:31:37.000000000 -0700 ++++ 25-akpm/Documentation/i386/kgdb/kgdb.txt 2003-06-25 23:14:17.000000000 -0700 +@@ -0,0 +1,715 @@ ++Last edit: <20030506.1615.42> ++This file has information specific to the i386 kgdb option. 
Other ++platforms with the kgdb option may behave in a similar fashion. ++ ++New features: ++============ ++20030505.1827.27 ++We are starting to align with the sourceforge version, at least in ++commands. To this end, the boot command string to start kgdb at ++boot time has been changed from "kgdb" to "gdb". ++ ++Andrew Morton sent a couple of patches which are now included as follows: ++1.) We now return a flag to the interrupt handler. ++2.) We no longer use smp_num_cpus (a conflict with the lock meter). ++3.) And from William Lee Irwin III code to make ++ sure high-mem is set up before we attempt to register our interrupt ++ handler. ++We now include asm/kgdb.h from config.h so you will most likely never ++have to include it. It also 'NULLS' the kgdb macros you might have in ++your code when CONFIG_KGDB is not defined. This allows you to just ++turn off CONFIG_KGDB to turn off all the kgdb_ts() calls and such. ++This include is conditioned on the machine being an x86 so as to not ++mess with other archs. ++ ++20020801.1129.03 ++This is currently the version for the 2.4.18 (and beyond?) kernel. ++ ++We have several new "features" beginning with this version: ++ ++1.) Kgdb now syncs the "other" cpus with a cross cpu NMI. No more ++ waiting and it will pull that guy out of an irq off spin lock :) ++ ++2.) We doctored up the code that tells where a task is waiting and ++ included it so that the "info thread" command will show a bit more ++ than "schedule()". Try it... ++ ++3.) Added the ability to call a function from gdb. All the standard gdb ++ issues apply, i.e. if you hit a break point in the function you are ++ not allowed to call another (gdb limitation, not kgdb). To help ++ this capability we added a memory allocation function. Gdb does not ++ return this memory (it is used for strings you pass to that function ++ you are calling from gdb) so we fixed up a way to allow you to ++ manually return the memory (see below). ++ ++4.) 
Kgdb time stamps (kgdb_ts()) are enhanced to expand what was the ++ interrupt flag to now also include the preemption count and the ++ "in_interrupt" info. The flag is now called "with_pif" to indicate ++ the order, preempt_count, in_interrupt, flag. The preempt_count is ++ shifted left by 4 bits so you can read the count in hex by dropping ++ the low order digit. In_interrupt is in bit 1, and the flag is in ++ bit 0. ++ ++5.) The command: "p kgdb_info" is now expanded and prints something ++ like: ++(gdb) p kgdb_info ++$2 = {used_malloc = 0, called_from = 0xc0107506, entry_tsc = 67468627259, ++ errcode = 0, vector = 3, print_debug_info = 0, hold_on_sstep = 1, ++ cpus_waiting = {{task = 0xc027a000, pid = 32768, hold = 0, ++ regs = 0xc027bf84}, {task = 0x0, pid = 0, hold = 0, regs = 0x0}}} ++ ++ Things to note here: a.) used_malloc is the amount of memory that ++ has been malloc'ed to do calls from gdb. You can reclaim this ++ memory like this: "p kgdb_info.used_malloc=0" Cool, huh? b.) ++ cpus_waiting is now "sized" by the number of cpus you enter at ++ configure time in the kgdb configure section. This is NOT used any ++ where else in the system, but it is "nice" here. c.) The tasks ++ "pid" is now in the structure. This is the pid you will need to use ++ to decode to the thread id to get gdb to look at that thread. ++ Remember that the "info thread" command prints a list of threads ++ where in it numbers each thread with its reference number followed ++ by the threads pid. Note that the per cpu idle threads actually ++ have pids of 0 (yes there is more than one pid 0 in an SMP system). ++ To avoid confusion, kgdb numbers these threads with numbers beyond ++ the MAX_PID. That is why you see 32768 above. ++ ++6.) A subtle change, we now provide the complete register set for tasks ++ that are active on the other cpus. This allows better trace back on ++ those tasks. ++ ++ And, lets mention what we could not fix. 
Back-trace from all but the ++ thread that we trapped will, most likely, have a bogus entry in it. ++ The problem is that gdb does not recognize the entry code for ++ functions that use "current" near (at all?) the entry. The compiler ++ is putting the "current" decode as the first two instructions of the ++ function where gdb expects to find %ebp changing code. Back trace ++ also has trouble with interrupt frames. I am talking with Daniel ++ Jacobowitz about some way to fix this, but don't hold your breath. ++ ++20011220.0050.35 ++Major enhancement with this version is the ability to hold one or more ++cpus in an SMP system while allowing the others to continue. Also, by ++default only the current cpu is enabled on single step commands (please ++note that gdb issues single step commands at times other than when you ++use the si command). ++ ++Another change is to collect some useful information in ++a global structure called "kgdb_info". You should be able to just: ++ ++p kgdb_info ++ ++although I have seen cases where the first time this is done gdb just ++prints the first member but prints the whole structure if you then enter ++CR (carriage return or enter). This also works: ++ ++p *&kgdb_info ++ ++Here is a sample: ++(gdb) p kgdb_info ++$4 = {called_from = 0xc010732c, entry_tsc = 32804123790856, errcode = 0, ++ vector = 3, print_debug_info = 0} ++ ++"Called_from" is the return address from the current entry into kgdb. ++Sometimes it is useful to know why you are in kgdb, for example, was ++it an NMI or a real break point? The simple way to interrogate this ++return address is: ++ ++l *0xc010732c ++ ++which will print the surrounding few lines of source code. ++ ++"Entry_tsc" is the cpu TSC on entry to kgdb (useful to compare to the ++kgdb_ts entries). ++ ++"errcode" and "vector" are other entry parameters which may be helpful on ++some traps. ++ ++"print_debug_info" is the internal debugging kgdb print enable flag. Yes, ++you can modify it. 
++ ++In SMP systems kgdb_info also includes the "cpus_waiting" structure and ++"hold_on_sstep": ++ ++(gdb) p kgdb_info ++$7 = {called_from = 0xc0112739, entry_tsc = 1034936624074, errcode = 0, ++ vector = 2, print_debug_info = 0, hold_on_sstep = 1, cpus_waiting = {{ ++ task = 0x0, hold = 0, regs = 0x0}, {task = 0xc71b8000, hold = 0, ++ regs = 0xc71b9f70}, {task = 0x0, hold = 0, regs = 0x0}, {task = 0x0, ++ hold = 0, regs = 0x0}, {task = 0x0, hold = 0, regs = 0x0}, {task = 0x0, ++ hold = 0, regs = 0x0}, {task = 0x0, hold = 0, regs = 0x0}, {task = 0x0, ++ hold = 0, regs = 0x0}}} ++ ++"Cpus_waiting" has an entry for each cpu other than the current one that ++has been stopped. Each entry contains the task_struct address for that ++cpu, the address of the regs for that task and a hold flag. All these ++have the proper typing so that, for example: ++ ++p *kgdb_info.cpus_waiting[1].regs ++ ++will print the registers for cpu 1. ++ ++"Hold_on_sstep" is a new feature with this version and comes up set or ++true. What it means is that whenever kgdb is asked to single step all ++other cpus are held (i.e. not allowed to execute). The flag applies to ++all but the current cpu and, again, can be changed: ++ ++p kgdb_info.hold_on_sstep=0 ++ ++restores the old behavior of letting all cpus run during single stepping. ++ ++Likewise, each cpu has a "hold" flag, which if set, locks that cpu out ++of execution. Note that this has some risk in cases where the cpus need ++to communicate with each other. If kgdb finds no cpu available on exit, ++it will push a message thru gdb and stay in kgdb. Note that it is legal ++to hold the current cpu as long as at least one cpu can execute. ++ ++20010621.1117.09 ++This version implements an event queue. Events are signaled by calling ++a function in the kgdb stub and may be examined from gdb. See EVENTS ++below for details. 
This version also tighten up the interrupt and SMP ++handling to not allow interrupts on the way to kgdb from a breakpoint ++trap. It is fine to allow these interrupts for user code, but not ++system debugging. ++ ++Version ++======= ++ ++This version of the kgdb package was developed and tested on ++kernel version 2.4.16. It will not install on any earlier kernels. ++It is possible that it will continue to work on later versions ++of 2.4 and then versions of 2.5 (I hope). ++ ++ ++Debugging Setup ++=============== ++ ++Designate one machine as the "development" machine. This is the ++machine on which you run your compiles and which has your source ++code for the kernel. Designate a second machine as the "target" ++machine. This is the machine that will run your experimental ++kernel. ++ ++The two machines will be connected together via a serial line out ++one or the other of the COM ports of the PC. You will need a modem ++eliminator and the appropriate cables. ++ ++Decide on which tty port you want the machines to communicate, then ++cable them up back-to-back using the null modem. COM1 is /dev/ttyS0 and ++COM2 is /dev/ttyS1. You should test this connection with the two ++machines prior to trying to debug a kernel. Once you have it working, ++on the TARGET machine, enter: ++ ++setserial /dev/ttyS0 (or what ever tty you are using) ++ ++and record the port and the irq addresses. ++ ++On the DEVELOPMENT machine you need to apply the patch for the kgdb ++hooks. You have probably already done that if you are reading this ++file. ++ ++On your DEVELOPMENT machine, go to your kernel source directory and do ++"make Xconfig" where X is one of "x", "menu", or "". If you are ++configuring in the standard serial driver, it must not be a module. ++Either yes or no is ok, but making the serial driver a module means it ++will initialize after kgdb has set up the UART interrupt code and may ++cause a failure of the control C option discussed below. 
The configure ++question for the serial driver is under the "Character devices" heading ++and is: ++ ++"Standard/generic (8250/16550 and compatible UARTs) serial support" ++ ++Go down to the kernel debugging menu item and open it up. Enable the ++kernel kgdb stub code by selecting that item. You can also choose to ++turn on the "-ggdb -O1" compile options. The -ggdb causes the compiler ++to put more debug info (like local symbols) in the object file. On the ++i386 -g and -ggdb are the same so this option just reduces to "O1". The ++-O1 reduces the optimization level. This may be helpful in some cases, ++be aware, however, that this may also mask the problem you are looking ++for. ++ ++The baud rate. Default is 115200. What ever you choose be sure that ++the host machine is set to the same speed. I recommend the default. ++ ++The port. This is the I/O address of the serial UART that you should ++have gotten using setserial as described above. The standard com1 port ++(3f8) using irq 4 is default . Com2 is 2f8 which by convention uses irq ++3. ++ ++The port irq (see above). ++ ++Stack overflow test. This option makes a minor change in the trap, ++system call and interrupt code to detect stack overflow and transfer ++control to kgdb if it happens. (Some platforms have this in the base ++line code, but the i386 does not.) ++ ++You can also configure the system to recognize the boot option ++"console=kgdb" which if given will cause all console output during ++booting to be put thru gdb as well as other consoles. This option ++requires that gdb and kgdb be connected prior to sending console output ++so, if they are not, a breakpoint is executed to force the connection. ++This will happen before any kernel output (it is going thru gdb, right), ++and will stall the boot until the connection is made. ++ ++You can also configure in a patch to SysRq to enable the kGdb SysRq. ++This request generates a breakpoint. 
Since the serial port irq line is ++set up after any serial drivers, it is possible that this command will ++work when the control C will not. ++ ++Save and exit the Xconfig program. Then do "make clean" , "make dep" ++and "make bzImage" (or whatever target you want to make). This gets the ++kernel compiled with the "-g" option set -- necessary for debugging. ++ ++You have just built the kernel on your DEVELOPMENT machine that you ++intend to run on your TARGET machine. ++ ++To install this new kernel, use the following installation procedure. ++Remember, you are on the DEVELOPMENT machine patching the kernel source ++for the kernel that you intend to run on the TARGET machine. ++ ++Copy this kernel to your target machine using your usual procedures. I ++usually arrange to copy development: ++/usr/src/linux/arch/i386/boot/bzImage to /vmlinuz on the TARGET machine ++via a LAN based NFS access. That is, I run the cp command on the target ++and copy from the development machine via the LAN. Run Lilo (see "man ++lilo" for details on how to set this up) on the new kernel on the target ++machine so that it will boot! Then boot the kernel on the target ++machine. ++ ++On the DEVELOPMENT machine, create a file called .gdbinit in the ++directory /usr/src/linux. An example .gdbinit file looks like this: ++ ++shell echo -e "\003" >/dev/ttyS0 ++set remotebaud 38400 (or what ever speed you have chosen) ++target remote /dev/ttyS0 ++ ++ ++Change the "echo" and "target" definition so that it specifies the tty ++port that you intend to use. Change the "remotebaud" definition to ++match the data rate that you are going to use for the com line. ++ ++You are now ready to try it out. ++ ++Boot your target machine with "kgdb" in the boot command i.e. something ++like: ++ ++lilo> test kgdb ++ ++or if you also want console output thru gdb: ++ ++lilo> test kgdb console=kgdb ++ ++You should see the lilo message saying it has loaded the kernel and then ++all output stops. 
The kgdb stub is trying to connect with gdb. Start ++gdb something like this: ++ ++ ++On your DEVELOPMENT machine, cd /usr/src/linux and enter "gdb vmlinux". ++When gdb gets the symbols loaded it will read your .gdbinit file and, if ++everything is working correctly, you should see gdb print out a few ++lines indicating that a breakpoint has been taken. It will actually ++show a line of code in the target kernel inside the kgdb activation ++code. ++ ++The gdb interaction should look something like this: ++ ++ linux-dev:/usr/src/linux# gdb vmlinux ++ GDB is free software and you are welcome to distribute copies of it ++ under certain conditions; type "show copying" to see the conditions. ++ There is absolutely no warranty for GDB; type "show warranty" for details. ++ GDB 4.15.1 (i486-slackware-linux), ++ Copyright 1995 Free Software Foundation, Inc... ++ breakpoint () at i386-stub.c:750 ++ 750 } ++ (gdb) ++ ++You can now use whatever gdb commands you like to set breakpoints. ++Enter "continue" to start your target machine executing again. At this ++point the target system will run at full speed until it encounters ++your breakpoint or gets a segment violation in the kernel, or whatever. ++ ++If you have the kgdb console enabled when you continue, gdb will print ++out all the console messages. ++ ++The above example caused a breakpoint relatively early in the boot ++process. For the i386 kgdb it is possible to code a break instruction ++as the first C-language point in init/main.c, i.e. as the first instruction ++in start_kernel(). This could be done as follows: ++ ++#include ++ breakpoint(); ++ ++This breakpoint() is really a function that sets up the breakpoint and ++single-step hardware trap cells and then executes a breakpoint. Any ++early hard coded breakpoint will need to use this function. 
Once the ++trap cells are set up they need not be set again, but doing it again ++does not hurt anything, so you don't need to be concerned about which ++breakpoint is hit first. Once the trap cells are set up (and the kernel ++sets them up in due course even if breakpoint() is never called) the ++macro: ++ ++BREAKPOINT; ++ ++will generate an inline breakpoint. This may be more useful as it stops ++the processor at the instruction instead of in a function a step removed ++from the location of interest. In either case <asm/kgdb.h> must be ++included to define both breakpoint() and BREAKPOINT. ++ ++Triggering kgdbstub at other times ++================================== ++ ++Often you don't need to enter the debugger until much later in the boot ++or even after the machine has been running for some time. Once the ++kernel is booted and interrupts are on, you can force the system to ++enter the debugger by sending a control C to the debug port. This is ++what the first line of the recommended .gdbinit file does. This allows ++you to start gdb any time after the system is up as well as when the ++system is already at a break point. (In the case where the system is ++already at a break point the control C is not needed, however, it will ++be ignored by the target so no harm is done. Also note that the echo ++command assumes that the port speed is already set. This will be true ++once gdb has connected, but it is best to set the port speed before you ++run gdb.) ++ ++Another simple way to do this is to put the following file in your ~/bin ++directory: ++ ++#!/bin/bash ++echo -e "\003" > /dev/ttyS0 ++ ++Here, the ttyS0 should be replaced with what ever port you are using. ++The "\003" is control-C. Once you are connected with gdb, you can enter ++control-C at the command prompt. ++ ++An alternative way to get control to the debugger is to enable the kGdb ++SysRq command. Then you would enter Alt-SysRq-g (all three keys at the ++same time, but push them down in the order given). 
To refresh your ++memory of the available SysRq commands try Alt-SysRq-=. Actually any ++undefined command could replace the "=", but I like to KNOW that what I ++am pushing will never be defined. ++ ++Debugging hints ++=============== ++ ++You can break into the target machine at any time from the development ++machine by typing ^C (see above paragraph). If the target machine has ++interrupts enabled this will stop it in the kernel and enter the ++debugger. ++ ++There is unfortunately no way of breaking into the kernel if it is ++in a loop with interrupts disabled, so if this happens to you then ++you need to place exploratory breakpoints or printk's into the kernel ++to find out where it is looping. The exploratory breakpoints can be ++entered either thru gdb or hard coded into the source. This is very ++handy if you do something like: ++ ++if () BREAKPOINT; ++ ++ ++There is a copy of an e-mail in the Documentation/i386/kgdb/ directory ++(debug-nmi.txt) which describes how to create an NMI on an ISA bus ++machine using a paper clip. I have a sophisticated version of this made ++by wiring a push button switch into a PC104/ISA bus adapter card. The ++adapter card nicely furnishes wire wrap pins for all the ISA bus ++signals. ++ ++When you are done debugging the kernel on the target machine it is a ++good idea to leave it in a running state. This makes reboots faster, ++bypassing the fsck. So do a gdb "continue" as the last gdb command if ++this is possible. To terminate gdb itself on the development machine ++and leave the target machine running, first clear all breakpoints and ++continue, then type ^Z to suspend gdb and then kill it with "kill %1" or ++something similar. ++ ++If gdbstub Does Not Work ++======================== ++ ++If it doesn't work, you will have to troubleshoot it. Do the easy ++things first like double checking your cabling and data rates. You ++might try some non-kernel based programs to see if the back-to-back ++connection works properly. 
Just something simple like cat /etc/hosts ++>/dev/ttyS0 on one machine and cat /dev/ttyS0 on the other will tell you ++if you can send data from one machine to the other. Make sure it works ++in both directions. There is no point in tearing out your hair in the ++kernel if the line doesn't work. ++ ++All of the real action takes place in the file ++/usr/src/linux/arch/i386/kernel/kgdb_stub.c. That is the code on the target ++machine that interacts with gdb on the development machine. In gdb you can ++turn on a debug switch with the following command: ++ ++ set remotedebug ++ ++This will print out the protocol messages that gdb is exchanging with ++the target machine. ++ ++Another place to look is /usr/src/arch/i386/lib/kgdb_serial.c This is ++the code that talks to the serial port on the target side. There might ++be a problem there. In particular there is a section of this code that ++tests the UART which will tell you what UART you have if you define ++"PRNT" (just remove "_off" from the #define PRNT_off). To view this ++report you will need to boot the system without any beakpoints. This ++allows the kernel to run to the point where it calls kgdb to set up ++interrupts. At this time kgdb will test the UART and print out the type ++it finds. (You need to wait so that the printks are actually being ++printed. Early in the boot they are cached, waiting for the console to ++be enabled. Also, if kgdb is entered thru a breakpoint it is possible ++to cause a dead lock by calling printk when the console is locked. The ++stub, thus avoids doing printks from break points especially in the ++serial code.) At this time, if the UART fails to do the expected thing, ++kgdb will print out (using printk) information on what failed. (These ++messages will be buried in all the other boot up messages. Look for ++lines that start with "gdb_hook_interrupt:". You may want to use dmesg ++once the system is up to view the log. 
If this fails or if you still ++don't connect, review your answers for the port address. Use: ++ ++setserial /dev/ttyS0 ++ ++to get the current port and irq information. This command will also ++tell you what the system found for the UART type. The stub recognizes ++the following UART types: ++ ++16450, 16550, and 16550A ++ ++If you are really desperate you can use printk debugging in the ++kgdbstub code in the target kernel until you get it working. In particular, ++there is a global variable in /usr/src/linux/arch/i386/kernel/kgdb_stub.c ++named "remote_debug". Compile your kernel with this set to 1, rather ++than 0 and the debug stub will print out lots of stuff as it does ++what it does. Likewise there are debug printks in the kgdb_serial.c ++code that can be turned on with simple changes in the macro defines. ++ ++ ++Debugging Loadable Modules ++========================== ++ ++This technique comes courtesy of Edouard Parmelan ++ ++ ++When you run gdb, enter the command ++ ++source gdbinit-modules ++ ++This will read in a file of gdb macros that was installed in your ++kernel source directory when kgdb was installed. This file implements ++the following commands: ++ ++mod-list ++ Lists the loaded modules in the form ++ ++mod-print-symbols ++ Prints all the symbols in the indicated module. ++ ++mod-add-symbols ++ Loads the symbols from the object file and associates them ++ with the indicated module. ++ ++After you have loaded the module that you want to debug, use the command ++mod-list to find the of your module. Then use that ++address in the mod-add-symbols command to load your module's symbols. ++From that point onward you can debug your module as if it were a part ++of the kernel. ++ ++The file gdbinit-modules also contains a command named mod-add-lis as ++an example of how to construct a command of your own to load your ++favorite module. The idea is to "can" the pathname of the module ++in the command so you don't have to type so much. 
++ ++Threads ++======= ++ ++Each process in a target machine is seen as a gdb thread. gdb thread ++related commands (info threads, thread n) can be used. ++ ++ia-32 hardware breakpoints ++========================== ++ ++kgdb stub contains support for hardware breakpoints using debugging features ++of ia-32(x86) processors. These breakpoints do not need code modification. ++They use debugging registers. 4 hardware breakpoints are available in ia-32 ++processors. ++ ++Each hardware breakpoint can be of one of the following three types. ++ ++1. Execution breakpoint - An Execution breakpoint is triggered when code ++ at the breakpoint address is executed. ++ ++ As limited number of hardware breakpoints are available, it is ++ advisable to use software breakpoints ( break command ) instead ++ of execution hardware breakpoints, unless modification of code ++ is to be avoided. ++ ++2. Write breakpoint - A write breakpoint is triggered when memory ++ location at the breakpoint address is written. ++ ++ A write or can be placed for data of variable length. Length of ++ a write breakpoint indicates length of the datatype to be ++ watched. Length is 1 for 1 byte data , 2 for 2 byte data, 3 for ++ 4 byte data. ++ ++3. Access breakpoint - An access breakpoint is triggered when memory ++ location at the breakpoint address is either read or written. ++ ++ Access breakpoints also have lengths similar to write breakpoints. ++ ++IO breakpoints in ia-32 are not supported. ++ ++Since gdb stub at present does not use the protocol used by gdb for hardware ++breakpoints, hardware breakpoints are accessed through gdb macros. gdb macros ++for hardware breakpoints are described below. 
++ ++hwebrk - Places an execution breakpoint ++ hwebrk breakpointno address ++hwwbrk - Places a write breakpoint ++ hwwbrk breakpointno length address ++hwabrk - Places an access breakpoint ++ hwabrk breakpointno length address ++hwrmbrk - Removes a breakpoint ++ hwrmbrk breakpointno ++exinfo - Tells whether a software or hardware breakpoint has occurred. ++ Prints number of the hardware breakpoint if a hardware breakpoint has ++ occurred. ++ ++Arguments required by these commands are as follows ++breakpointno - 0 to 3 ++length - 1 to 3 ++address - Memory location in hex digits ( without 0x ) e.g c015e9bc ++ ++SMP support ++========== ++ ++When a breakpoint occurs or user issues a break ( Ctrl + C ) to gdb ++client, all the processors are forced to enter the debugger. Current ++thread corresponds to the thread running on the processor where ++breakpoint occurred. Threads running on other processor(s) appear ++similar to other non running threads in the 'info threads' output. With ++in the kgdb stub there is a structure "waiting_cpus" in which kgdb ++records the values of "current" and "regs" for each cpu other than the ++one that hit the breakpoint. "current" is a pointer to the task ++structure for the task that cpu is running, while "regs" points to the ++saved registers for the task. This structure can be examined with the ++gdb "p" command. ++ ++ia-32 hardware debugging registers on all processors are set to same ++values. Hence any hardware breakpoints may occur on any processor. ++ ++gdb troubleshooting ++=================== ++ ++1. gdb hangs ++Kill it. restart gdb. Connect to target machine. ++ ++2. gdb cannot connect to target machine (after killing a gdb and ++restarting another) If the target machine was not inside debugger when ++you killed gdb, gdb cannot connect because the target machine won't ++respond. In this case echo "Ctrl+C"(ASCII 3) in the serial line. ++e.g. 
echo -e "\003" > /dev/ttyS1 This forces that target machine into ++debugger after which you can connect. ++ ++3. gdb cannot connect even after echoing Ctrl+C into serial line ++Try changing serial line settings min to 1 and time to 0 ++e.g. stty min 1 time 0 < /dev/ttyS1 ++Try echoing again ++ ++check serial line speed and set it to correct value if required ++e.g. stty ispeed 115200 ospeed 115200 < /dev/ttyS1 ++ ++EVENTS ++====== ++ ++Ever want to know the order of things happening? Which cpu did what and ++when? How did the spinlock get the way it is? Then events are for ++you. Events are defined by calls to an event collection interface and ++saved for later examination. In this case, kgdb events are saved by a ++very fast bit of code in kgdb which is fully SMP and interrupt protected ++and they are examined by using gdb to display them. Kgdb keeps only ++the last N events, where N must be a power of two and is defined at ++configure time. ++ ++ ++Events are signaled to kgdb by calling: ++ ++kgdb_ts(data0,data1) ++ ++For each call kgdb records each call in an array along with other info. ++Here is the array def: ++ ++struct kgdb_and_then_struct { ++#ifdef CONFIG_SMP ++ int on_cpu; ++#endif ++ long long at_time; ++ int from_ln; ++ char * in_src; ++ void *from; ++ int with_if; ++ int data0; ++ int data1; ++}; ++ ++For SMP machines the cpu is recorded, for all machines the TSC is ++recorded (gets a time stamp) as well as the line number and source file ++the call was made from. The address of the (from), the "if" (interrupt ++flag) and the two data items are also recorded. The macro kgdb_ts casts ++the types to int, so you can put any 32-bit values here. There is a ++configure option to select the number of events you want to keep. A ++nice number might be 128, but you can keep up to 1024 if you want. The ++number must be a power of two. An "andthen" macro library is provided ++for gdb to help you look at these events. 
It is also possible to define ++a different structure for the event storage and cast the data to this ++structure. For example the following structure is defined in kgdb: ++ ++struct kgdb_and_then_struct2 { ++#ifdef CONFIG_SMP ++ int on_cpu; ++#endif ++ long long at_time; ++ int from_ln; ++ char * in_src; ++ void *from; ++ int with_if; ++ struct task_struct *t1; ++ struct task_struct *t2; ++}; ++ ++If you use this for display, the data elements will be displayed as ++pointers to task_struct entries. You may want to define your own ++structure to use in casting. You should only change the last two items ++and you must keep the structure size the same. Kgdb will handle these ++as 32-bit ints, but within that constraint you can define a structure to ++cast to any 32-bit quantity. This need only be available to gdb and is ++only used for casting in the display code. ++ ++Final Items ++=========== ++ ++I picked up this code from Amit S. Kale and enhanced it. ++ ++If you make some really cool modification to this stuff, or if you ++fix a bug, please let me know. ++ ++George Anzinger ++ ++ ++Amit S. Kale ++ ++ ++(First kgdb by David Grothe ) ++ ++(modified by Tigran Aivazian ) ++ Putting gdbstub into the kernel config menu. ++ ++(modified by Scott Foehner ) ++ Hooks for entering gdbstub at boot time. ++ ++(modified by Amit S. Kale ) ++ Threads, ia-32 hw debugging, mp support, console support, ++ nmi watchdog handling. ++ ++(modified by George Anzinger ) ++ Extended threads to include the idle threads. ++ Enhancements to allow breakpoint() at first C code. ++ Use of module_init() and __setup() to automate the configure. ++ Enhanced the cpu "collection" code to work in early bring up. ++ Added ability to call functions from gdb ++ Print info thread stuff without going back to schedule() ++ Now collect the "other" cpus with a IPI/ NMI. 
+\ No newline at end of file +diff -puN /dev/null Documentation/i386/kgdb/loadmodule.sh +--- /dev/null 2002-08-30 16:31:37.000000000 -0700 ++++ 25-akpm/Documentation/i386/kgdb/loadmodule.sh 2003-06-25 23:14:17.000000000 -0700 +@@ -0,0 +1,78 @@ ++#!/bin/bash ++# This script loads a module on a target machine and generates a gdb script. ++# source generated gdb script to load the module file at appropriate addresses ++# in gdb. ++# ++# Usage: ++# Loading the module on target machine and generating gdb script) ++# [foo]$ loadmodule.sh modulefile ++# ++# Loading the module file into gdb ++# (gdb) source $GDBSCRIPTS/load$MODULEFILEBASENAME ++# ++# Modify following variables according to your setup. ++# TESTMACHINE - Name of the target machine ++# GDBSCRIPTS - The directory where a gdb script will be generated ++# ++# Author: Amit S. Kale (akale@veritas.com). ++# ++# If you run into problems, please check files pointed to by following ++# variables. ++# ERRFILE - /tmp/$MODULEFILEBASENAME.errs contains stderr output of insmod ++# MAPFILE - /tmp/$MODULEFILEBASENAME.map contains stdout output of insmod ++# GDBSCRIPT - $GDBSCRIPTS/load$MODULEFILEBASENAME gdb script.
++ ++TESTMACHINE=foo ++GDBSCRIPTS=/home/bar ++ ++if [ $# -lt 1 ] ; then { ++ echo Usage: $0 modulefile ++ exit ++} ; fi ++ ++MODULEFILE=$1 ++MODULEFILEBASENAME=`basename $1` ++ ++if [ $MODULEFILE = $MODULEFILEBASENAME ] ; then { ++ MODULEFILE=`pwd`/$MODULEFILE ++} fi ++ ++ERRFILE=/tmp/$MODULEFILEBASENAME.errs ++MAPFILE=/tmp/$MODULEFILEBASENAME.map ++GDBSCRIPT=$GDBSCRIPTS/load$MODULEFILEBASENAME ++ ++function findaddr() { ++ local ADDR=0x$(echo "$SEGMENTS" | \ ++ grep "$1" | sed 's/^[^ ]*[ ]*[^ ]*[ ]*//' | \ ++ sed 's/[ ]*[^ ]*$//') ++ echo $ADDR ++} ++ ++function checkerrs() { ++ if [ "`cat $ERRFILE`" != "" ] ; then { ++ cat $ERRFILE ++ exit ++ } fi ++} ++ ++#load the module ++echo Copying $MODULEFILE to $TESTMACHINE ++rcp $MODULEFILE root@${TESTMACHINE}: ++ ++echo Loading module $MODULEFILE ++rsh -l root $TESTMACHINE /sbin/insmod -m ./`basename $MODULEFILE` \ ++ > $MAPFILE 2> $ERRFILE ++checkerrs ++ ++SEGMENTS=`head -n 11 $MAPFILE | tail -n 10` ++TEXTADDR=$(findaddr "\\.text[^.]") ++LOADSTRING="add-symbol-file $MODULEFILE $TEXTADDR" ++SEGADDRS=`echo "$SEGMENTS" | awk '//{ ++ if ($1 != ".text" && $1 != ".this" && ++ $1 != ".kstrtab" && $1 != ".kmodtab") { ++ print " -s " $1 " 0x" $3 " " ++ } ++}'` ++LOADSTRING="$LOADSTRING $SEGADDRS" ++echo Generating script $GDBSCRIPT ++echo $LOADSTRING > $GDBSCRIPT +diff -puN drivers/char/keyboard.c~kgdb-ga drivers/char/keyboard.c +--- 25/drivers/char/keyboard.c~kgdb-ga 2003-06-25 23:14:17.000000000 -0700 ++++ 25-akpm/drivers/char/keyboard.c 2003-06-25 23:14:17.000000000 -0700 +@@ -1055,6 +1055,9 @@ void kbd_keycode(unsigned int keycode, i + } + if (sysrq_down && down && !rep) { + handle_sysrq(kbd_sysrq_xlate[keycode], regs, tty); ++#ifdef CONFIG_KGDB_SYSRQ ++ sysrq_down = 0; /* in case we miss the "up" event */ ++#endif + return; + } + #endif +diff -puN drivers/char/sysrq.c~kgdb-ga drivers/char/sysrq.c +--- 25/drivers/char/sysrq.c~kgdb-ga 2003-06-25 23:14:17.000000000 -0700 ++++ 25-akpm/drivers/char/sysrq.c 2003-06-25 
23:14:17.000000000 -0700 +@@ -35,6 +35,19 @@ + #include + + #include ++#ifdef CONFIG_KGDB_SYSRQ ++ ++#define GDB_OP &kgdb_op ++static struct sysrq_key_op kgdb_op={ ++ handler: (void*)breakpoint, ++ help_msg: "kGdb ", ++ action_msg: "Debug breakpoint\n", ++}; ++ ++#else ++#define GDB_OP NULL ++#endif ++ + + extern void reset_vc(unsigned int); + extern struct list_head super_blocks; +@@ -240,7 +253,7 @@ static struct sysrq_key_op *sysrq_key_ta + /* d */ NULL, + /* e */ &sysrq_term_op, + /* f */ NULL, +-/* g */ NULL, ++/* g */ GDB_OP, + /* h */ NULL, + /* i */ &sysrq_kill_op, + /* j */ NULL, +diff -puN drivers/serial/8250.c~kgdb-ga drivers/serial/8250.c +--- 25/drivers/serial/8250.c~kgdb-ga 2003-06-25 23:14:17.000000000 -0700 ++++ 25-akpm/drivers/serial/8250.c 2003-06-25 23:14:17.000000000 -0700 +@@ -823,7 +823,7 @@ receive_chars(struct uart_8250_port *up, + if (unlikely(tty->flip.count >= TTY_FLIPBUF_SIZE)) { + tty->flip.work.func((void *)tty); + if (tty->flip.count >= TTY_FLIPBUF_SIZE) +- return; // if TTY_DONT_FLIP is set ++ return; /* if TTY_DONT_FLIP is set */ + } + ch = serial_inp(up, UART_RX); + *tty->flip.char_buf_ptr = ch; +@@ -1183,13 +1183,20 @@ static void serial8250_break_ctl(struct + serial_out(up, UART_LCR, up->lcr); + spin_unlock_irqrestore(&up->port.lock, flags); + } ++#ifdef CONFIG_KGDB ++static int kgdb_irq = -1; ++#endif + + static int serial8250_startup(struct uart_port *port) + { + struct uart_8250_port *up = (struct uart_8250_port *)port; + unsigned long flags; + int retval; +- ++#ifdef CONFIG_KGDB ++ if ( up->port.irq == kgdb_irq){ ++ return -EBUSY; ++ } ++#endif + if (up->port.type == PORT_16C950) { + /* Wake up and initialize UART */ + up->acr = 0; +@@ -1853,6 +1860,11 @@ static void __init serial8250_register_p + for (i = 0; i < UART_NR; i++) { + struct uart_8250_port *up = &serial8250_ports[i]; + ++#ifdef CONFIG_KGDB ++ if(up->port.irq == kgdb_irq){ ++ up->port.iobase = up->port.mapbase = 0; ++ } ++#endif + up->port.line = i; + up->port.ops 
= &serial8250_pops; + init_timer(&up->timer); +@@ -2116,7 +2128,31 @@ void serial8250_resume_port(int line, u3 + { + uart_resume_port(&serial8250_reg, &serial8250_ports[line].port, level); + } +- ++#ifdef CONFIG_KGDB ++/* ++ * Find all the ports using the given irq and shut them down. ++ * Result should be that the irq will be released. ++ */ ++void shutdown_for_kgdb(struct async_struct * info) ++{ ++ int irq = info->state->irq; ++ struct uart_8250_port *up; ++ int ttyS; ++ ++ kgdb_irq = irq; /* save for later init */ ++ for (ttyS = 0; ttyS < UART_NR; ttyS++){ ++ up = &serial8250_ports[ttyS]; ++ if( up->port.irq == irq && (irq_lists + irq)->head){ ++#ifdef CONFIG_DEBUG_SPINLOCK /* ugly business... */ ++ if(up->port.lock.magic != SPINLOCK_MAGIC){ ++ spin_lock_init(&up->port.lock); ++ } ++#endif ++ serial8250_shutdown(&up->port); ++ } ++ } ++} ++#endif + static int __init serial8250_init(void) + { + int ret, i; +diff -puN include/asm-i386/bugs.h~kgdb-ga include/asm-i386/bugs.h +--- 25/include/asm-i386/bugs.h~kgdb-ga 2003-06-25 23:14:17.000000000 -0700 ++++ 25-akpm/include/asm-i386/bugs.h 2003-06-25 23:14:17.000000000 -0700 +@@ -1,11 +1,11 @@ + /* + * include/asm-i386/bugs.h + * +- * Copyright (C) 1994 Linus Torvalds ++ * Copyright (C) 1994 Linus Torvalds + * + * Cyrix stuff, June 1998 by: + * - Rafael R. Reilova (moved everything from head.S), +- * ++ * + * - Channing Corn (tests & fixes), + * - Andrew D. Balsa (code cleanup). + * +@@ -25,7 +25,20 @@ + #include + #include + #include +- ++#ifdef CONFIG_KGDB ++/* ++ * Provied the command line "gdb" initial break ++ */ ++int __init kgdb_initial_break(char * str) ++{ ++ if (*str == '\0'){ ++ breakpoint(); ++ return 1; ++ } ++ return 0; ++} ++__setup("gdb",kgdb_initial_break); ++#endif + static int __init no_halt(char *s) + { + boot_cpu_data.hlt_works_ok = 0; +@@ -140,7 +153,7 @@ static void __init check_popad(void) + : "ecx", "edi" ); + /* If this fails, it means that any user program may lock the CPU hard. Too bad. 
*/ + if (res != 12345678) printk( "Buggy.\n" ); +- else printk( "OK.\n" ); ++ else printk( "OK.\n" ); + #endif + } + +diff -puN /dev/null include/asm-i386/kgdb.h +--- /dev/null 2002-08-30 16:31:37.000000000 -0700 ++++ 25-akpm/include/asm-i386/kgdb.h 2003-06-25 23:14:17.000000000 -0700 +@@ -0,0 +1,59 @@ ++#ifndef __KGDB ++#define __KGDB ++ ++/* ++ * This file should not include ANY others. This makes it usable ++ * most anywhere without the fear of include order or inclusion. ++ * Make it so! ++ * ++ * This file may be included all the time. It is only active if ++ * CONFIG_KGDB is defined, otherwise it stubs out all the macros ++ * and entry points. ++ */ ++#if defined(CONFIG_KGDB) && !defined(__ASSEMBLY__) ++ ++extern void breakpoint(void); ++#define INIT_KGDB_INTS kgdb_enable_ints() ++ ++#ifndef BREAKPOINT ++#define BREAKPOINT asm(" int $3") ++#endif ++/* ++ * GDB debug stub (or any debug stub) can point the 'linux_debug_hook' ++ * pointer to its routine and it will be entered as the first thing ++ * when a trap occurs. ++ * ++ * Return values are, at present, undefined. ++ * ++ * The debug hook routine does not necessarily return to its caller. ++ * It has the register image and thus may choose to resume execution ++ * anywhere it pleases. ++ */ ++struct pt_regs; ++ ++extern int kgdb_handle_exception(int trapno, ++ int signo, int err_code, struct pt_regs *regs); ++extern int in_kgdb(struct pt_regs *regs); ++ ++#ifdef CONFIG_KGDB_TS ++void kgdb_tstamp(int line, char *source, int data0, int data1); ++/* ++ * This is the time stamp function. The macro adds the source info and ++ * does a cast on the data to allow most any 32-bit value. ++ */ ++ ++#define kgdb_ts(data0,data1) kgdb_tstamp(__LINE__,__FILE__,(int)data0,(int)data1) ++#else ++#define kgdb_ts(data0,data1) ++#endif ++#else /* CONFIG_KGDB && ! __ASSEMBLY__ ,stubs follow... 
*/ ++#ifndef BREAKPOINT ++#define BREAKPOINT ++#endif ++#define kgdb_ts(data0,data1) ++#define in_kgdb ++#define kgdb_handle_exception ++#define breakpoint ++#define INIT_KGDB_INTS ++#endif ++#endif /* __KGDB */ +diff -puN /dev/null include/asm-i386/kgdb_local.h +--- /dev/null 2002-08-30 16:31:37.000000000 -0700 ++++ 25-akpm/include/asm-i386/kgdb_local.h 2003-06-25 23:14:17.000000000 -0700 +@@ -0,0 +1,102 @@ ++#ifndef __KGDB_LOCAL ++#define __KGDB_LOCAL ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#define PORT 0x3f8 ++#ifdef CONFIG_KGDB_PORT ++#undef PORT ++#define PORT CONFIG_KGDB_PORT ++#endif ++#define IRQ 4 ++#ifdef CONFIG_KGDB_IRQ ++#undef IRQ ++#define IRQ CONFIG_KGDB_IRQ ++#endif ++#define SB_CLOCK 1843200 ++#define SB_BASE (SB_CLOCK/16) ++#define SB_BAUD9600 (SB_BASE/9600) ++#define SB_BAUD192 (SB_BASE/19200) ++#define SB_BAUD384 (SB_BASE/38400) ++#define SB_BAUD576 (SB_BASE/57600) ++#define SB_BAUD1152 (SB_BASE/115200) ++#ifdef CONFIG_KGDB_9600BAUD ++#define SB_BAUD SB_BAUD9600 ++#endif ++#ifdef CONFIG_KGDB_19200BAUD ++#define SB_BAUD SB_BAUD192 ++#endif ++#ifdef CONFIG_KGDB_38400BAUD ++#define SB_BAUD SB_BAUD384 ++#endif ++#ifdef CONFIG_KGDB_57600BAUD ++#define SB_BAUD SB_BAUD576 ++#endif ++#ifdef CONFIG_KGDB_115200BAUD ++#define SB_BAUD SB_BAUD1152 ++#endif ++#ifndef SB_BAUD ++#define SB_BAUD SB_BAUD1152 /* Start with this if not given */ ++#endif ++ ++#ifndef CONFIG_X86_TSC ++#undef rdtsc ++#define rdtsc(a,b) if (a++ > 10000){a = 0; b++;} ++#undef rdtscll ++#define rdtscll(s) s++ ++#endif ++ ++#ifdef _raw_read_unlock /* must use a name that is "define"ed, not an inline */ ++#undef spin_lock ++#undef spin_trylock ++#undef spin_unlock ++#define spin_lock _raw_spin_lock ++#define spin_trylock _raw_spin_trylock ++#define spin_unlock _raw_spin_unlock ++#else ++#endif ++#undef spin_unlock_wait ++#define spin_unlock_wait(x) do { cpu_relax(); barrier();} \ ++ while(spin_is_locked(x)) ++ ++#define SB_IER 1 ++#define SB_MCR
UART_MCR_OUT2 | UART_MCR_DTR | UART_MCR_RTS ++ ++#define FLAGS 0 ++#define SB_STATE { \ ++ magic: SSTATE_MAGIC, \ ++ baud_base: SB_BASE, \ ++ port: PORT, \ ++ irq: IRQ, \ ++ flags: FLAGS, \ ++ custom_divisor:SB_BAUD} ++#define SB_INFO { \ ++ magic: SERIAL_MAGIC, \ ++ port: PORT,0,FLAGS, \ ++ state: &state, \ ++ tty: (struct tty_struct *)&state, \ ++ IER: SB_IER, \ ++ MCR: SB_MCR} ++extern void putDebugChar(int); ++/* RTAI support needs us to really stop/start interrupts */ ++ ++#define kgdb_sti() __asm__ __volatile__("sti": : :"memory") ++#define kgdb_cli() __asm__ __volatile__("cli": : :"memory") ++#define kgdb_local_save_flags(x) __asm__ __volatile__(\ ++ "pushfl ; popl %0":"=g" (x): /* no input */) ++#define kgdb_local_irq_restore(x) __asm__ __volatile__(\ ++ "pushl %0 ; popfl": \ ++ /* no output */ :"g" (x):"memory", "cc") ++#define kgdb_local_irq_save(x) do { kgdb_local_save_flags(x); kgdb_cli(); } while (0) ++ ++#ifdef CONFIG_SERIAL ++extern void shutdown_for_kgdb(struct async_struct *info); ++#endif ++#define INIT_KDEBUG putDebugChar('+'); ++#endif /* __KGDB_LOCAL */ +diff -puN include/linux/config.h~kgdb-ga include/linux/config.h +--- 25/include/linux/config.h~kgdb-ga 2003-06-25 23:14:17.000000000 -0700 ++++ 25-akpm/include/linux/config.h 2003-06-25 23:14:17.000000000 -0700 +@@ -2,5 +2,8 @@ + #define _LINUX_CONFIG_H + + #include ++#ifdef CONFIG_X86 ++#include ++#endif + + #endif +diff -puN kernel/sched.c~kgdb-ga kernel/sched.c +--- 25/kernel/sched.c~kgdb-ga 2003-06-25 23:14:17.000000000 -0700 ++++ 25-akpm/kernel/sched.c 2003-06-25 23:14:17.000000000 -0700 +@@ -1604,6 +1604,13 @@ out_unlock: + task_rq_unlock(rq, &flags); + } + ++#if defined( CONFIG_KGDB) ++struct task_struct * kgdb_get_idle(int this_cpu) ++{ ++ return runqueues[this_cpu].idle; ++} ++#endif ++ + #ifndef __alpha__ + + /* +diff -puN MAINTAINERS~kgdb-ga MAINTAINERS +--- 25/MAINTAINERS~kgdb-ga 2003-06-25 23:14:17.000000000 -0700 ++++ 25-akpm/MAINTAINERS 2003-06-25 23:14:17.000000000 -0700 +@@ -1059,6 +1059,12 @@
L: kbuild-devel@lists.sourceforge.net + W: http://kbuild.sourceforge.net + S: Maintained + ++KGDB FOR I386 PLATFORM ++P: George Anzinger ++M: george@mvista.com ++L: linux-net@vger.kernel.org ++S: Supported ++ + KERNEL NFSD + P: Neil Brown + M: neilb@cse.unsw.edu.au + +_ diff --git a/lustre/kernel_patches/patches/kgdb-ga-docco-fixes-2.5.73.patch b/lustre/kernel_patches/patches/kgdb-ga-docco-fixes-2.5.73.patch new file mode 100644 index 0000000..47d451c --- /dev/null +++ b/lustre/kernel_patches/patches/kgdb-ga-docco-fixes-2.5.73.patch @@ -0,0 +1,347 @@ + +From: "Randy.Dunlap" + +Just some readability fixes. + + + + Documentation/i386/kgdb/kgdb.txt | 135 +++++++++++++++++++-------------------- + 1 files changed, 68 insertions(+), 67 deletions(-) + +diff -puN Documentation/i386/kgdb/kgdb.txt~kgdb-gs-docco-fixes Documentation/i386/kgdb/kgdb.txt +--- 25/Documentation/i386/kgdb/kgdb.txt~kgdb-gs-docco-fixes 2003-06-26 17:32:25.000000000 -0700 ++++ 25-akpm/Documentation/i386/kgdb/kgdb.txt 2003-06-26 17:32:25.000000000 -0700 +@@ -6,10 +6,10 @@ New features: + ============ + 20030505.1827.27 + We are starting to align with the sourceforge version, at least in +-commands. To this end, the boot command sting to start kgdb at ++commands. To this end, the boot command string to start kgdb at + boot time has been changed from "kgdb" to "gdb". + +-Andrew Morton sent a couple of patchs which are now included as follows: ++Andrew Morton sent a couple of patches which are now included as follows: + 1.) We now return a flag to the interrupt handler. + 2.) We no longer use smp_num_cpus (a conflict with the lock meter). + 3.) And from William Lee Irwin III code to make +@@ -27,18 +27,18 @@ This is currently the version for the 2. + + We have several new "features" beginning with this version: + +-1.) Kgdb now syncs the "other" cpus with a cross cpu NMI. No more +- waiting and it will pull that guy out of an irq off spin lock :) ++1.) Kgdb now syncs the "other" CPUs with a cross-CPU NMI. 
No more ++ waiting and it will pull that guy out of an IRQ off spin lock :) + + 2.) We doctored up the code that tells where a task is waiting and + included it so that the "info thread" command will show a bit more + than "schedule()". Try it... + + 3.) Added the ability to call a function from gdb. All the standard gdb +- issues apply, i.e. if you hit a break point in the function you are +- not allowed to call another (gdb limitation, not kgdb). T0 help ++ issues apply, i.e. if you hit a breakpoint in the function, you are ++ not allowed to call another (gdb limitation, not kgdb). To help + this capability we added a memory allocation function. Gdb does not +- return this memory (it is used for stings you pass to that function ++ return this memory (it is used for strings that you pass to that function + you are calling from gdb) so we fixed up a way to allow you to + manually return the memory (see below). + +@@ -61,23 +61,23 @@ $2 = {used_malloc = 0, called_from = 0xc + Things to note here: a.) used_malloc is the amount of memory that + has been malloc'ed to do calls from gdb. You can reclaim this + memory like this: "p kgdb_info.used_malloc=0" Cool, huh? b.) +- cpus_waiting is now "sized" by the number of cpus you enter at +- configure time in the kgdb configure section. This is NOT used any +- where else in the system, but it is "nice" here. c.) The tasks ++ cpus_waiting is now "sized" by the number of CPUs you enter at ++ configure time in the kgdb configure section. This is NOT used ++ anywhere else in the system, but it is "nice" here. c.) The task's + "pid" is now in the structure. This is the pid you will need to use + to decode to the thread id to get gdb to look at that thread. + Remember that the "info thread" command prints a list of threads +- where in it numbers each thread with its reference number followed +- by the threads pid. Note that the per cpu idle threads actually +- have pids of 0 (yes there is more than one pid 0 in an SMP system). 
++ wherein it numbers each thread with its reference number followed ++ by the thread's pid. Note that the per-CPU idle threads actually ++ have pids of 0 (yes, there is more than one pid 0 in an SMP system). + To avoid confusion, kgdb numbers these threads with numbers beyond +- the MAX_PID. That is why you see 32768 above. ++ the MAX_PID. That is why you see 32768 and above. + + 6.) A subtle change, we now provide the complete register set for tasks +- that are active on the other cpus. This allows better trace back on ++ that are active on the other CPUs. This allows better trace back on + those tasks. + +- And, lets mention what we could not fix. Back-trace from all but the ++ And, let's mention what we could not fix. Back-trace from all but the + thread that we trapped will, most likely, have a bogus entry in it. + The problem is that gdb does not recognize the entry code for + functions that use "current" near (at all?) the entry. The compiler +@@ -88,9 +88,9 @@ $2 = {used_malloc = 0, called_from = 0xc + + 20011220.0050.35 + Major enhancement with this version is the ability to hold one or more +-cpus in an SMP system while allowing the others to continue. Also, by +-default only the current cpu is enabled on single step commands (please +-note that gdb issues single step commands at times other than when you ++CPUs in an SMP system while allowing the others to continue. Also, by ++default only the current CPU is enabled on single-step commands (please ++note that gdb issues single-step commands at times other than when you + use the si command). + + Another change is to collect some useful information in +@@ -111,14 +111,14 @@ $4 = {called_from = 0xc010732c, entry_ts + + "Called_from" is the return address from the current entry into kgdb. + Sometimes it is useful to know why you are in kgdb, for example, was +-it an NMI or a real break point? The simple way to interrogate this ++it an NMI or a real breakpoint? 
The simple way to interrogate this + return address is: + + l *0xc010732c + + which will print the surrounding few lines of source code. + +-"Entry_tsc" is the cpu TSC on entry to kgdb (useful to compare to the ++"Entry_tsc" is the CPU TSC on entry to kgdb (useful to compare to the + kgdb_ts entries). + + "errcode" and "vector" are other entry parameters which may be helpful on +@@ -139,34 +139,34 @@ $7 = {called_from = 0xc0112739, entry_ts + hold = 0, regs = 0x0}, {task = 0x0, hold = 0, regs = 0x0}, {task = 0x0, + hold = 0, regs = 0x0}}} + +-"Cpus_waiting" has an entry for each cpu other than the current one that ++"Cpus_waiting" has an entry for each CPU other than the current one that + has been stopped. Each entry contains the task_struct address for that +-cpu, the address of the regs for that task and a hold flag. All these ++CPU, the address of the regs for that task and a hold flag. All these + have the proper typing so that, for example: + + p *kgdb_info.cpus_waiting[1].regs + +-will print the registers for cpu 1. ++will print the registers for CPU 1. + + "Hold_on_sstep" is a new feature with this version and comes up set or +-true. What is means is that whenever kgdb is asked to single step all +-other cpus are held (i.e. not allowed to execute). The flag applies to +-all but the current cpu and, again, can be changed: ++true. What this means is that whenever kgdb is asked to single-step all ++other CPUs are held (i.e. not allowed to execute). The flag applies to ++all but the current CPU and, again, can be changed: + + p kgdb_info.hold_on_sstep=0 + +-restores the old behavior of letting all cpus run during single stepping. ++restores the old behavior of letting all CPUs run during single-stepping. + +-Likewise, each cpu has a "hold" flag, which if set, locks that cpu out +-of execution. Note that this has some risk in cases where the cpus need +-to communicate with each other. 
If kgdb finds no cpu available on exit, ++Likewise, each CPU has a "hold" flag, which if set, locks that CPU out ++of execution. Note that this has some risk in cases where the CPUs need ++to communicate with each other. If kgdb finds no CPU available on exit, + it will push a message thru gdb and stay in kgdb. Note that it is legal +-to hold the current cpu as long as at least one cpu can execute. ++to hold the current CPU as long as at least one CPU can execute. + + 20010621.1117.09 + This version implements an event queue. Events are signaled by calling + a function in the kgdb stub and may be examined from gdb. See EVENTS +-below for details. This version also tighten up the interrupt and SMP ++below for details. This version also tightens up the interrupt and SMP + handling to not allow interrupts on the way to kgdb from a breakpoint + trap. It is fine to allow these interrupts for user code, but not + system debugging. +@@ -190,18 +190,18 @@ machine. This is the machine that will + kernel. + + The two machines will be connected together via a serial line out +-one or the other of the COM ports of the PC. You will need a modem +-eliminator and the appropriate cables. ++one or the other of the COM ports of the PC. You will need the ++appropriate modem eliminator (null modem) cable(s) for this. + + Decide on which tty port you want the machines to communicate, then +-cable them up back-to-back using the null modem. COM1 is /dev/ttyS0 and +-COM2 is /dev/ttyS1. You should test this connection with the two +-machines prior to trying to debug a kernel. Once you have it working, +-on the TARGET machine, enter: ++connect them up back-to-back using the null modem cable. COM1 is ++/dev/ttyS0 and COM2 is /dev/ttyS1. You should test this connection ++with the two machines prior to trying to debug a kernel. Once you ++have it working, on the TARGET machine, enter: + + setserial /dev/ttyS0 (or what ever tty you are using) + +-and record the port and the irq addresses. 
++and record the port address and the IRQ number. + + On the DEVELOPMENT machine you need to apply the patch for the kgdb + hooks. You have probably already done that if you are reading this +@@ -212,7 +212,7 @@ On your DEVELOPMENT machine, go to your + configuring in the standard serial driver, it must not be a module. + Either yes or no is ok, but making the serial driver a module means it + will initialize after kgdb has set up the UART interrupt code and may +-cause a failure of the control C option discussed below. The configure ++cause a failure of the control-C option discussed below. The configure + question for the serial driver is under the "Character devices" heading + and is: + +@@ -231,16 +231,16 @@ The baud rate. Default is 115200. What + the host machine is set to the same speed. I recommend the default. + + The port. This is the I/O address of the serial UART that you should +-have gotten using setserial as described above. The standard com1 port +-(3f8) using irq 4 is default . Com2 is 2f8 which by convention uses irq ++have gotten using setserial as described above. The standard COM1 port ++(3f8) using IRQ 4 is default. COM2 is 2f8 which by convention uses IRQ + 3. + +-The port irq (see above). ++The port IRQ (see above). + + Stack overflow test. This option makes a minor change in the trap, + system call and interrupt code to detect stack overflow and transfer +-control to kgdb if it happens. (Some platforms have this in the base +-line code, but the i386 does not.) ++control to kgdb if it happens. (Some platforms have this in the ++baseline code, but the i386 does not.) + + You can also configure the system to recognize the boot option + "console=kgdb" which if given will cause all console output during +@@ -251,9 +251,9 @@ This will happen before any kernel outpu + and will stall the boot until the connection is made. + + You can also configure in a patch to SysRq to enable the kGdb SysRq. +-This request generates a breakpoint. 
Since the serial port irq line is ++This request generates a breakpoint. Since the serial port IRQ line is + set up after any serial drivers, it is possible that this command will +-work when the control C will not. ++work when the control-C will not. + + Save and exit the Xconfig program. Then do "make clean" , "make dep" + and "make bzImage" (or whatever target you want to make). This gets the +@@ -360,11 +360,11 @@ Triggering kgdbstub at other times + Often you don't need to enter the debugger until much later in the boot + or even after the machine has been running for some time. Once the + kernel is booted and interrupts are on, you can force the system to +-enter the debugger by sending a control C to the debug port. This is ++enter the debugger by sending a control-C to the debug port. This is + what the first line of the recommended .gdbinit file does. This allows + you to start gdb any time after the system is up as well as when the +-system is already at a break point. (In the case where the system is +-already at a break point the control C is not needed, however, it will ++system is already at a breakpoint. (In the case where the system is ++already at a breakpoint the control-C is not needed, however, it will + be ignored by the target so no harm is done. Also note the the echo + command assumes that the port speed is already set. This will be true + once gdb has connected, but it is best to set the port speed before you +@@ -442,7 +442,7 @@ turn on a debug switch with the followin + This will print out the protocol messages that gdb is exchanging with + the target machine. + +-Another place to look is /usr/src/arch/i386/lib/kgdb_serial.c This is ++Another place to look is /usr/src/arch/i386/lib/kgdb_serial.c. This is + the code that talks to the serial port on the target side. There might + be a problem there. 
In particular there is a section of this code that + tests the UART which will tell you what UART you have if you define +@@ -454,7 +454,7 @@ it finds. (You need to wait so that the + printed. Early in the boot they are cached, waiting for the console to + be enabled. Also, if kgdb is entered thru a breakpoint it is possible + to cause a dead lock by calling printk when the console is locked. The +-stub, thus avoids doing printks from break points especially in the ++stub thus avoids doing printks from breakpoints, especially in the + serial code.) At this time, if the UART fails to do the expected thing, + kgdb will print out (using printk) information on what failed. (These + messages will be buried in all the other boot up messages. Look for +@@ -464,7 +464,7 @@ don't connect, review your answers for t + + setserial /dev/ttyS0 + +-to get the current port and irq information. This command will also ++to get the current port and IRQ information. This command will also + tell you what the system found for the UART type. The stub recognizes + the following UART types: + +@@ -581,11 +581,11 @@ When a breakpoint occurs or user issues + client, all the processors are forced to enter the debugger. Current + thread corresponds to the thread running on the processor where + breakpoint occurred. Threads running on other processor(s) appear +-similar to other non running threads in the 'info threads' output. With +-in the kgdb stub there is a structure "waiting_cpus" in which kgdb +-records the values of "current" and "regs" for each cpu other than the ++similar to other non-running threads in the 'info threads' output. ++Within the kgdb stub there is a structure "waiting_cpus" in which kgdb ++records the values of "current" and "regs" for each CPU other than the + one that hit the breakpoint. 
"current" is a pointer to the task +-structure for the task that cpu is running, while "regs" points to the ++structure for the task that CPU is running, while "regs" points to the + saved registers for the task. This structure can be examined with the + gdb "p" command. + +@@ -601,22 +601,23 @@ Kill it. restart gdb. Connect to target + 2. gdb cannot connect to target machine (after killing a gdb and + restarting another) If the target machine was not inside debugger when + you killed gdb, gdb cannot connect because the target machine won't +-respond. In this case echo "Ctrl+C"(ASCII 3) in the serial line. +-e.g. echo -e "\003" > /dev/ttyS1 This forces that target machine into +-debugger after which you can connect. ++respond. In this case echo "Ctrl+C"(ASCII 3) to the serial line. ++e.g. echo -e "\003" > /dev/ttyS1 ++This forces that target machine into the debugger, after which you ++can connect. + + 3. gdb cannot connect even after echoing Ctrl+C into serial line + Try changing serial line settings min to 1 and time to 0 + e.g. stty min 1 time 0 < /dev/ttyS1 + Try echoing again + +-check serial line speed and set it to correct value if required ++Check serial line speed and set it to correct value if required + e.g. stty ispeed 115200 ospeed 115200 < /dev/ttyS1 + + EVENTS + ====== + +-Ever want to know the order of things happening? Which cpu did what and ++Ever want to know the order of things happening? Which CPU did what and + when? How did the spinlock get the way it is? Then events are for + you. Events are defined by calls to an event collection interface and + saved for later examination. In this case, kgdb events are saved by a +@@ -631,7 +632,7 @@ Events are signaled to kgdb by calling: + kgdb_ts(data0,data1) + + For each call kgdb records each call in an array along with other info. 
+-Here is the array def: ++Here is the array definition: + + struct kgdb_and_then_struct { + #ifdef CONFIG_SMP +@@ -646,7 +647,7 @@ struct kgdb_and_then_struct { + int data1; + }; + +-For SMP machines the cpu is recorded, for all machines the TSC is ++For SMP machines the CPU is recorded, for all machines the TSC is + recorded (gets a time stamp) as well as the line number and source file + the call was made from. The address of the (from), the "if" (interrupt + flag) and the two data items are also recorded. The macro kgdb_ts casts +@@ -709,7 +710,7 @@ Amit S. Kale + Extended threads to include the idle threads. + Enhancements to allow breakpoint() at first C code. + Use of module_init() and __setup() to automate the configure. +- Enhanced the cpu "collection" code to work in early bring up. ++ Enhanced the cpu "collection" code to work in early bring-up. + Added ability to call functions from gdb + Print info thread stuff without going back to schedule() +- Now collect the "other" cpus with a IPI/ NMI. +\ No newline at end of file ++ Now collect the "other" cpus with an IPI/ NMI. 
+ +_ diff --git a/lustre/kernel_patches/patches/kgdb-use-ggdb-2.5.73.patch b/lustre/kernel_patches/patches/kgdb-use-ggdb-2.5.73.patch new file mode 100644 index 0000000..da07bd9 --- /dev/null +++ b/lustre/kernel_patches/patches/kgdb-use-ggdb-2.5.73.patch @@ -0,0 +1,17 @@ + arch/i386/Makefile | 2 +- + 1 files changed, 1 insertion(+), 1 deletion(-) + +diff -puN arch/i386/Makefile~kgdb-use-ggdb arch/i386/Makefile +--- 25/arch/i386/Makefile~kgdb-use-ggdb 2003-06-14 22:54:41.000000000 -0700 ++++ 25-akpm/arch/i386/Makefile 2003-06-14 22:54:41.000000000 -0700 +@@ -85,7 +85,7 @@ mcore-$(CONFIG_X86_ES7000) := mach-es700 + # default subarch .h files + mflags-y += -Iinclude/asm-i386/mach-default + +-mflags-$(CONFIG_KGDB) += -g ++mflags-$(CONFIG_KGDB) += -ggdb + mflags-$(CONFIG_KGDB_MORE) += $(shell echo $(CONFIG_KGDB_OPTIONS) | sed -e 's/"//g') + + head-y := arch/i386/kernel/head.o arch/i386/kernel/init_task.o + +_ diff --git a/lustre/kernel_patches/patches/lkcd-kernel-changes-2.5.73.patch b/lustre/kernel_patches/patches/lkcd-kernel-changes-2.5.73.patch new file mode 100644 index 0000000..ebe09e8 --- /dev/null +++ b/lustre/kernel_patches/patches/lkcd-kernel-changes-2.5.73.patch @@ -0,0 +1,608 @@ + + + + arch/i386/Kconfig | 50 ++++++++++++++++++++++++++++ + arch/i386/boot/Makefile | 1 + arch/i386/kernel/i386_ksyms.c | 19 ++++++++++ + arch/i386/kernel/nmi.c | 2 + + arch/i386/kernel/setup.c | 10 +++++ + arch/i386/kernel/smp.c | 16 +++++++- + arch/i386/kernel/traps.c | 2 + + arch/i386/mm/init.c | 6 +++ + arch/s390/boot/Makefile | 2 - + arch/s390/boot/install.sh | 24 +++++++++---- + drivers/Makefile | 1 + include/asm-i386/kmap_types.h | 3 + + include/asm-i386/mach-default/irq_vectors.h | 1 + include/asm-i386/smp.h | 1 + include/linux/major.h | 2 + + include/linux/sysctl.h | 2 + + init/Makefile | 4 ++ + init/main.c | 10 +++++ + init/version.c | 4 ++ + kernel/ksyms.c | 8 ++++ + kernel/panic.c | 17 +++++++++ + kernel/sched.c | 22 ++++++++++++ + lib/Kconfig | 10 +++-- + 
mm/page_alloc.c | 3 + + scripts/mkcompile_h | 4 +- + 25 files changed, 207 insertions(+), 17 deletions(-) + +--- linux-2.5.73/drivers/Makefile~lkcd-kernel-changes-2.5.73 Sun Jun 22 11:33:08 2003 ++++ linux-2.5.73-n9560/drivers/Makefile Mon Jun 30 14:56:26 2003 +@@ -50,3 +50,4 @@ obj-$(CONFIG_ISDN_BOOL) += isdn/ + obj-$(CONFIG_MCA) += mca/ + obj-$(CONFIG_EISA) += eisa/ + obj-$(CONFIG_CPU_FREQ) += cpufreq/ ++obj-$(CONFIG_CRASH_DUMP) += dump/ +--- linux-2.5.73/include/linux/major.h~lkcd-kernel-changes-2.5.73 Sun Jun 22 11:32:37 2003 ++++ linux-2.5.73-n9560/include/linux/major.h Mon Jun 30 14:56:26 2003 +@@ -157,6 +157,8 @@ + + #define OSST_MAJOR 206 /* OnStream-SCx0 SCSI tape */ + ++#define CRASH_DUMP_MAJOR 221 /* crash dump interface */ ++ + #define IBM_TTY3270_MAJOR 227 + #define IBM_FS3270_MAJOR 228 + +--- linux-2.5.73/include/linux/sysctl.h~lkcd-kernel-changes-2.5.73 Sun Jun 22 11:32:55 2003 ++++ linux-2.5.73-n9560/include/linux/sysctl.h Mon Jun 30 14:56:26 2003 +@@ -130,6 +130,8 @@ enum + KERN_PIDMAX=55, /* int: PID # limit */ + KERN_CORE_PATTERN=56, /* string: pattern for core-file names */ + KERN_PANIC_ON_OOPS=57, /* int: whether we will panic on an oops */ ++ ++ KERN_DUMP=60, /* directory: dump parameters */ + }; + + +--- linux-2.5.73/include/asm-i386/mach-default/irq_vectors.h~lkcd-kernel-changes-2.5.73 Sun Jun 22 11:32:38 2003 ++++ linux-2.5.73-n9560/include/asm-i386/mach-default/irq_vectors.h Mon Jun 30 14:56:26 2003 +@@ -48,6 +48,7 @@ + #define INVALIDATE_TLB_VECTOR 0xfd + #define RESCHEDULE_VECTOR 0xfc + #define CALL_FUNCTION_VECTOR 0xfb ++#define DUMP_VECTOR 0xfa + + #define THERMAL_APIC_VECTOR 0xf0 + /* +--- linux-2.5.73/include/asm-i386/kmap_types.h~lkcd-kernel-changes-2.5.73 Sun Jun 22 11:33:01 2003 ++++ linux-2.5.73-n9560/include/asm-i386/kmap_types.h Mon Jun 30 14:56:26 2003 +@@ -24,7 +24,8 @@ D(10) KM_IRQ0, + D(11) KM_IRQ1, + D(12) KM_SOFTIRQ0, + D(13) KM_SOFTIRQ1, +-D(14) KM_TYPE_NR ++D(14) KM_TYPE_NR, ++D(15) KM_DUMP + }; + + #undef D +--- 
linux-2.5.73/include/asm-i386/smp.h~lkcd-kernel-changes-2.5.73 Sun Jun 22 11:32:56 2003 ++++ linux-2.5.73-n9560/include/asm-i386/smp.h Mon Jun 30 14:56:26 2003 +@@ -39,6 +39,7 @@ extern int smp_num_siblings; + extern int cpu_sibling_map[]; + + extern void smp_flush_tlb(void); ++extern void dump_send_ipi(void); + extern void smp_message_irq(int cpl, void *dev_id, struct pt_regs *regs); + extern void smp_send_reschedule(int cpu); + extern void smp_invalidate_rcv(void); /* Process an NMI */ +--- linux-2.5.73/arch/i386/kernel/i386_ksyms.c~lkcd-kernel-changes-2.5.73 Sun Jun 22 11:33:35 2003 ++++ linux-2.5.73-n9560/arch/i386/kernel/i386_ksyms.c Mon Jun 30 14:56:26 2003 +@@ -16,6 +16,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -33,6 +34,7 @@ + #include + #include + #include ++#include + + extern void dump_thread(struct pt_regs *, struct user *); + extern spinlock_t rtc_lock; +@@ -208,3 +210,20 @@ EXPORT_SYMBOL(kmap_atomic_to_page); + EXPORT_SYMBOL(edd); + EXPORT_SYMBOL(eddnr); + #endif ++ ++#ifdef CONFIG_CRASH_DUMP_MODULE ++#ifdef CONFIG_SMP ++extern irq_desc_t irq_desc[NR_IRQS]; ++extern unsigned long irq_affinity[NR_IRQS]; ++extern void stop_this_cpu(void *); ++EXPORT_SYMBOL(irq_desc); ++EXPORT_SYMBOL(irq_affinity); ++EXPORT_SYMBOL(stop_this_cpu); ++EXPORT_SYMBOL(dump_send_ipi); ++#endif ++extern int pfn_is_ram(unsigned long); ++EXPORT_SYMBOL(pfn_is_ram); ++#ifdef ARCH_HAS_NMI_WATCHDOG ++EXPORT_SYMBOL(touch_nmi_watchdog); ++#endif ++#endif +--- linux-2.5.73/arch/i386/kernel/nmi.c~lkcd-kernel-changes-2.5.73 Sun Jun 22 11:32:55 2003 ++++ linux-2.5.73-n9560/arch/i386/kernel/nmi.c Mon Jun 30 14:56:26 2003 +@@ -24,6 +24,7 @@ + #include + #include + #include ++#include + #include + + #include +@@ -426,6 +427,7 @@ void nmi_watchdog_tick (struct pt_regs * + bust_spinlocks(1); + printk("NMI Watchdog detected LOCKUP on CPU%d, eip %08lx, registers:\n", cpu, regs->eip); + show_registers(regs); ++ dump("NMI Watchdog detected LOCKUP", regs); + 
printk("console shuts up ...\n"); + console_silent(); + spin_unlock(&nmi_print_lock); +--- linux-2.5.73/arch/i386/kernel/setup.c~lkcd-kernel-changes-2.5.73 Sun Jun 22 11:32:44 2003 ++++ linux-2.5.73-n9560/arch/i386/kernel/setup.c Mon Jun 30 14:56:26 2003 +@@ -438,6 +438,7 @@ static void __init setup_memory_region(v + print_memory_map(who); + } /* setup_memory_region */ + ++unsigned long crashdump_addr = 0xdeadbeef; + + static void __init parse_cmdline_early (char ** cmdline_p) + { +@@ -531,6 +532,9 @@ static void __init parse_cmdline_early ( + if (c == ' ' && !memcmp(from, "highmem=", 8)) + highmem_pages = memparse(from+8, &from) >> PAGE_SHIFT; + ++ if (c == ' ' && !memcmp(from, "crashdump=", 10)) ++ crashdump_addr = memparse(from+10, &from); ++ + c = *(from++); + if (!c) + break; +@@ -913,6 +917,8 @@ static int __init noreplacement_setup(ch + + __setup("noreplacement", noreplacement_setup); + ++extern void crashdump_reserve(void); ++ + void __init setup_arch(char **cmdline_p) + { + unsigned long max_low_pfn; +@@ -977,6 +983,10 @@ void __init setup_arch(char **cmdline_p) + generic_apic_probe(*cmdline_p); + #endif + ++#ifdef CONFIG_CRASH_DUMP_SOFTBOOT ++ crashdump_reserve(); /* Preserve crash dump state from prev boot */ ++#endif ++ + #ifdef CONFIG_ACPI_BOOT + /* + * Parse the ACPI tables for possible boot-time SMP configuration. +--- linux-2.5.73/arch/i386/kernel/smp.c~lkcd-kernel-changes-2.5.73 Sun Jun 22 11:32:32 2003 ++++ linux-2.5.73-n9560/arch/i386/kernel/smp.c Mon Jun 30 16:01:58 2003 +@@ -19,6 +19,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -144,6 +145,13 @@ inline void __send_IPI_shortcut(unsigned + */ + cfg = __prepare_ICR(shortcut, vector); + ++ if (vector == DUMP_VECTOR) { ++ /* ++ * Setup DUMP IPI to be delivered as an NMI ++ */ ++ cfg = (cfg&~APIC_VECTOR_MASK)|APIC_DM_NMI; ++ } ++ + /* + * Send the IPI. The write to APIC_ICR fires this off. 
+ */ +@@ -450,6 +458,11 @@ void flush_tlb_all(void) + on_each_cpu(do_flush_tlb_all, 0, 1, 1); + } + ++void dump_send_ipi(void) ++{ ++ send_IPI_allbutself(DUMP_VECTOR); ++} ++ + /* + * this function sends a 'reschedule' IPI to another CPU. + * it goes straight through and wastes no time serializing +@@ -528,7 +541,7 @@ int smp_call_function (void (*func) (voi + return 0; + } + +-static void stop_this_cpu (void * dummy) ++void stop_this_cpu (void * dummy) + { + /* + * Remove this CPU: +@@ -589,4 +602,3 @@ asmlinkage void smp_call_function_interr + atomic_inc(&call_data->finished); + } + } +- +--- linux-2.5.73/arch/i386/kernel/traps.c~lkcd-kernel-changes-2.5.73 Sun Jun 22 11:32:35 2003 ++++ linux-2.5.73-n9560/arch/i386/kernel/traps.c Mon Jun 30 14:56:26 2003 +@@ -25,6 +25,7 @@ + #include + #include + #include ++#include + + #ifdef CONFIG_EISA + #include +@@ -258,6 +259,7 @@ void die(const char * str, struct pt_reg + handle_BUG(regs); + printk("%s: %04lx [#%d]\n", str, err & 0xffff, ++die_counter); + show_registers(regs); ++ dump((char *)str, regs); + bust_spinlocks(0); + spin_unlock_irq(&die_lock); + if (in_interrupt()) +--- linux-2.5.73/arch/i386/mm/init.c~lkcd-kernel-changes-2.5.73 Sun Jun 22 11:33:06 2003 ++++ linux-2.5.73-n9560/arch/i386/mm/init.c Mon Jun 30 14:56:26 2003 +@@ -189,6 +189,12 @@ static inline int page_is_ram(unsigned l + return 0; + } + ++/* To enable modules to check if a page is in RAM */ ++int pfn_is_ram(unsigned long pfn) ++{ ++ return (page_is_ram(pfn)); ++} ++ + #ifdef CONFIG_HIGHMEM + pte_t *kmap_pte; + pgprot_t kmap_prot; +--- linux-2.5.73/arch/i386/boot/Makefile~lkcd-kernel-changes-2.5.73 Sun Jun 22 11:32:42 2003 ++++ linux-2.5.73-n9560/arch/i386/boot/Makefile Mon Jun 30 14:56:26 2003 +@@ -101,3 +101,4 @@ zlilo: $(BOOTIMAGE) + + install: $(BOOTIMAGE) + sh $(src)/install.sh $(KERNELRELEASE) $(BOOTIMAGE) System.map "$(INSTALL_PATH)" ++ if [ -f init/kerntypes.o ]; then cp init/kerntypes.o $(INSTALL_PATH)/Kerntypes; fi +--- 
linux-2.5.73/arch/i386/Kconfig~lkcd-kernel-changes-2.5.73 Sun Jun 22 11:32:34 2003 ++++ linux-2.5.73-n9560/arch/i386/Kconfig Mon Jun 30 14:56:26 2003 +@@ -1347,6 +1347,56 @@ source "arch/i386/oprofile/Kconfig" + + menu "Kernel hacking" + ++config CRASH_DUMP ++ tristate "Crash dump support (EXPERIMENTAL)" ++ depends on EXPERIMENTAL ++ default n ++ ---help--- ++ Say Y here to enable saving an image of system memory when a panic ++ or other error occurs. Dumps can also be forced with the SysRq+d ++ key if MAGIC_SYSRQ is enabled. ++ ++config CRASH_DUMP_BLOCKDEV ++ tristate "Crash dump block device driver" ++ depends on CRASH_DUMP ++ help ++ Say Y to allow saving crash dumps directly to a disk device. ++ ++config CRASH_DUMP_NETDEV ++ tristate "Crash dump network device driver" ++ depends on CRASH_DUMP ++ help ++ Say Y to allow saving crash dumps over a network device. ++ ++config CRASH_DUMP_MEMDEV ++ bool "Crash dump staged memory driver" ++ depends on CRASH_DUMP ++ help ++ Say Y to allow intermediate saving of crash dumps in spare ++ memory pages, which are then written out to disk ++ later. ++ ++config CRASH_DUMP_SOFTBOOT ++ bool "Save crash dump across a soft reboot" ++ depends on CRASH_DUMP_MEMDEV ++ help ++ Say Y to allow a crash dump to be preserved in memory ++ pages across a soft reboot and written out to disk ++ thereafter. For this to work, CRASH_DUMP must be ++ configured as part of the kernel (not as a module). ++ ++config CRASH_DUMP_COMPRESS_RLE ++ tristate "Crash dump RLE compression" ++ depends on CRASH_DUMP ++ help ++ Say Y to allow saving dumps with Run Length Encoding compression. ++ ++config CRASH_DUMP_COMPRESS_GZIP ++ tristate "Crash dump GZIP compression" ++ depends on CRASH_DUMP ++ help ++ Say Y to allow saving dumps with GNU zip (gzip) compression. 
++ + config DEBUG_KERNEL + bool "Kernel debugging" + help +--- linux-2.5.73/arch/s390/boot/Makefile~lkcd-kernel-changes-2.5.73 Sun Jun 22 11:33:36 2003 ++++ linux-2.5.73-n9560/arch/s390/boot/Makefile Mon Jun 30 14:56:26 2003 +@@ -16,4 +16,4 @@ $(obj)/image: vmlinux FORCE + + install: $(CONFIGURE) $(obj)/image + sh -x $(obj)/install.sh $(KERNELRELEASE) $(obj)/image \ +- System.map Kerntypes "$(INSTALL_PATH)" ++ System.map init/kerntypes.o "$(INSTALL_PATH)" +--- linux-2.5.73/arch/s390/boot/install.sh~lkcd-kernel-changes-2.5.73 Sun Jun 22 11:32:57 2003 ++++ linux-2.5.73-n9560/arch/s390/boot/install.sh Mon Jun 30 14:56:26 2003 +@@ -16,7 +16,8 @@ + # $1 - kernel version + # $2 - kernel image file + # $3 - kernel map file +-# $4 - default install path (blank if root directory) ++# $4 - kernel type file ++# $5 - default install path (blank if root directory) + # + + # User may have a custom install script +@@ -26,13 +27,22 @@ if [ -x /sbin/installkernel ]; then exec + + # Default install - same as make zlilo + +-if [ -f $4/vmlinuz ]; then +- mv $4/vmlinuz $4/vmlinuz.old ++if [ -f $5/vmlinuz ]; then ++ mv $5/vmlinuz $5/vmlinuz.old + fi + +-if [ -f $4/System.map ]; then +- mv $4/System.map $4/System.old ++if [ -f $5/System.map ]; then ++ mv $5/System.map $5/System.old + fi + +-cat $2 > $4/vmlinuz +-cp $3 $4/System.map ++if [ -f $5/Kerntypes ]; then ++ mv $5/Kerntypes $5/Kerntypes.old ++fi ++ ++cat $2 > $5/vmlinuz ++cp $3 $5/System.map ++ ++# copy the kernel type file if it exists ++if [ -f $4 ]; then ++ cp $4 $5/Kerntypes ++fi +--- linux-2.5.73/scripts/mkcompile_h~lkcd-kernel-changes-2.5.73 Sun Jun 22 11:33:36 2003 ++++ linux-2.5.73-n9560/scripts/mkcompile_h Mon Jun 30 14:56:26 2003 +@@ -33,7 +33,7 @@ UTS_VERSION="$UTS_VERSION `LANG=C date`" + + UTS_LEN=64 + UTS_TRUNCATE="sed -e s/\(.\{1,$UTS_LEN\}\).*/\1/" +- ++LINUX_COMPILE_VERSION_ID="__linux_compile_version_id__`hostname | tr -c '[0-9A-Za-z\n]' '__'`_`LANG=C date | tr -c '[0-9A-Za-z\n]' '_'`" + # Generate a temporary 
compile.h + + ( echo /\* This file is auto generated, version $VERSION \*/ +@@ -55,6 +55,8 @@ UTS_TRUNCATE="sed -e s/\(.\{1,$UTS_LEN\} + fi + + echo \#define LINUX_COMPILER \"`$CC -v 2>&1 | tail -1`\" ++ echo \#define LINUX_COMPILE_VERSION_ID $LINUX_COMPILE_VERSION_ID ++ echo \#define LINUX_COMPILE_VERSION_ID_TYPE typedef char* "$LINUX_COMPILE_VERSION_ID""_t" + ) > .tmpcompile + + # Only replace the real compile.h if the new one is different, +--- linux-2.5.73/kernel/ksyms.c~lkcd-kernel-changes-2.5.73 Sun Jun 22 11:32:30 2003 ++++ linux-2.5.73-n9560/kernel/ksyms.c Mon Jun 30 14:56:26 2003 +@@ -59,6 +59,8 @@ + #include + #include + #include ++#include ++#include + + #if defined(CONFIG_PROC_FS) + #include +@@ -606,3 +608,9 @@ EXPORT_SYMBOL(ptrace_notify); + EXPORT_SYMBOL(console_printk); + + EXPORT_SYMBOL(current_kernel_time); ++ ++#ifdef CONFIG_CRASH_DUMP_MODULE ++EXPORT_SYMBOL(min_low_pfn); ++EXPORT_SYMBOL(dump_oncpu); ++EXPORT_SYMBOL(dump_function_ptr); ++#endif +--- linux-2.5.73/kernel/panic.c~lkcd-kernel-changes-2.5.73 Sun Jun 22 11:33:18 2003 ++++ linux-2.5.73-n9560/kernel/panic.c Mon Jun 30 14:56:26 2003 +@@ -16,12 +16,16 @@ + #include + #include + #include ++#ifdef CONFIG_KEXEC ++#include ++#endif + + asmlinkage void sys_sync(void); /* it's really int */ + + int panic_timeout; + int panic_on_oops; + int tainted; ++void (*dump_function_ptr)(const char *, const struct pt_regs *) = 0; + + struct notifier_block *panic_notifier_list; + +@@ -54,6 +58,7 @@ NORET_TYPE void panic(const char * fmt, + va_start(args, fmt); + vsnprintf(buf, sizeof(buf), fmt, args); + va_end(args); ++ + printk(KERN_EMERG "Kernel panic: %s\n",buf); + if (in_interrupt()) + printk(KERN_EMERG "In interrupt handler - not syncing\n"); +@@ -76,6 +81,18 @@ NORET_TYPE void panic(const char * fmt, + * We can't use the "normal" timers since we just panicked.. 
+ */ + printk(KERN_EMERG "Rebooting in %d seconds..",panic_timeout); ++#ifdef CONFIG_KEXEC ++{ ++ struct kimage *image; ++ image = xchg(&kexec_image, 0); ++ if (image) { ++ printk(KERN_EMERG "by starting a new kernel ..\n"); ++ mdelay(panic_timeout*1000); ++ machine_kexec(image); ++ } ++} ++#endif ++ + mdelay(panic_timeout*1000); + /* + * Should we run the reboot notifier. For the moment Im +--- linux-2.5.73/kernel/sched.c~lkcd-kernel-changes-2.5.73 Sun Jun 22 11:33:08 2003 ++++ linux-2.5.73-n9560/kernel/sched.c Mon Jun 30 14:56:26 2003 +@@ -40,6 +40,9 @@ + #define cpu_to_node_mask(cpu) (cpu_online_map) + #endif + ++/* used to soft spin in sched while dump is in progress */ ++int dump_oncpu; ++ + /* + * Convert user-nice values [ -20 ... 0 ... 19 ] + * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ], +@@ -1249,6 +1252,15 @@ asmlinkage void schedule(void) + struct list_head *queue; + int idx; + ++ /* ++ * If a crash dump is in progress, the other CPUs ++ * need to wait until the dump completes. ++ * NB: this code is optimized away for kernels without ++ * dumping enabled. ++ */ ++ if (unlikely(dump_oncpu)) ++ goto dump_scheduling_disabled; ++ + /* + * Test if we are atomic. Since do_exit() needs to call into + * schedule() atomically, we ignore that path for now. 
+@@ -1336,6 +1348,16 @@ switch_tasks: + preempt_enable_no_resched(); + if (test_thread_flag(TIF_NEED_RESCHED)) + goto need_resched; ++ ++ return; ++ ++ dump_scheduling_disabled: ++ /* allow scheduling only if this is the dumping cpu */ ++ if (dump_oncpu != smp_processor_id()+1) { ++ while (dump_oncpu) ++ cpu_relax(); ++ } ++ return; + } + + #ifdef CONFIG_PREEMPT +--- linux-2.5.73/lib/Kconfig~lkcd-kernel-changes-2.5.73 Sun Jun 22 11:32:56 2003 ++++ linux-2.5.73-n9560/lib/Kconfig Mon Jun 30 14:56:26 2003 +@@ -17,14 +17,16 @@ config CRC32 + # + config ZLIB_INFLATE + tristate +- default y if CRAMFS=y || PPP_DEFLATE=y || JFFS2_FS=y || ZISOFS_FS=y || BINFMT_ZFLAT=y || CRYPTO_DEFLATE=y +- default m if CRAMFS=m || PPP_DEFLATE=m || JFFS2_FS=m || ZISOFS_FS=m || BINFMT_ZFLAT=m || CRYPTO_DEFLATE=m ++ default y if CRAMFS=y || PPP_DEFLATE=y || JFFS2_FS=y || ZISOFS_FS=y || BINFMT_ZFLAT=y || CRYPTO_DEFLATE=y || CRASH_DUMP_COMPRESS_GZIP=y ++ default m if CRAMFS=m || PPP_DEFLATE=m || JFFS2_FS=m || ZISOFS_FS=m || BINFMT_ZFLAT=m || CRYPTO_DEFLATE=m || CRASH_DUMP_COMPRESS_GZIP=m + + config ZLIB_DEFLATE + tristate + default m if PPP_DEFLATE!=y && JFFS2_FS!=y && CRYPTO_DEFLATE!=y && \ +- (PPP_DEFLATE=m || JFFS2_FS=m || CRYPTO_DEFLATE=m) +- default y if PPP_DEFLATE=y || JFFS2_FS=y || CRYPTO_DEFLATE=y ++ (PPP_DEFLATE=m || JFFS2_FS=m || CRYPTO_DEFLATE=m \ ++ || CRASH_DUMP_COMPRESS_GZIP=m ) ++ default y if PPP_DEFLATE=y || JFFS2_FS=y || CRYPTO_DEFLATE=y \ ++ || CRASH_DUMP_COMPRESS_GZIP=y + + endmenu + +--- linux-2.5.73/mm/page_alloc.c~lkcd-kernel-changes-2.5.73 Sun Jun 22 11:32:32 2003 ++++ linux-2.5.73-n9560/mm/page_alloc.c Mon Jun 30 14:56:26 2003 +@@ -84,7 +84,8 @@ static void bad_page(const char *functio + page->mapping = NULL; + } + +-#ifndef CONFIG_HUGETLB_PAGE ++#if !defined(CONFIG_HUGETLB_PAGE) && !defined(CONFIG_CRASH_DUMP) \ ++ && !defined(CONFIG_CRASH_DUMP_MODULE) + #define prep_compound_page(page, order) do { } while (0) + #define destroy_compound_page(page, order) do { } while 
(0) + #else +--- linux-2.5.73/init/Makefile~lkcd-kernel-changes-2.5.73 Sun Jun 22 11:32:42 2003 ++++ linux-2.5.73-n9560/init/Makefile Mon Jun 30 14:56:26 2003 +@@ -9,6 +9,9 @@ mounts-$(CONFIG_BLK_DEV_RAM) += do_mount + mounts-$(CONFIG_BLK_DEV_INITRD) += do_mounts_initrd.o + mounts-$(CONFIG_BLK_DEV_MD) += do_mounts_md.o + ++extra-$(CONFIG_CRASH_DUMP) += kerntypes.o ++CFLAGS_kerntypes.o := -gstabs ++ + # files to be removed upon make clean + clean-files := ../include/linux/compile.h + +@@ -24,3 +27,4 @@ $(obj)/version.o: include/linux/compile. + include/linux/compile.h: FORCE + @echo ' CHK $@' + @sh $(srctree)/scripts/mkcompile_h $@ "$(UTS_MACHINE)" "$(CONFIG_SMP)" "$(CC) $(CFLAGS)" ++ +--- linux-2.5.73/init/main.c~lkcd-kernel-changes-2.5.73 Sun Jun 22 11:32:35 2003 ++++ linux-2.5.73-n9560/init/main.c Mon Jun 30 14:56:26 2003 +@@ -101,6 +101,16 @@ extern void ipc_init(void); + int system_running = 0; + + /* ++ * The kernel_magic value represents the address of _end, which allows ++ * namelist tools to "match" each other respectively. That way a tool ++ * that looks at /dev/mem can verify that it is using the right System.map ++ * file -- if kernel_magic doesn't equal the namelist value of _end, ++ * something's wrong. 
++ */ ++extern unsigned long _end; ++unsigned long *kernel_magic = &_end; ++ ++/* + * Boot command-line arguments + */ + #define MAX_INIT_ARGS 8 +--- linux-2.5.73/init/version.c~lkcd-kernel-changes-2.5.73 Sun Jun 22 11:32:45 2003 ++++ linux-2.5.73-n9560/init/version.c Mon Jun 30 14:56:26 2003 +@@ -10,6 +10,7 @@ + #include + #include + #include ++#include + + #define version(a) Version_ ## a + #define version_string(a) version(a) +@@ -24,3 +25,6 @@ struct new_utsname system_utsname = { + const char *linux_banner = + "Linux version " UTS_RELEASE " (" LINUX_COMPILE_BY "@" + LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION "\n"; ++ ++const char *LINUX_COMPILE_VERSION_ID = __stringify(LINUX_COMPILE_VERSION_ID); ++LINUX_COMPILE_VERSION_ID_TYPE; + +_ -- 1.8.3.1