From f26480e53ba2cbbe2358bc59c1ef03faf877bc1f Mon Sep 17 00:00:00 2001 From: ericm Date: Fri, 10 Oct 2003 09:31:58 +0000 Subject: [PATCH] merge b_devel to b_eq: 20031010 kernel only --- lustre/include/linux/lustre_log.h | 83 +- .../kernel_patches/patches/ext3-noread-inode.patch | 177 - .../patches/inode-protection-from-pdflush.patch | 29 - .../kernel_patches/patches/kexec-2.5.73-full.patch | 1479 ----- lustre/kernel_patches/patches/kgdb-ga-2.5.73.patch | 5046 --------------- .../patches/kgdb-ga-docco-fixes-2.5.73.patch | 347 -- .../patches/kgdb-use-ggdb-2.5.73.patch | 17 - .../kernel_patches/patches/lkcd-cvs-2.5.69.patch | 6418 -------------------- .../patches/lkcd-kernel-changes-2.5.73.patch | 608 -- .../patches/vfs_intent_2.5.72_rev1.patch | 1031 ---- .../patches/vfs_nointent_2.5.69_rev1.patch | 409 -- lustre/kernel_patches/pc/ext3-noread-inode.pc | 3 - .../pc/inode-protection-from-pdflush.pc | 2 - lustre/kernel_patches/pc/kexec-2.5.73-full.pc | 23 - lustre/kernel_patches/pc/kgdb-ga-2.5.73.pc | 28 - .../pc/kgdb-ga-docco-fixes-2.5.73.pc | 1 - lustre/kernel_patches/pc/kgdb-use-ggdb-2.5.73.pc | 1 - lustre/kernel_patches/pc/lkcd-cvs-2.5.69.pc | 19 - .../pc/lkcd-kernel-changes-2.5.73.pc | 25 - lustre/kernel_patches/pc/vfs_intent_2.5.72_rev1.pc | 14 - .../kernel_patches/pc/vfs_nointent_2.5.69_rev1.pc | 4 - lustre/ldlm/Makefile.mk | 2 +- lustre/llite/llite_lib.c | 263 +- lustre/lov/lov_internal.h | 24 +- lustre/obdfilter/filter_internal.h | 28 +- lustre/obdfilter/filter_io.c | 7 +- lustre/obdfilter/filter_log.c | 117 +- lustre/obdfilter/filter_san.c | 8 +- lustre/ptlrpc/Makefile.mk | 2 +- lustre/ptlrpc/recov_thread.c | 58 +- 30 files changed, 505 insertions(+), 15768 deletions(-) delete mode 100644 lustre/kernel_patches/patches/ext3-noread-inode.patch delete mode 100644 lustre/kernel_patches/patches/inode-protection-from-pdflush.patch delete mode 100644 lustre/kernel_patches/patches/kexec-2.5.73-full.patch delete mode 100644 
lustre/kernel_patches/patches/kgdb-ga-2.5.73.patch delete mode 100644 lustre/kernel_patches/patches/kgdb-ga-docco-fixes-2.5.73.patch delete mode 100644 lustre/kernel_patches/patches/kgdb-use-ggdb-2.5.73.patch delete mode 100644 lustre/kernel_patches/patches/lkcd-cvs-2.5.69.patch delete mode 100644 lustre/kernel_patches/patches/lkcd-kernel-changes-2.5.73.patch delete mode 100644 lustre/kernel_patches/patches/vfs_intent_2.5.72_rev1.patch delete mode 100644 lustre/kernel_patches/patches/vfs_nointent_2.5.69_rev1.patch delete mode 100644 lustre/kernel_patches/pc/ext3-noread-inode.pc delete mode 100644 lustre/kernel_patches/pc/inode-protection-from-pdflush.pc delete mode 100644 lustre/kernel_patches/pc/kexec-2.5.73-full.pc delete mode 100644 lustre/kernel_patches/pc/kgdb-ga-2.5.73.pc delete mode 100644 lustre/kernel_patches/pc/kgdb-ga-docco-fixes-2.5.73.pc delete mode 100644 lustre/kernel_patches/pc/kgdb-use-ggdb-2.5.73.pc delete mode 100644 lustre/kernel_patches/pc/lkcd-cvs-2.5.69.pc delete mode 100644 lustre/kernel_patches/pc/lkcd-kernel-changes-2.5.73.pc delete mode 100644 lustre/kernel_patches/pc/vfs_intent_2.5.72_rev1.pc delete mode 100644 lustre/kernel_patches/pc/vfs_nointent_2.5.69_rev1.pc diff --git a/lustre/include/linux/lustre_log.h b/lustre/include/linux/lustre_log.h index 59bba98..6c53b52 100644 --- a/lustre/include/linux/lustre_log.h +++ b/lustre/include/linux/lustre_log.h @@ -73,8 +73,9 @@ struct llog_handle { /* llog.c - general API */ typedef int (*llog_cb_t)(struct llog_handle *, struct llog_rec_hdr *, void *); int llog_init_handle(struct llog_handle *handle, int flags, struct obd_uuid *uuid); -extern void llog_free_handle(struct llog_handle *handle); int llog_process(struct llog_handle *loghandle, llog_cb_t cb, void *data); +extern struct llog_handle *llog_alloc_handle(void); +extern void llog_free_handle(struct llog_handle *handle); extern int llog_close(struct llog_handle *cathandle); /* llog_cat.c - catalog api */ @@ -89,13 +90,37 @@ int 
llog_cat_cancel_records(struct llog_handle *cathandle, int count, struct llog_cookie *cookies); int llog_cat_process(struct llog_handle *cat_llh, llog_cb_t cb, void *data); -extern struct llog_handle *llog_alloc_handle(void); -extern int llog_init_catalog(struct llog_handle *cathandle, - struct obd_uuid *tgtuuid); -extern int llog_delete_log(struct llog_handle *cathandle, - struct llog_handle *loghandle); -extern struct llog_handle *llog_new_log(struct llog_handle *cathandle, - struct obd_uuid *tgtuuid); +/* llog_obd.c */ +int obd_llog_setup(struct obd_device *obd, struct obd_device *disk_obd, + int index, int count, struct llog_logid *logid); +int obd_llog_cleanup(struct obd_device *obd); +int obd_llog_origin_add(struct obd_export *exp, + int index, + struct llog_rec_hdr *rec, struct lov_stripe_md *lsm, + struct llog_cookie *logcookies, int numcookies); +int obd_llog_repl_cancel(struct obd_device *, struct lov_stripe_md *lsm, + int count, struct llog_cookie *cookies, int flags); + +int llog_obd_setup_logid(struct obd_device *obd, struct obd_device *disk_obd, + int index, int count, struct llog_logid *logid); +int llog_obd_cleanup(struct obd_device *obd); +int llog_obd_origin_add(struct obd_export *exp, + int index, + struct llog_rec_hdr *rec, struct lov_stripe_md *lsm, + struct llog_cookie *logcookies, int numcookies); +int llog_cat_initialize(struct obd_device *obd, int count); + + +/* llog_net.c */ +int llog_initiator_connect(struct obd_device *obd); +int llog_receptor_accept(struct obd_device *obd, struct obd_import *imp); +int llog_origin_handle_cancel(struct obd_device *obd, struct ptlrpc_request *req); + +/* recov_thread.c */ +int llog_obd_repl_cancel(struct obd_device *obd, + struct lov_stripe_md *lsm, int count, + struct llog_cookie *cookies, int flags); + struct llog_operations { int (*lop_write_rec)(struct llog_handle *loghandle, struct llog_rec_hdr *rec, @@ -107,13 +132,20 @@ struct llog_operations { int (*lop_next_block)(struct llog_handle *h, int 
*curr_idx, int next_idx, - __u64 *cur_offset, + __u64 *offset, void *buf, int len); int (*lop_create)(struct obd_device *obd, struct llog_handle **, struct llog_logid *logid, char *name); int (*lop_close)(struct llog_handle *handle); int (*lop_read_header)(struct llog_handle *handle); + /* for devices in stacks, using other obd's for log storage */ + int (*lop_origin_setup)(struct obd_device *, int); + int (*lop_origin_cleanup)(struct obd_device *); + int (*lop_origin_open)(struct obd_device *originator, + struct obd_device *disk_obd, + int index, int named, int flags, + struct obd_uuid *log_uuid); }; extern struct llog_operations llog_lvfs_ops; @@ -253,4 +285,37 @@ static inline int llog_create(struct obd_device *obd, struct llog_handle **res, RETURN(rc); } + +/* llog obd interfaces */ +#define LLOG_OBD_MAX_HANDLES 3 + +/* MDS stored handles in OSC */ +#define LLOG_OBD_DEL_LOG_HANDLE 0 + +/* OBDFILTER stored handles in OBDFILTER */ +#define LLOG_OBD_SZ_LOG_HANDLE 0 +#define LLOG_OBD_RD1_LOG_HANDLE 1 + +struct llog_obd_ctxt { + struct obd_device *loc_obd; + struct llog_handle *loc_handles[LLOG_OBD_MAX_HANDLES]; + struct llog_commit_data *loc_llcd; + struct semaphore loc_sem; /* protects loc_llcd */ + struct obd_import *loc_imp; +}; + +void llog_obd_cleanup_ctxt(struct obd_device *obd); +int obd_log_cancel(struct obd_export *exp, struct llog_handle *cathandle, + void *buf, int count, struct llog_cookie *cookies, int flags); + + +int llog_originator_setup(struct obd_device *, int); +int llog_originator_cleanup(struct obd_device *); +int llog_originator_open(struct obd_device *originator, + struct obd_device *disk_obd, + int index, int named, int flags, + struct obd_uuid *log_uuid); + + + #endif diff --git a/lustre/kernel_patches/patches/ext3-noread-inode.patch b/lustre/kernel_patches/patches/ext3-noread-inode.patch deleted file mode 100644 index a50d12b..0000000 --- a/lustre/kernel_patches/patches/ext3-noread-inode.patch +++ /dev/null @@ -1,177 +0,0 @@ - 
-ext3_get_inode_loc() read inode's block only if: - 1) this inode has no copy in memory - 2) inode's block has another valid inode(s) - -this optimization allows to avoid needless I/O in two cases: -1) just allocated inode is first valid in the inode's block -2) kernel wants to write inode, but buffer in which inode - belongs to gets freed by VM - - - - -diff -puN fs/ext3/inode.c~ext3-noread-inode fs/ext3/inode.c ---- linux-2.5.73/fs/ext3/inode.c~ext3-noread-inode Thu Jul 10 12:03:52 2003 -+++ linux-2.5.73-alexey/fs/ext3/inode.c Thu Jul 10 15:52:59 2003 -@@ -2286,7 +2286,7 @@ out_stop: - * inode's underlying buffer_head on success. - */ - --int ext3_get_inode_loc (struct inode *inode, struct ext3_iloc *iloc) -+int ext3_get_inode_loc (struct inode *inode, struct ext3_iloc *iloc, int in_mem) - { - struct buffer_head *bh = 0; - unsigned long block; -@@ -2328,12 +2328,88 @@ int ext3_get_inode_loc (struct inode *in - EXT3_INODE_SIZE(inode->i_sb); - block = le32_to_cpu(gdp[desc].bg_inode_table) + - (offset >> EXT3_BLOCK_SIZE_BITS(inode->i_sb)); -- if (!(bh = sb_bread(inode->i_sb, block))) { -+ if (!(bh = sb_getblk(inode->i_sb, block))) { - ext3_error (inode->i_sb, "ext3_get_inode_loc", - "unable to read inode block - " - "inode=%lu, block=%lu", inode->i_ino, block); - goto bad_inode; - } -+ if (!buffer_uptodate(bh)) { -+ lock_buffer(bh); -+ if (buffer_uptodate(bh)) { -+ /* someone has already initialized buffer */ -+ unlock_buffer(bh); -+ goto has_buffer; -+ } -+ -+ /* we can't skip I/O if inode is on a disk only */ -+ if (in_mem) { -+ struct buffer_head *bitmap_bh; -+ struct ext3_group_desc *desc; -+ int inodes_per_buffer; -+ int inode_offset, i; -+ int start; -+ -+ /* -+ * if this inode is only valid in buffer we need not I/O -+ */ -+ inodes_per_buffer = bh->b_size / -+ EXT3_INODE_SIZE(inode->i_sb); -+ inode_offset = ((inode->i_ino - 1) % -+ EXT3_INODES_PER_GROUP(inode->i_sb)); -+ start = inode_offset & ~(inodes_per_buffer - 1); -+ -+ /* check is inode bitmap is in 
cache? */ -+ desc = ext3_get_group_desc(inode->i_sb, block_group, NULL); -+ if (!desc) -+ goto make_io; -+ -+ bitmap_bh = sb_getblk(inode->i_sb, le32_to_cpu(desc->bg_inode_bitmap)); -+ if (!bitmap_bh) -+ goto make_io; -+ -+ /* -+ * if inode bitmap isn't in cache then we may end up by 2 reads -+ * instead of 1 read before optimizing. skip it -+ */ -+ if (!buffer_uptodate(bitmap_bh)) { -+ brelse(bitmap_bh); -+ goto make_io; -+ } -+ for (i = start; i < start + inodes_per_buffer; i++) { -+ if (i == inode_offset) -+ continue; -+ if (ext3_test_bit(i, bitmap_bh->b_data)) -+ break; -+ } -+ brelse(bitmap_bh); -+ if (i == start + inodes_per_buffer) { -+ /* all inodes (but our) are free. so, we skip I/O */ -+ memset(bh->b_data, 0, bh->b_size); -+ set_buffer_uptodate(bh); -+ unlock_buffer(bh); -+ goto has_buffer; -+ } -+ } -+ -+make_io: -+ /* -+ * No, there are another valid inodes in the buffer -+ * so, to preserve them we have to read buffer from -+ * the disk -+ */ -+ get_bh(bh); -+ bh->b_end_io = end_buffer_io_sync; -+ submit_bh(READ, bh); -+ wait_on_buffer(bh); -+ if (!buffer_uptodate(bh)) { -+ ext3_error (inode->i_sb, "ext3_get_inode_loc", -+ "unable to read inode block - " -+ "inode=%lu, block=%lu", inode->i_ino, block); -+ goto bad_inode; -+ } -+ } -+ has_buffer: - offset &= (EXT3_BLOCK_SIZE(inode->i_sb) - 1); - - iloc->bh = bh; -@@ -2376,7 +2452,7 @@ void ext3_read_inode(struct inode * inod - endif - if (ext3_iopen_get_inode(inode)) - return; -- if (ext3_get_inode_loc(inode, &iloc)) -+ if (ext3_get_inode_loc(inode, &iloc, 0)) - goto bad_inode; - bh = iloc.bh; - raw_inode = iloc.raw_inode; -@@ -2781,7 +2857,7 @@ ext3_reserve_inode_write(handle_t *handl - { - int err = 0; - if (handle) { -- err = ext3_get_inode_loc(inode, iloc); -+ err = ext3_get_inode_loc(inode, iloc, 1); - if (!err) { - BUFFER_TRACE(iloc->bh, "get_write_access"); - err = ext3_journal_get_write_access(handle, iloc->bh); -@@ -2879,7 +2955,7 @@ ext3_pin_inode(handle_t *handle, struct - - int err = 0; - 
if (handle) { -- err = ext3_get_inode_loc(inode, &iloc); -+ err = ext3_get_inode_loc(inode, &iloc, 1); - if (!err) { - BUFFER_TRACE(iloc.bh, "get_write_access"); - err = journal_get_write_access(handle, iloc.bh); -diff -puN fs/ext3/ialloc.c~ext3-noread-inode fs/ext3/ialloc.c ---- linux-2.5.73/fs/ext3/ialloc.c~ext3-noread-inode Thu Jul 10 13:05:37 2003 -+++ linux-2.5.73-alexey/fs/ext3/ialloc.c Thu Jul 10 13:06:12 2003 -@@ -50,7 +50,7 @@ - * - * Return buffer_head of bitmap on success or NULL. - */ --static struct buffer_head * -+struct buffer_head * - read_inode_bitmap(struct super_block * sb, unsigned long block_group) - { - struct ext3_group_desc *desc; -diff -puN include/linux/ext3_fs.h~ext3-noread-inode include/linux/ext3_fs.h ---- linux-2.5.73/include/linux/ext3_fs.h~ext3-noread-inode Thu Jul 10 13:41:59 2003 -+++ linux-2.5.73-alexey/include/linux/ext3_fs.h Thu Jul 10 14:40:13 2003 -@@ -717,6 +717,8 @@ extern unsigned long ext3_count_free_ino - extern unsigned long ext3_count_dirs (struct super_block *); - extern void ext3_check_inodes_bitmap (struct super_block *); - extern unsigned long ext3_count_free (struct buffer_head *, unsigned); -+extern struct buffer_head * read_inode_bitmap(struct super_block *, unsigned long); -+ - - - /* inode.c */ -@@ -724,7 +726,7 @@ extern int ext3_forget(handle_t *, int, - extern struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *); - extern struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *); - --extern int ext3_get_inode_loc (struct inode *, struct ext3_iloc *); -+extern int ext3_get_inode_loc (struct inode *, struct ext3_iloc *, int); - extern void ext3_read_inode (struct inode *); - extern void ext3_write_inode (struct inode *, int); - extern int ext3_setattr (struct dentry *, struct iattr *); - -_ diff --git a/lustre/kernel_patches/patches/inode-protection-from-pdflush.patch b/lustre/kernel_patches/patches/inode-protection-from-pdflush.patch deleted file mode 100644 
index abfdc32..0000000 --- a/lustre/kernel_patches/patches/inode-protection-from-pdflush.patch +++ /dev/null @@ -1,29 +0,0 @@ - -diff -puN fs/fs-writeback.c~inode-protection-from-pdflush fs/fs-writeback.c ---- linux-2.5.73/fs/fs-writeback.c~inode-protection-from-pdflush Mon Jul 7 01:10:17 2003 -+++ linux-2.5.73-alexey/fs/fs-writeback.c Mon Jul 7 01:10:17 2003 -@@ -198,6 +198,11 @@ static void - __writeback_single_inode(struct inode *inode, - struct writeback_control *wbc) - { -+ if (inode->i_flags & I_SKIP_PDFLUSH) { -+ list_move(&inode->i_list, &inode->i_sb->s_dirty); -+ return; -+ } -+ - if ((wbc->sync_mode != WB_SYNC_ALL) && (inode->i_state & I_LOCK)) { - list_move(&inode->i_list, &inode->i_sb->s_dirty); - return; -diff -puN include/linux/fs.h~inode-protection-from-pdflush include/linux/fs.h ---- linux-2.5.73/include/linux/fs.h~inode-protection-from-pdflush Mon Jul 7 01:22:16 2003 -+++ linux-2.5.73-alexey/include/linux/fs.h Mon Jul 7 01:23:18 2003 -@@ -816,6 +816,7 @@ struct super_operations { - #define I_FREEING 16 - #define I_CLEAR 32 - #define I_NEW 64 -+#define I_SKIP_PDFLUSH 1024 /* inode will be out of pdflush's scope */ - - #define I_DIRTY (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES) - - -_ diff --git a/lustre/kernel_patches/patches/kexec-2.5.73-full.patch b/lustre/kernel_patches/patches/kexec-2.5.73-full.patch deleted file mode 100644 index 3f45a06..0000000 --- a/lustre/kernel_patches/patches/kexec-2.5.73-full.patch +++ /dev/null @@ -1,1479 +0,0 @@ -# This is a BitKeeper generated patch for the following project: -# Project Name: Linux kernel tree -# This patch format is intended for GNU patch command version 2.5 or higher. 
-# This patch includes the following deltas: -# ChangeSet 1.1376 -> 1.1380 -# arch/i386/kernel/smp.c 1.32 -> 1.33 -# kernel/sys.c 1.47 -> 1.48 -# arch/i386/Kconfig 1.62 -> 1.63 -# arch/i386/kernel/Makefile 1.44 -> 1.45 -# kernel/Makefile 1.28 -> 1.29 -# arch/i386/kernel/entry.S 1.64 -> 1.65 -# arch/i386/kernel/reboot.c 1.8 -> 1.9 -# arch/i386/kernel/io_apic.c 1.71 -> 1.72 -# arch/i386/kernel/dmi_scan.c 1.36 -> 1.37 -# fs/aio.c 1.32 -> 1.33 -# include/asm-i386/apicdef.h 1.8 -> 1.9 -# MAINTAINERS 1.149 -> 1.150 -# include/asm-i386/unistd.h 1.26 -> 1.27 -# arch/i386/defconfig 1.96 -> 1.97 -# arch/i386/kernel/i8259.c 1.25 -> 1.26 -# include/asm-i386/apic.h 1.13 -> 1.14 -# arch/i386/kernel/apic.c 1.42 -> 1.43 -# include/linux/reboot.h 1.4 -> 1.5 -# (new) -> 1.1 include/linux/kexec.h -# (new) -> 1.1 include/asm-i386/kexec.h -# (new) -> 1.1 kernel/kexec.c -# (new) -> 1.1 arch/i386/kernel/relocate_kernel.S -# (new) -> 1.1 arch/i386/kernel/machine_kexec.c -# -# The following is the BitKeeper ChangeSet Log -# -------------------------------------------- -# 03/06/23 andyp@andyp.pdx.osdl.net 1.1377 -# kexec2-2.5.73-common.patch -# -------------------------------------------- -# 03/06/23 andyp@andyp.pdx.osdl.net 1.1378 -# kexec2-2.5.73-x86.patch -# -------------------------------------------- -# 03/06/23 andyp@andyp.pdx.osdl.net 1.1379 -# kexec2-2.5.73-syscall.patch -# -------------------------------------------- -# 03/06/23 andyp@andyp.pdx.osdl.net 1.1380 -# kexec2-2.5.73-defconfig.patch -# -------------------------------------------- -# -diff -Nru a/MAINTAINERS b/MAINTAINERS ---- a/MAINTAINERS Mon Jun 23 12:22:26 2003 -+++ b/MAINTAINERS Mon Jun 23 12:22:26 2003 -@@ -1067,6 +1067,17 @@ - W: http://www.cse.unsw.edu.au/~neilb/patches/linux-devel/ - S: Maintained - -+KEXEC -+P: Eric Biederman -+M: ebiederm@xmission.com -+M: ebiederman@lnxi.com -+W: http://www.xmission.com/~ebiederm/files/kexec/ -+P: Andy Pfiffer -+M: andyp@osdl.org -+W: 
http://www.osdl.org/archive/andyp/bloom/Code/Linux/Kexec/ -+L: linux-kernel@vger.kernel.org -+S: Maintained -+ - LANMEDIA WAN CARD DRIVER - P: Andrew Stanley-Jones - M: asj@lanmedia.com -diff -Nru a/arch/i386/Kconfig b/arch/i386/Kconfig ---- a/arch/i386/Kconfig Mon Jun 23 12:22:26 2003 -+++ b/arch/i386/Kconfig Mon Jun 23 12:22:26 2003 -@@ -794,6 +794,23 @@ - depends on ((X86_SUMMIT || X86_GENERICARCH) && NUMA) - default y - -+config KEXEC -+ bool "kexec system call (EXPERIMENTAL)" -+ depends on EXPERIMENTAL -+ help -+ kexec is a system call that implements the ability to shutdown your -+ current kernel, and to start another kernel. It is like a reboot -+ but it is indepedent of the system firmware. And like a reboot -+ you can start any kernel with it not just Linux. -+ -+ The name comes from the similiarity to the exec system call. -+ -+ It is on an going process to be certain the hardware in a machine -+ is properly shutdown, so do not be surprised if this code does not -+ initially work for you. It may help to enable device hotplugging -+ support. As of this writing the exact hardware interface is -+ strongly in flux, so no good recommendation can be made. 
-+ - endmenu - - -diff -Nru a/arch/i386/defconfig b/arch/i386/defconfig ---- a/arch/i386/defconfig Mon Jun 23 12:22:26 2003 -+++ b/arch/i386/defconfig Mon Jun 23 12:22:26 2003 -@@ -72,6 +72,7 @@ - CONFIG_X86_LOCAL_APIC=y - CONFIG_X86_IO_APIC=y - CONFIG_NR_CPUS=32 -+CONFIG_KEXEC=y - CONFIG_X86_MCE=y - # CONFIG_X86_MCE_NONFATAL is not set - CONFIG_X86_MCE_P4THERMAL=y -diff -Nru a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile ---- a/arch/i386/kernel/Makefile Mon Jun 23 12:22:26 2003 -+++ b/arch/i386/kernel/Makefile Mon Jun 23 12:22:26 2003 -@@ -24,6 +24,7 @@ - obj-$(CONFIG_X86_MPPARSE) += mpparse.o - obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o - obj-$(CONFIG_X86_IO_APIC) += io_apic.o -+obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o - obj-$(CONFIG_SOFTWARE_SUSPEND) += suspend.o suspend_asm.o - obj-$(CONFIG_X86_NUMAQ) += numaq.o - obj-$(CONFIG_X86_SUMMIT) += summit.o -diff -Nru a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c ---- a/arch/i386/kernel/apic.c Mon Jun 23 12:22:26 2003 -+++ b/arch/i386/kernel/apic.c Mon Jun 23 12:22:26 2003 -@@ -26,6 +26,7 @@ - #include - #include - #include -+#include - - #include - #include -@@ -175,6 +176,39 @@ - outb(0x70, 0x22); - outb(0x00, 0x23); - } -+#ifdef CONFIG_KEXEC -+ else { -+ /* Go back to Virtual Wire compatibility mode */ -+ unsigned long value; -+ -+ /* For the spurious interrupt use vector F, and enable it */ -+ value = apic_read(APIC_SPIV); -+ value &= ~APIC_VECTOR_MASK; -+ value |= APIC_SPIV_APIC_ENABLED; -+ value |= 0xf; -+ apic_write_around(APIC_SPIV, value); -+ -+ /* For LVT0 make it edge triggered, active high, external and enabled */ -+ value = apic_read(APIC_LVT0); -+ value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING | -+ APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | -+ APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED ); -+ value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; -+ value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXINT); -+ apic_write_around(APIC_LVT0, value); -+ -+ /* For LVT1 make it edge 
triggered, active high, nmi and enabled */ -+ value = apic_read(APIC_LVT1); -+ value &= ~( -+ APIC_MODE_MASK | APIC_SEND_PENDING | -+ APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | -+ APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED); -+ value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; -+ value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI); -+ apic_write_around(APIC_LVT1, value); -+ } -+#endif /* CONFIG_KEXEC */ -+ - } - - void disable_local_APIC(void) -@@ -1113,6 +1147,26 @@ - printk (KERN_INFO "APIC error on CPU%d: %02lx(%02lx)\n", - smp_processor_id(), v , v1); - irq_exit(); -+} -+ -+void stop_apics(void) -+{ -+ /* By resetting the APIC's we disable the nmi watchdog */ -+#if CONFIG_SMP -+ /* -+ * Stop all CPUs and turn off local APICs and the IO-APIC, so -+ * other OSs see a clean IRQ state. -+ */ -+ smp_send_stop(); -+#else -+ disable_local_APIC(); -+#endif -+#if defined(CONFIG_X86_IO_APIC) -+ if (smp_found_config) { -+ disable_IO_APIC(); -+ } -+#endif -+ disconnect_bsp_APIC(); - } - - /* -diff -Nru a/arch/i386/kernel/dmi_scan.c b/arch/i386/kernel/dmi_scan.c ---- a/arch/i386/kernel/dmi_scan.c Mon Jun 23 12:22:26 2003 -+++ b/arch/i386/kernel/dmi_scan.c Mon Jun 23 12:22:26 2003 -@@ -222,31 +222,6 @@ - return 0; - } - --/* -- * Some machines require the "reboot=s" commandline option, this quirk makes that automatic. -- */ --static __init int set_smp_reboot(struct dmi_blacklist *d) --{ --#ifdef CONFIG_SMP -- extern int reboot_smp; -- if (reboot_smp == 0) -- { -- reboot_smp = 1; -- printk(KERN_INFO "%s series board detected. Selecting SMP-method for reboots.\n", d->ident); -- } --#endif -- return 0; --} -- --/* -- * Some machines require the "reboot=b,s" commandline option, this quirk makes that automatic. 
-- */ --static __init int set_smp_bios_reboot(struct dmi_blacklist *d) --{ -- set_smp_reboot(d); -- set_bios_reboot(d); -- return 0; --} - - /* - * Some bioses have a broken protected mode poweroff and need to use realmode -@@ -527,7 +502,7 @@ - MATCH(DMI_BIOS_VERSION, "4.60 PGMA"), - MATCH(DMI_BIOS_DATE, "134526184"), NO_MATCH - } }, -- { set_smp_bios_reboot, "Dell PowerEdge 1300", { /* Handle problems with rebooting on Dell 1300's */ -+ { set_bios_reboot, "Dell PowerEdge 1300", { /* Handle problems with rebooting on Dell 1300's */ - MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"), - MATCH(DMI_PRODUCT_NAME, "PowerEdge 1300/"), - NO_MATCH, NO_MATCH -diff -Nru a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S ---- a/arch/i386/kernel/entry.S Mon Jun 23 12:22:26 2003 -+++ b/arch/i386/kernel/entry.S Mon Jun 23 12:22:26 2003 -@@ -876,5 +876,6 @@ - .long sys_clock_nanosleep - .long sys_statfs64 - .long sys_fstatfs64 -+ .long sys_kexec_load /* 270 */ - - nr_syscalls=(.-sys_call_table)/4 -diff -Nru a/arch/i386/kernel/i8259.c b/arch/i386/kernel/i8259.c ---- a/arch/i386/kernel/i8259.c Mon Jun 23 12:22:26 2003 -+++ b/arch/i386/kernel/i8259.c Mon Jun 23 12:22:26 2003 -@@ -244,9 +244,21 @@ - return 0; - } - -+static int i8259A_shutdown(struct sys_device *dev) -+{ -+ /* Put the i8259A into a quiescent state that -+ * the kernel initialization code can get it -+ * out of. 
-+ */ -+ outb(0xff, 0x21); /* mask all of 8259A-1 */ -+ outb(0xff, 0xA1); /* mask all of 8259A-1 */ -+ return 0; -+} -+ - static struct sysdev_class i8259_sysdev_class = { - set_kset_name("i8259"), - .resume = i8259A_resume, -+ .shutdown = i8259A_shutdown, - }; - - static struct sys_device device_i8259A = { -diff -Nru a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c ---- a/arch/i386/kernel/io_apic.c Mon Jun 23 12:22:26 2003 -+++ b/arch/i386/kernel/io_apic.c Mon Jun 23 12:22:26 2003 -@@ -1562,8 +1562,6 @@ - * Clear the IO-APIC before rebooting: - */ - clear_IO_APIC(); -- -- disconnect_bsp_APIC(); - } - - /* -diff -Nru a/arch/i386/kernel/machine_kexec.c b/arch/i386/kernel/machine_kexec.c ---- /dev/null Wed Dec 31 16:00:00 1969 -+++ b/arch/i386/kernel/machine_kexec.c Mon Jun 23 12:22:26 2003 -@@ -0,0 +1,116 @@ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+ -+/* -+ * machine_kexec -+ * ======================= -+ */ -+ -+ -+static void set_idt(void *newidt, __u16 limit) -+{ -+ unsigned char curidt[6]; -+ -+ /* ia32 supports unaliged loads & stores */ -+ (*(__u16 *)(curidt)) = limit; -+ (*(__u32 *)(curidt +2)) = (unsigned long)(newidt); -+ -+ __asm__ __volatile__ ( -+ "lidt %0\n" -+ : "=m" (curidt) -+ ); -+}; -+ -+ -+static void set_gdt(void *newgdt, __u16 limit) -+{ -+ unsigned char curgdt[6]; -+ -+ /* ia32 supports unaliged loads & stores */ -+ (*(__u16 *)(curgdt)) = limit; -+ (*(__u32 *)(curgdt +2)) = (unsigned long)(newgdt); -+ -+ __asm__ __volatile__ ( -+ "lgdt %0\n" -+ : "=m" (curgdt) -+ ); -+}; -+ -+static void load_segments(void) -+{ -+#define __STR(X) #X -+#define STR(X) __STR(X) -+ -+ __asm__ __volatile__ ( -+ "\tljmp $"STR(__KERNEL_CS)",$1f\n" -+ "\t1:\n" -+ "\tmovl $"STR(__KERNEL_DS)",%eax\n" -+ "\tmovl %eax,%ds\n" -+ "\tmovl %eax,%es\n" -+ "\tmovl %eax,%fs\n" -+ "\tmovl %eax,%gs\n" -+ "\tmovl %eax,%ss\n" -+ ); -+#undef STR -+#undef __STR -+} -+ -+typedef void 
(*relocate_new_kernel_t)( -+ unsigned long indirection_page, unsigned long reboot_code_buffer, -+ unsigned long start_address); -+ -+const extern unsigned char relocate_new_kernel[]; -+extern void relocate_new_kernel_end(void); -+const extern unsigned int relocate_new_kernel_size; -+extern void use_mm(struct mm_struct *mm); -+ -+void machine_kexec(struct kimage *image) -+{ -+ unsigned long indirection_page; -+ unsigned long reboot_code_buffer; -+ relocate_new_kernel_t rnk; -+ -+ /* switch to an mm where the reboot_code_buffer is identity mapped */ -+ use_mm(&init_mm); -+ stop_apics(); -+ -+ /* Interrupts aren't acceptable while we reboot */ -+ local_irq_disable(); -+ reboot_code_buffer = page_to_pfn(image->reboot_code_pages) << PAGE_SHIFT; -+ indirection_page = image->head & PAGE_MASK; -+ -+ /* copy it out */ -+ memcpy((void *)reboot_code_buffer, relocate_new_kernel, relocate_new_kernel_size); -+ -+ /* The segment registers are funny things, they are -+ * automatically loaded from a table, in memory wherever you -+ * set them to a specific selector, but this table is never -+ * accessed again you set the segment to a different selector. -+ * -+ * The more common model is are caches where the behide -+ * the scenes work is done, but is also dropped at arbitrary -+ * times. -+ * -+ * I take advantage of this here by force loading the -+ * segments, before I zap the gdt with an invalid value. -+ */ -+ load_segments(); -+ /* The gdt & idt are now invalid. -+ * If you want to load them you must set up your own idt & gdt. 
-+ */ -+ set_gdt(phys_to_virt(0),0); -+ set_idt(phys_to_virt(0),0); -+ -+ /* now call it */ -+ rnk = (relocate_new_kernel_t) reboot_code_buffer; -+ (*rnk)(indirection_page, reboot_code_buffer, image->start); -+} -diff -Nru a/arch/i386/kernel/reboot.c b/arch/i386/kernel/reboot.c ---- a/arch/i386/kernel/reboot.c Mon Jun 23 12:22:26 2003 -+++ b/arch/i386/kernel/reboot.c Mon Jun 23 12:22:26 2003 -@@ -8,6 +8,7 @@ - #include - #include - #include -+#include - #include "mach_reboot.h" - - /* -@@ -20,8 +21,7 @@ - int reboot_thru_bios; - - #ifdef CONFIG_SMP --int reboot_smp = 0; --static int reboot_cpu = -1; -+int reboot_cpu = -1; /* specifies the internal linux cpu id, not the apicid */ - /* shamelessly grabbed from lib/vsprintf.c for readability */ - #define is_digit(c) ((c) >= '0' && (c) <= '9') - #endif -@@ -43,7 +43,6 @@ - break; - #ifdef CONFIG_SMP - case 's': /* "smp" reboot by executing reset on BSP or other CPU*/ -- reboot_smp = 1; - if (is_digit(*(str+1))) { - reboot_cpu = (int) (*(str+1) - '0'); - if (is_digit(*(str+2))) -@@ -215,42 +214,7 @@ - - void machine_restart(char * __unused) - { --#ifdef CONFIG_SMP -- int cpuid; -- -- cpuid = GET_APIC_ID(apic_read(APIC_ID)); -- -- if (reboot_smp) { -- -- /* check to see if reboot_cpu is valid -- if its not, default to the BSP */ -- if ((reboot_cpu == -1) || -- (reboot_cpu > (NR_CPUS -1)) || -- !(phys_cpu_present_map & (1< -+#include -+ -+ /* Must be relocatable PIC code callable as a C function, that once -+ * it starts can not use the previous processes stack. -+ * -+ */ -+ .globl relocate_new_kernel -+relocate_new_kernel: -+ /* read the arguments and say goodbye to the stack */ -+ movl 4(%esp), %ebx /* indirection_page */ -+ movl 8(%esp), %ebp /* reboot_code_buffer */ -+ movl 12(%esp), %edx /* start address */ -+ -+ /* zero out flags, and disable interrupts */ -+ pushl $0 -+ popfl -+ -+ /* set a new stack at the bottom of our page... 
*/ -+ lea 4096(%ebp), %esp -+ -+ /* store the parameters back on the stack */ -+ pushl %edx /* store the start address */ -+ -+ /* Set cr0 to a known state: -+ * 31 0 == Paging disabled -+ * 18 0 == Alignment check disabled -+ * 16 0 == Write protect disabled -+ * 3 0 == No task switch -+ * 2 0 == Don't do FP software emulation. -+ * 0 1 == Proctected mode enabled -+ */ -+ movl %cr0, %eax -+ andl $~((1<<31)|(1<<18)|(1<<16)|(1<<3)|(1<<2)), %eax -+ orl $(1<<0), %eax -+ movl %eax, %cr0 -+ -+ /* Set cr4 to a known state: -+ * Setting everything to zero seems safe. -+ */ -+ movl %cr4, %eax -+ andl $0, %eax -+ movl %eax, %cr4 -+ -+ jmp 1f -+1: -+ -+ /* Flush the TLB (needed?) */ -+ xorl %eax, %eax -+ movl %eax, %cr3 -+ -+ /* Do the copies */ -+ cld -+0: /* top, read another word for the indirection page */ -+ movl %ebx, %ecx -+ movl (%ebx), %ecx -+ addl $4, %ebx -+ testl $0x1, %ecx /* is it a destination page */ -+ jz 1f -+ movl %ecx, %edi -+ andl $0xfffff000, %edi -+ jmp 0b -+1: -+ testl $0x2, %ecx /* is it an indirection page */ -+ jz 1f -+ movl %ecx, %ebx -+ andl $0xfffff000, %ebx -+ jmp 0b -+1: -+ testl $0x4, %ecx /* is it the done indicator */ -+ jz 1f -+ jmp 2f -+1: -+ testl $0x8, %ecx /* is it the source indicator */ -+ jz 0b /* Ignore it otherwise */ -+ movl %ecx, %esi /* For every source page do a copy */ -+ andl $0xfffff000, %esi -+ -+ movl $1024, %ecx -+ rep ; movsl -+ jmp 0b -+ -+2: -+ -+ /* To be certain of avoiding problems with self modifying code -+ * I need to execute a serializing instruction here. -+ * So I flush the TLB, it's handy, and not processor dependent. 
-+ */ -+ xorl %eax, %eax -+ movl %eax, %cr3 -+ -+ /* set all of the registers to known values */ -+ /* leave %esp alone */ -+ -+ xorl %eax, %eax -+ xorl %ebx, %ebx -+ xorl %ecx, %ecx -+ xorl %edx, %edx -+ xorl %esi, %esi -+ xorl %edi, %edi -+ xorl %ebp, %ebp -+ ret -+relocate_new_kernel_end: -+ -+ .globl relocate_new_kernel_size -+relocate_new_kernel_size: -+ .long relocate_new_kernel_end - relocate_new_kernel -diff -Nru a/arch/i386/kernel/smp.c b/arch/i386/kernel/smp.c ---- a/arch/i386/kernel/smp.c Mon Jun 23 12:22:26 2003 -+++ b/arch/i386/kernel/smp.c Mon Jun 23 12:22:26 2003 -@@ -547,6 +547,30 @@ - - void smp_send_stop(void) - { -+ extern int reboot_cpu; -+ int reboot_cpu_id; -+ -+ /* The boot cpu is always logical cpu 0 */ -+ reboot_cpu_id = 0; -+ -+ /* See if there has been give a command line override . -+ */ -+ if ((reboot_cpu != -1) && !(reboot_cpu >= NR_CPUS) && -+ test_bit(reboot_cpu, &cpu_online_map)) { -+ reboot_cpu_id = reboot_cpu; -+ } -+ -+ /* Make certain the the cpu I'm rebooting on is online */ -+ if (!test_bit(reboot_cpu_id, &cpu_online_map)) { -+ reboot_cpu_id = smp_processor_id(); -+ } -+ -+ /* Make certain I only run on the appropriate processor */ -+ set_cpus_allowed(current, 1 << reboot_cpu_id); -+ -+ /* O.k. Now that I'm on the appropriate processor stop -+ * all of the others. 
-+ */ - smp_call_function(stop_this_cpu, NULL, 1, 0); - - local_irq_disable(); -diff -Nru a/fs/aio.c b/fs/aio.c ---- a/fs/aio.c Mon Jun 23 12:22:26 2003 -+++ b/fs/aio.c Mon Jun 23 12:22:26 2003 -@@ -536,7 +536,7 @@ - return ioctx; - } - --static void use_mm(struct mm_struct *mm) -+void use_mm(struct mm_struct *mm) - { - struct mm_struct *active_mm = current->active_mm; - atomic_inc(&mm->mm_count); -diff -Nru a/include/asm-i386/apic.h b/include/asm-i386/apic.h ---- a/include/asm-i386/apic.h Mon Jun 23 12:22:26 2003 -+++ b/include/asm-i386/apic.h Mon Jun 23 12:22:26 2003 -@@ -97,6 +97,9 @@ - #define NMI_LOCAL_APIC 2 - #define NMI_INVALID 3 - -+extern void stop_apics(void); -+#else -+static inline void stop_apics(void) { } - #endif /* CONFIG_X86_LOCAL_APIC */ - - #endif /* __ASM_APIC_H */ -diff -Nru a/include/asm-i386/apicdef.h b/include/asm-i386/apicdef.h ---- a/include/asm-i386/apicdef.h Mon Jun 23 12:22:26 2003 -+++ b/include/asm-i386/apicdef.h Mon Jun 23 12:22:26 2003 -@@ -86,6 +86,7 @@ - #define APIC_LVT_REMOTE_IRR (1<<14) - #define APIC_INPUT_POLARITY (1<<13) - #define APIC_SEND_PENDING (1<<12) -+#define APIC_MODE_MASK 0x700 - #define GET_APIC_DELIVERY_MODE(x) (((x)>>8)&0x7) - #define SET_APIC_DELIVERY_MODE(x,y) (((x)&~0x700)|((y)<<8)) - #define APIC_MODE_FIXED 0x0 -diff -Nru a/include/asm-i386/kexec.h b/include/asm-i386/kexec.h ---- /dev/null Wed Dec 31 16:00:00 1969 -+++ b/include/asm-i386/kexec.h Mon Jun 23 12:22:26 2003 -@@ -0,0 +1,23 @@ -+#ifndef _I386_KEXEC_H -+#define _I386_KEXEC_H -+ -+#include -+ -+/* -+ * KEXEC_SOURCE_MEMORY_LIMIT maximum page get_free_page can return. -+ * I.e. Maximum page that is mapped directly into kernel memory, -+ * and kmap is not required. -+ * -+ * Someone correct me if FIXADDR_START - PAGEOFFSET is not the correct -+ * calculation for the amount of memory directly mappable into the -+ * kernel memory space. 
-+ */ -+ -+/* Maximum physical address we can use pages from */ -+#define KEXEC_SOURCE_MEMORY_LIMIT (-1UL) -+/* Maximum address we can reach in physical address mode */ -+#define KEXEC_DESTINATION_MEMORY_LIMIT (-1UL) -+ -+#define KEXEC_REBOOT_CODE_SIZE 4096 -+ -+#endif /* _I386_KEXEC_H */ -diff -Nru a/include/asm-i386/unistd.h b/include/asm-i386/unistd.h ---- a/include/asm-i386/unistd.h Mon Jun 23 12:22:26 2003 -+++ b/include/asm-i386/unistd.h Mon Jun 23 12:22:26 2003 -@@ -275,8 +275,9 @@ - #define __NR_clock_nanosleep (__NR_timer_create+8) - #define __NR_statfs64 268 - #define __NR_fstatfs64 269 -+#define __NR_sys_kexec_load 270 - --#define NR_syscalls 270 -+#define NR_syscalls 271 - - /* user-visible error numbers are in the range -1 - -124: see */ - -diff -Nru a/include/linux/kexec.h b/include/linux/kexec.h ---- /dev/null Wed Dec 31 16:00:00 1969 -+++ b/include/linux/kexec.h Mon Jun 23 12:22:26 2003 -@@ -0,0 +1,54 @@ -+#ifndef LINUX_KEXEC_H -+#define LINUX_KEXEC_H -+ -+#if CONFIG_KEXEC -+#include -+#include -+#include -+ -+/* -+ * This structure is used to hold the arguments that are used when loading -+ * kernel binaries. 
-+ */ -+ -+typedef unsigned long kimage_entry_t; -+#define IND_DESTINATION 0x1 -+#define IND_INDIRECTION 0x2 -+#define IND_DONE 0x4 -+#define IND_SOURCE 0x8 -+ -+#define KEXEC_SEGMENT_MAX 8 -+struct kexec_segment { -+ void *buf; -+ size_t bufsz; -+ void *mem; -+ size_t memsz; -+}; -+ -+struct kimage { -+ kimage_entry_t head; -+ kimage_entry_t *entry; -+ kimage_entry_t *last_entry; -+ -+ unsigned long destination; -+ unsigned long offset; -+ -+ unsigned long start; -+ struct page *reboot_code_pages; -+ -+ unsigned long nr_segments; -+ struct kexec_segment segment[KEXEC_SEGMENT_MAX+1]; -+ -+ struct list_head dest_pages; -+ struct list_head unuseable_pages; -+}; -+ -+ -+/* kexec interface functions */ -+extern void machine_kexec(struct kimage *image); -+extern asmlinkage long sys_kexec(unsigned long entry, long nr_segments, -+ struct kexec_segment *segments); -+extern struct kimage *kexec_image; -+#endif -+#endif /* LINUX_KEXEC_H */ -+ -diff -Nru a/include/linux/reboot.h b/include/linux/reboot.h ---- a/include/linux/reboot.h Mon Jun 23 12:22:26 2003 -+++ b/include/linux/reboot.h Mon Jun 23 12:22:26 2003 -@@ -22,6 +22,7 @@ - * POWER_OFF Stop OS and remove all power from system, if possible. - * RESTART2 Restart system using given command string. - * SW_SUSPEND Suspend system using Software Suspend if compiled in -+ * KEXEC Restart the system using a different kernel. 
- */ - - #define LINUX_REBOOT_CMD_RESTART 0x01234567 -@@ -31,6 +32,7 @@ - #define LINUX_REBOOT_CMD_POWER_OFF 0x4321FEDC - #define LINUX_REBOOT_CMD_RESTART2 0xA1B2C3D4 - #define LINUX_REBOOT_CMD_SW_SUSPEND 0xD000FCE2 -+#define LINUX_REBOOT_CMD_KEXEC 0x45584543 - - - #ifdef __KERNEL__ -diff -Nru a/kernel/Makefile b/kernel/Makefile ---- a/kernel/Makefile Mon Jun 23 12:22:26 2003 -+++ b/kernel/Makefile Mon Jun 23 12:22:26 2003 -@@ -18,6 +18,7 @@ - obj-$(CONFIG_CPU_FREQ) += cpufreq.o - obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o - obj-$(CONFIG_SOFTWARE_SUSPEND) += suspend.o -+obj-$(CONFIG_KEXEC) += kexec.o - obj-$(CONFIG_COMPAT) += compat.o - - ifneq ($(CONFIG_IA64),y) -diff -Nru a/kernel/kexec.c b/kernel/kexec.c ---- /dev/null Wed Dec 31 16:00:00 1969 -+++ b/kernel/kexec.c Mon Jun 23 12:22:26 2003 -@@ -0,0 +1,629 @@ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+/* When kexec transitions to the new kernel there is a one to one -+ * mapping between physical and virtual addresses. On processors -+ * where you can disable the MMU this is trivial, and easy. For -+ * others it is still a simple predictable page table to setup. -+ * -+ * In that environment kexec copies the new kernel to it's final -+ * resting place. This means I can only support memory whose -+ * physical address can fit in an unsigned long. In particular -+ * addresses where (pfn << PAGE_SHIFT) > ULONG_MAX cannot be handled. -+ * If the assembly stub has more restrictive requirements -+ * KEXEC_SOURCE_MEMORY_LIMIT and KEXEC_DEST_MEMORY_LIMIT can be -+ * defined more restrictively in . -+ * -+ * The code for the transition from the current kernel to the -+ * the new kernel is placed in the reboot_code_buffer, whose size -+ * is given by KEXEC_REBOOT_CODE_SIZE. In the best case only a single -+ * page of memory is necessary, but some architectures require more. 
-+ * Because this memory must be identity mapped in the transition from -+ * virtual to physical addresses it must live in the range -+ * 0 - TASK_SIZE, as only the user space mappings are arbitrarily -+ * modifyable. -+ * -+ * The assembly stub in the reboot code buffer is passed a linked list -+ * of descriptor pages detailing the source pages of the new kernel, -+ * and the destination addresses of those source pages. As this data -+ * structure is not used in the context of the current OS, it must -+ * be self contained. -+ * -+ * The code has been made to work with highmem pages and will use a -+ * destination page in it's final resting place (if it happens -+ * to allocate it). The end product of this is that most of the -+ * physical address space, and most of ram can be used. -+ * -+ * Future directions include: -+ * - allocating a page table with the reboot code buffer identity -+ * mapped, to simplify machine_kexec and make kexec_on_panic, more -+ * reliable. -+ * - allocating the pages for a page table for machines that cannot -+ * disable their MMUs. (Hammer, Alpha...) -+ */ -+ -+/* KIMAGE_NO_DEST is an impossible destination address..., for -+ * allocating pages whose destination address we do not care about. 
-+ */ -+#define KIMAGE_NO_DEST (-1UL) -+ -+static int kimage_is_destination_range( -+ struct kimage *image, unsigned long start, unsigned long end); -+static struct page *kimage_alloc_reboot_code_pages(struct kimage *image); -+static struct page *kimage_alloc_page(struct kimage *image, unsigned int gfp_mask, unsigned long dest); -+ -+ -+static int kimage_alloc(struct kimage **rimage, -+ unsigned long nr_segments, struct kexec_segment *segments) -+{ -+ int result; -+ struct kimage *image; -+ size_t segment_bytes; -+ struct page *reboot_pages; -+ unsigned long i; -+ -+ /* Allocate a controlling structure */ -+ result = -ENOMEM; -+ image = kmalloc(sizeof(*image), GFP_KERNEL); -+ if (!image) { -+ goto out; -+ } -+ memset(image, 0, sizeof(*image)); -+ image->head = 0; -+ image->entry = &image->head; -+ image->last_entry = &image->head; -+ -+ /* Initialize the list of destination pages */ -+ INIT_LIST_HEAD(&image->dest_pages); -+ -+ /* Initialize the list of unuseable pages */ -+ INIT_LIST_HEAD(&image->unuseable_pages); -+ -+ /* Read in the segments */ -+ image->nr_segments = nr_segments; -+ segment_bytes = nr_segments * sizeof*segments; -+ result = copy_from_user(image->segment, segments, segment_bytes); -+ if (result) -+ goto out; -+ -+ /* Verify we have good destination addresses. The caller is -+ * responsible for making certain we don't attempt to load -+ * the new image into invalid or reserved areas of RAM. This -+ * just verifies it is an address we can use. -+ */ -+ result = -EADDRNOTAVAIL; -+ for(i = 0; i < nr_segments; i++) { -+ unsigned long mend; -+ mend = ((unsigned long)(image->segment[i].mem)) + -+ image->segment[i].memsz; -+ if (mend >= KEXEC_DESTINATION_MEMORY_LIMIT) -+ goto out; -+ } -+ -+ /* Find a location for the reboot code buffer, and add it -+ * the vector of segments so that it's pages will also be -+ * counted as destination pages. 
-+ */ -+ result = -ENOMEM; -+ reboot_pages = kimage_alloc_reboot_code_pages(image); -+ if (!reboot_pages) { -+ printk(KERN_ERR "Could not allocate reboot_code_buffer\n"); -+ goto out; -+ } -+ image->reboot_code_pages = reboot_pages; -+ image->segment[nr_segments].buf = 0; -+ image->segment[nr_segments].bufsz = 0; -+ image->segment[nr_segments].mem = (void *)(page_to_pfn(reboot_pages) << PAGE_SHIFT); -+ image->segment[nr_segments].memsz = KEXEC_REBOOT_CODE_SIZE; -+ image->nr_segments++; -+ -+ result = 0; -+ out: -+ if (result == 0) { -+ *rimage = image; -+ } else { -+ kfree(image); -+ } -+ return result; -+} -+ -+static int kimage_is_destination_range( -+ struct kimage *image, unsigned long start, unsigned long end) -+{ -+ unsigned long i; -+ for(i = 0; i < image->nr_segments; i++) { -+ unsigned long mstart, mend; -+ mstart = (unsigned long)image->segment[i].mem; -+ mend = mstart + image->segment[i].memsz; -+ if ((end > mstart) && (start < mend)) { -+ return 1; -+ } -+ } -+ return 0; -+} -+ -+#ifdef CONFIG_MMU -+static int identity_map_pages(struct page *pages, int order) -+{ -+ struct mm_struct *mm; -+ struct vm_area_struct *vma; -+ int error; -+ mm = &init_mm; -+ vma = 0; -+ -+ down_write(&mm->mmap_sem); -+ error = -ENOMEM; -+ vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); -+ if (!vma) { -+ goto out; -+ } -+ -+ memset(vma, 0, sizeof(vma)); -+ vma->vm_mm = mm; -+ vma->vm_start = page_to_pfn(pages) << PAGE_SHIFT; -+ vma->vm_end = vma->vm_start + (1 << (order + PAGE_SHIFT)); -+ vma->vm_ops = 0; -+ vma->vm_flags = VM_SHARED \ -+ | VM_READ | VM_WRITE | VM_EXEC \ -+ | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC \ -+ | VM_DONTCOPY | VM_RESERVED; -+ vma->vm_page_prot = protection_map[vma->vm_flags & 0xf]; -+ vma->vm_file = NULL; -+ vma->vm_private_data = NULL; -+ INIT_LIST_HEAD(&vma->shared); -+ insert_vm_struct(mm, vma); -+ -+ error = remap_page_range(vma, vma->vm_start, vma->vm_start, -+ vma->vm_end - vma->vm_start, vma->vm_page_prot); -+ if (error) { -+ goto out; -+ } 
-+ -+ error = 0; -+ out: -+ if (error && vma) { -+ kmem_cache_free(vm_area_cachep, vma); -+ vma = 0; -+ } -+ up_write(&mm->mmap_sem); -+ -+ return error; -+} -+#else -+#define identity_map_pages(pages, order) 0 -+#endif -+ -+struct page *kimage_alloc_reboot_code_pages(struct kimage *image) -+{ -+ /* The reboot code buffer is special. It is the only set of -+ * pages that must be allocated in their final resting place, -+ * and the only set of pages whose final resting place we can -+ * pick. -+ * -+ * At worst this runs in O(N) of the image size. -+ */ -+ struct list_head extra_pages, *pos, *next; -+ struct page *pages; -+ unsigned long addr; -+ int order, count; -+ order = get_order(KEXEC_REBOOT_CODE_SIZE); -+ count = 1 << order; -+ INIT_LIST_HEAD(&extra_pages); -+ do { -+ int i; -+ pages = alloc_pages(GFP_HIGHUSER, order); -+ if (!pages) -+ break; -+ for(i = 0; i < count; i++) { -+ SetPageReserved(pages +i); -+ } -+ addr = page_to_pfn(pages) << PAGE_SHIFT; -+ if ((page_to_pfn(pages) >= (TASK_SIZE >> PAGE_SHIFT)) || -+ kimage_is_destination_range(image, addr, addr + KEXEC_REBOOT_CODE_SIZE)) { -+ list_add(&pages->list, &extra_pages); -+ pages = 0; -+ } -+ } while(!pages); -+ if (pages) { -+ int result; -+ result = identity_map_pages(pages, order); -+ if (result < 0) { -+ list_add(&pages->list, &extra_pages); -+ pages = 0; -+ } -+ } -+ /* If I could convert a multi page allocation into a buch of -+ * single page allocations I could add these pages to -+ * image->dest_pages. For now it is simpler to just free the -+ * pages again. 
-+ */ -+ list_for_each_safe(pos, next, &extra_pages) { -+ struct page *page; -+ int i; -+ page = list_entry(pos, struct page, list); -+ for(i = 0; i < count; i++) { -+ ClearPageReserved(pages +i); -+ } -+ list_del(&extra_pages); -+ __free_pages(page, order); -+ } -+ return pages; -+} -+ -+static int kimage_add_entry(struct kimage *image, kimage_entry_t entry) -+{ -+ if (image->offset != 0) { -+ image->entry++; -+ } -+ if (image->entry == image->last_entry) { -+ kimage_entry_t *ind_page; -+ struct page *page; -+ page = kimage_alloc_page(image, GFP_KERNEL, KIMAGE_NO_DEST); -+ if (!page) { -+ return -ENOMEM; -+ } -+ ind_page = page_address(page); -+ *image->entry = virt_to_phys(ind_page) | IND_INDIRECTION; -+ image->entry = ind_page; -+ image->last_entry = -+ ind_page + ((PAGE_SIZE/sizeof(kimage_entry_t)) - 1); -+ } -+ *image->entry = entry; -+ image->entry++; -+ image->offset = 0; -+ return 0; -+} -+ -+static int kimage_set_destination( -+ struct kimage *image, unsigned long destination) -+{ -+ int result; -+ destination &= PAGE_MASK; -+ result = kimage_add_entry(image, destination | IND_DESTINATION); -+ if (result == 0) { -+ image->destination = destination; -+ } -+ return result; -+} -+ -+ -+static int kimage_add_page(struct kimage *image, unsigned long page) -+{ -+ int result; -+ page &= PAGE_MASK; -+ result = kimage_add_entry(image, page | IND_SOURCE); -+ if (result == 0) { -+ image->destination += PAGE_SIZE; -+ } -+ return result; -+} -+ -+ -+static void kimage_free_extra_pages(struct kimage *image) -+{ -+ /* Walk through and free any extra destination pages I may have */ -+ struct list_head *pos, *next; -+ list_for_each_safe(pos, next, &image->dest_pages) { -+ struct page *page; -+ page = list_entry(pos, struct page, list); -+ list_del(&page->list); -+ ClearPageReserved(page); -+ __free_page(page); -+ } -+ /* Walk through and free any unuseable pages I have cached */ -+ list_for_each_safe(pos, next, &image->unuseable_pages) { -+ struct page *page; -+ page = 
list_entry(pos, struct page, list); -+ list_del(&page->list); -+ ClearPageReserved(page); -+ __free_page(page); -+ } -+ -+} -+static int kimage_terminate(struct kimage *image) -+{ -+ int result; -+ result = kimage_add_entry(image, IND_DONE); -+ if (result == 0) { -+ /* Point at the terminating element */ -+ image->entry--; -+ kimage_free_extra_pages(image); -+ } -+ return result; -+} -+ -+#define for_each_kimage_entry(image, ptr, entry) \ -+ for (ptr = &image->head; (entry = *ptr) && !(entry & IND_DONE); \ -+ ptr = (entry & IND_INDIRECTION)? \ -+ phys_to_virt((entry & PAGE_MASK)): ptr +1) -+ -+static void kimage_free(struct kimage *image) -+{ -+ kimage_entry_t *ptr, entry; -+ kimage_entry_t ind = 0; -+ int i, count, order; -+ if (!image) -+ return; -+ kimage_free_extra_pages(image); -+ for_each_kimage_entry(image, ptr, entry) { -+ if (entry & IND_INDIRECTION) { -+ /* Free the previous indirection page */ -+ if (ind & IND_INDIRECTION) { -+ free_page((unsigned long)phys_to_virt(ind & PAGE_MASK)); -+ } -+ /* Save this indirection page until we are -+ * done with it. 
-+ */ -+ ind = entry; -+ } -+ else if (entry & IND_SOURCE) { -+ free_page((unsigned long)phys_to_virt(entry & PAGE_MASK)); -+ } -+ } -+ order = get_order(KEXEC_REBOOT_CODE_SIZE); -+ count = 1 << order; -+ do_munmap(&init_mm, -+ page_to_pfn(image->reboot_code_pages) << PAGE_SHIFT, -+ count << PAGE_SHIFT); -+ for(i = 0; i < count; i++) { -+ ClearPageReserved(image->reboot_code_pages + i); -+ } -+ __free_pages(image->reboot_code_pages, order); -+ kfree(image); -+} -+ -+static kimage_entry_t *kimage_dst_used(struct kimage *image, unsigned long page) -+{ -+ kimage_entry_t *ptr, entry; -+ unsigned long destination = 0; -+ for_each_kimage_entry(image, ptr, entry) { -+ if (entry & IND_DESTINATION) { -+ destination = entry & PAGE_MASK; -+ } -+ else if (entry & IND_SOURCE) { -+ if (page == destination) { -+ return ptr; -+ } -+ destination += PAGE_SIZE; -+ } -+ } -+ return 0; -+} -+ -+static struct page *kimage_alloc_page(struct kimage *image, unsigned int gfp_mask, unsigned long destination) -+{ -+ /* Here we implment safe guards to ensure that a source page -+ * is not copied to it's destination page before the data on -+ * the destination page is no longer useful. -+ * -+ * To do this we maintain the invariant that a source page is -+ * either it's own destination page, or it is not a -+ * destination page at all. -+ * -+ * That is slightly stronger than required, but the proof -+ * that no problems will not occur is trivial, and the -+ * implemenation is simply to verify. -+ * -+ * When allocating all pages normally this algorithm will run -+ * in O(N) time, but in the worst case it will run in O(N^2) -+ * time. If the runtime is a problem the data structures can -+ * be fixed. -+ */ -+ struct page *page; -+ unsigned long addr; -+ -+ /* Walk through the list of destination pages, and see if I -+ * have a match. 
-+ */ -+ list_for_each_entry(page, &image->dest_pages, list) { -+ addr = page_to_pfn(page) << PAGE_SHIFT; -+ if (addr == destination) { -+ list_del(&page->list); -+ return page; -+ } -+ } -+ page = 0; -+ while(1) { -+ kimage_entry_t *old; -+ /* Allocate a page, if we run out of memory give up */ -+ page = alloc_page(gfp_mask); -+ if (!page) { -+ return 0; -+ } -+ SetPageReserved(page); -+ /* If the page cannot be used file it away */ -+ if (page_to_pfn(page) > (KEXEC_SOURCE_MEMORY_LIMIT >> PAGE_SHIFT)) { -+ list_add(&page->list, &image->unuseable_pages); -+ continue; -+ } -+ addr = page_to_pfn(page) << PAGE_SHIFT; -+ -+ /* If it is the destination page we want use it */ -+ if (addr == destination) -+ break; -+ -+ /* If the page is not a destination page use it */ -+ if (!kimage_is_destination_range(image, addr, addr + PAGE_SIZE)) -+ break; -+ -+ /* I know that the page is someones destination page. -+ * See if there is already a source page for this -+ * destination page. And if so swap the source pages. -+ */ -+ old = kimage_dst_used(image, addr); -+ if (old) { -+ /* If so move it */ -+ unsigned long old_addr; -+ struct page *old_page; -+ -+ old_addr = *old & PAGE_MASK; -+ old_page = pfn_to_page(old_addr >> PAGE_SHIFT); -+ copy_highpage(page, old_page); -+ *old = addr | (*old & ~PAGE_MASK); -+ -+ /* The old page I have found cannot be a -+ * destination page, so return it. -+ */ -+ addr = old_addr; -+ page = old_page; -+ break; -+ } -+ else { -+ /* Place the page on the destination list I -+ * will use it later. 
-+ */ -+ list_add(&page->list, &image->dest_pages); -+ } -+ } -+ return page; -+} -+ -+static int kimage_load_segment(struct kimage *image, -+ struct kexec_segment *segment) -+{ -+ unsigned long mstart; -+ int result; -+ unsigned long offset; -+ unsigned long offset_end; -+ unsigned char *buf; -+ -+ result = 0; -+ buf = segment->buf; -+ mstart = (unsigned long)segment->mem; -+ -+ offset_end = segment->memsz; -+ -+ result = kimage_set_destination(image, mstart); -+ if (result < 0) { -+ goto out; -+ } -+ for(offset = 0; offset < segment->memsz; offset += PAGE_SIZE) { -+ struct page *page; -+ char *ptr; -+ size_t size, leader; -+ page = kimage_alloc_page(image, GFP_HIGHUSER, mstart + offset); -+ if (page == 0) { -+ result = -ENOMEM; -+ goto out; -+ } -+ result = kimage_add_page(image, page_to_pfn(page) << PAGE_SHIFT); -+ if (result < 0) { -+ goto out; -+ } -+ ptr = kmap(page); -+ if (segment->bufsz < offset) { -+ /* We are past the end zero the whole page */ -+ memset(ptr, 0, PAGE_SIZE); -+ kunmap(page); -+ continue; -+ } -+ size = PAGE_SIZE; -+ leader = 0; -+ if ((offset == 0)) { -+ leader = mstart & ~PAGE_MASK; -+ } -+ if (leader) { -+ /* We are on the first page zero the unused portion */ -+ memset(ptr, 0, leader); -+ size -= leader; -+ ptr += leader; -+ } -+ if (size > (segment->bufsz - offset)) { -+ size = segment->bufsz - offset; -+ } -+ if (size < (PAGE_SIZE - leader)) { -+ /* zero the trailing part of the page */ -+ memset(ptr + size, 0, (PAGE_SIZE - leader) - size); -+ } -+ result = copy_from_user(ptr, buf + offset, size); -+ kunmap(page); -+ if (result) { -+ result = (result < 0)?result : -EIO; -+ goto out; -+ } -+ } -+ out: -+ return result; -+} -+ -+/* -+ * Exec Kernel system call: for obvious reasons only root may call it. -+ * -+ * This call breaks up into three pieces. -+ * - A generic part which loads the new kernel from the current -+ * address space, and very carefully places the data in the -+ * allocated pages. 
-+ * -+ * - A generic part that interacts with the kernel and tells all of -+ * the devices to shut down. Preventing on-going dmas, and placing -+ * the devices in a consistent state so a later kernel can -+ * reinitialize them. -+ * -+ * - A machine specific part that includes the syscall number -+ * and the copies the image to it's final destination. And -+ * jumps into the image at entry. -+ * -+ * kexec does not sync, or unmount filesystems so if you need -+ * that to happen you need to do that yourself. -+ */ -+struct kimage *kexec_image = 0; -+ -+asmlinkage long sys_kexec_load(unsigned long entry, unsigned long nr_segments, -+ struct kexec_segment *segments, unsigned long flags) -+{ -+ struct kimage *image; -+ int result; -+ -+ /* We only trust the superuser with rebooting the system. */ -+ if (!capable(CAP_SYS_ADMIN)) -+ return -EPERM; -+ -+ /* In case we need just a little bit of special behavior for -+ * reboot on panic -+ */ -+ if (flags != 0) -+ return -EINVAL; -+ -+ if (nr_segments > KEXEC_SEGMENT_MAX) -+ return -EINVAL; -+ image = 0; -+ -+ result = 0; -+ if (nr_segments > 0) { -+ unsigned long i; -+ result = kimage_alloc(&image, nr_segments, segments); -+ if (result) { -+ goto out; -+ } -+ image->start = entry; -+ for(i = 0; i < nr_segments; i++) { -+ result = kimage_load_segment(image, &segments[i]); -+ if (result) { -+ goto out; -+ } -+ } -+ result = kimage_terminate(image); -+ if (result) { -+ goto out; -+ } -+ } -+ -+ image = xchg(&kexec_image, image); -+ -+ out: -+ kimage_free(image); -+ return result; -+} -diff -Nru a/kernel/sys.c b/kernel/sys.c ---- a/kernel/sys.c Mon Jun 23 12:22:26 2003 -+++ b/kernel/sys.c Mon Jun 23 12:22:26 2003 -@@ -16,6 +16,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -207,6 +208,7 @@ - cond_syscall(sys_lookup_dcookie) - cond_syscall(sys_swapon) - cond_syscall(sys_swapoff) -+cond_syscall(sys_kexec_load) - cond_syscall(sys_init_module) - cond_syscall(sys_delete_module) - 
cond_syscall(sys_socketpair) -@@ -450,6 +452,27 @@ - machine_restart(buffer); - break; - -+#ifdef CONFIG_KEXEC -+ case LINUX_REBOOT_CMD_KEXEC: -+ { -+ struct kimage *image; -+ if (arg) { -+ unlock_kernel(); -+ return -EINVAL; -+ } -+ image = xchg(&kexec_image, 0); -+ if (!image) { -+ unlock_kernel(); -+ return -EINVAL; -+ } -+ notifier_call_chain(&reboot_notifier_list, SYS_RESTART, NULL); -+ system_running = 0; -+ device_shutdown(); -+ printk(KERN_EMERG "Starting new kernel\n"); -+ machine_kexec(image); -+ break; -+ } -+#endif - #ifdef CONFIG_SOFTWARE_SUSPEND - case LINUX_REBOOT_CMD_SW_SUSPEND: - if (!software_suspend_enabled) { diff --git a/lustre/kernel_patches/patches/kgdb-ga-2.5.73.patch b/lustre/kernel_patches/patches/kgdb-ga-2.5.73.patch deleted file mode 100644 index b3e0bbe..0000000 --- a/lustre/kernel_patches/patches/kgdb-ga-2.5.73.patch +++ /dev/null @@ -1,5046 +0,0 @@ - - -This kgdb will get called and will trap almost any kernel -fault WITHOUT BEING ARMED. - -It is entered at boot time via "kgdb" in the boot string, -not "gdb". This entry occurs when the first setup on the -boot string is called, not sometime later. You will not -find a "waiting for gdb" on your console, as the console has -not yet been enabled at this time. (Note, this early stuff -is a bit fragile as the full trap table has yet to be -loaded, something I might address, sometime... So don't try -to look at memory that can not be reached, for example. -Once the full trap table is loaded this restriction goes -away.) - -If you hard code it, you can put a breakpoint() as the FIRST -LINE OF C CODE. - -It does NOT use the serial driver, but if the serial driver -is loaded, it tells it to release the port to avoid -conflict. - -The threads stuff is not configurable, does not require -redirection of schedule() calls and does back track to the -first non schedule() caller on the info threads command. If -you switch to the thread, however, it will show it in the -switch code (as it should). 
- -It is MUCH more aggressive and paranoid about grabbing the -other cpus on entry. It issues a "send_nmi_all_but_self()" -rather than depending on them to interrupt or hit an NMI -sometime in the distant future. If a cpu does not come to -the party, it will continue without it so all is not lost. - -It does not have anything to do with IOCTL calls, but does -do the control-C thing. - -There is a LOT of info in the patch which ends up in -.../Documentation/i386/kgdb/* - -There is a nifty little thing call kgdb_ts() (kgdb time -stamp) which is a function you can code calls to which puts -some useful stuff in a circular buffer which can be examined -with the supplied gdb macros. - -It also allows you do to do "p foobar(...)" i.e. to call a -function from gdb, just like gdb allows in program -debugging. - -In an SMP system, you can choose to "hold" any given set of -cpus. It also defaults to holding other cpus on single step -(this can be overridden). - -This said, you can imagine my consternation when I found it -"lost it" on continues on 2.5. I found and fixed this this -early pm, a hold cpu on exit goof on my part. - -Oh, and a final point, the configure options are more -extensive (the serial port is set up here, for example, (can -not wait for a command line to do this)). There is one to -do system call exit tests. This is VERY new and causes the -kernel to hit a hard "int 3" if a system call attempts to -exit with preempt count other than zero. This is a fault, -of course, but the current 2.5 is full of them so I don't -recommend turning this on. 
- - - - - Documentation/i386/kgdb/andthen | 100 + - Documentation/i386/kgdb/debug-nmi.txt | 37 - Documentation/i386/kgdb/gdb-globals.txt | 71 + - Documentation/i386/kgdb/gdbinit | 14 - Documentation/i386/kgdb/gdbinit-modules | 146 ++ - Documentation/i386/kgdb/gdbinit.hw | 117 + - Documentation/i386/kgdb/kgdb.txt | 715 ++++++++++ - Documentation/i386/kgdb/loadmodule.sh | 78 + - MAINTAINERS | 6 - arch/i386/Kconfig | 180 ++ - arch/i386/Makefile | 3 - arch/i386/kernel/Makefile | 1 - arch/i386/kernel/entry.S | 28 - arch/i386/kernel/kgdb_stub.c | 2214 ++++++++++++++++++++++++++++++++ - arch/i386/kernel/nmi.c | 25 - arch/i386/kernel/smp.c | 12 - arch/i386/kernel/traps.c | 86 + - arch/i386/lib/Makefile | 1 - arch/i386/lib/kgdb_serial.c | 485 +++++++ - arch/i386/mm/fault.c | 6 - drivers/char/keyboard.c | 3 - drivers/char/sysrq.c | 15 - drivers/serial/8250.c | 42 - include/asm-i386/bugs.h | 21 - include/asm-i386/kgdb.h | 59 - include/asm-i386/kgdb_local.h | 102 + - include/linux/config.h | 3 - kernel/sched.c | 7 - 28 files changed, 4565 insertions(+), 12 deletions(-) - -diff -puN arch/i386/Kconfig~kgdb-ga arch/i386/Kconfig ---- 25/arch/i386/Kconfig~kgdb-ga 2003-06-25 23:14:17.000000000 -0700 -+++ 25-akpm/arch/i386/Kconfig 2003-06-25 23:14:17.000000000 -0700 -@@ -1419,14 +1419,194 @@ config DEBUG_SPINLOCK_SLEEP - If you say Y here, various routines which may sleep will become very - noisy if they are called with a spinlock held. - -+config KGDB -+ bool "Include kgdb kernel debugger" -+ depends on DEBUG_KERNEL -+ help -+ If you say Y here, the system will be compiled with the debug -+ option (-g) and a debugging stub will be included in the -+ kernel. This stub communicates with gdb on another (host) -+ computer via a serial port. The host computer should have -+ access to the kernel binary file (vmlinux) and a serial port -+ that is connected to the target machine. Gdb can be made to -+ configure the serial port or you can use stty and setserial to -+ do this. 
See the 'target' command in gdb. This option also -+ configures in the ability to request a breakpoint early in the -+ boot process. To request the breakpoint just include 'kgdb' -+ as a boot option when booting the target machine. The system -+ will then break as soon as it looks at the boot options. This -+ option also installs a breakpoint in panic and sends any -+ kernel faults to the debugger. For more information see the -+ Documentation/i386/kgdb.txt file. -+ -+choice -+ depends on KGDB -+ prompt "Debug serial port BAUD" -+ default KGDB_115200BAUD -+ help -+ Gdb and the kernel stub need to agree on the baud rate to be -+ used. Some systems (x86 family at this writing) allow this to -+ be configured. -+ -+config KGDB_9600BAUD -+ bool "9600" -+ -+config KGDB_19200BAUD -+ bool "19200" -+ -+config KGDB_38400BAUD -+ bool "38400" -+ -+config KGDB_57600BAUD -+ bool "57600" -+ -+config KGDB_115200BAUD -+ bool "115200" -+endchoice -+ -+config KGDB_PORT -+ hex "hex I/O port address of the debug serial port" -+ depends on KGDB -+ default 3f8 -+ help -+ Some systems (x86 family at this writing) allow the port -+ address to be configured. The number entered is assumed to be -+ hex, don't put 0x in front of it. The standard address are: -+ COM1 3f8 , irq 4 and COM2 2f8 irq 3. Setserial /dev/ttySx -+ will tell you what you have. It is good to test the serial -+ connection with a live system before trying to debug. -+ -+config KGDB_IRQ -+ int "IRQ of the debug serial port" -+ depends on KGDB -+ default 4 -+ help -+ This is the irq for the debug port. If everything is working -+ correctly and the kernel has interrupts on a control C to the -+ port should cause a break into the kernel debug stub. -+ -+config DEBUG_INFO -+ bool -+ default y -+ -+config KGDB_MORE -+ bool "Add any additional compile options" -+ depends on KGDB -+ default n -+ help -+ Saying yes here turns on the ability to enter additional -+ compile options. 
-+ -+ -+config KGDB_OPTIONS -+ depends on KGDB_MORE -+ string "Additional compile arguments" -+ default "-O1" -+ help -+ This option allows you enter additional compile options for -+ the whole kernel compile. Each platform will have a default -+ that seems right for it. For example on PPC "-ggdb -O1", and -+ for i386 "-O1". Note that by configuring KGDB "-g" is already -+ turned on. In addition, on i386 platforms -+ "-fomit-frame-pointer" is deleted from the standard compile -+ options. -+ -+config NO_KGDB_CPUS -+ int "Number of CPUs" -+ depends on KGDB && SMP -+ default NR_CPUS -+ help -+ -+ This option sets the number of cpus for kgdb ONLY. It is used -+ to prune some internal structures so they look "nice" when -+ displayed with gdb. This is to overcome possibly larger -+ numbers that may have been entered above. Enter the real -+ number to get nice clean kgdb_info displays. -+ -+config KGDB_TS -+ bool "Enable kgdb time stamp macros?" -+ depends on KGDB -+ default n -+ help -+ Kgdb event macros allow you to instrument your code with calls -+ to the kgdb event recording function. The event log may be -+ examined with gdb at a break point. Turning on this -+ capability also allows you to choose how many events to -+ keep. Kgdb always keeps the lastest events. -+ -+choice -+ depends on KGDB_TS -+ prompt "Max number of time stamps to save?" -+ default KGDB_TS_128 -+ -+config KGDB_TS_64 -+ bool "64" -+ -+config KGDB_TS_128 -+ bool "128" -+ -+config KGDB_TS_256 -+ bool "256" -+ -+config KGDB_TS_512 -+ bool "512" -+ -+config KGDB_TS_1024 -+ bool "1024" -+ -+endchoice -+ -+config STACK_OVERFLOW_TEST -+ bool "Turn on kernel stack overflow testing?" -+ depends on KGDB -+ default n -+ help -+ This option enables code in the front line interrupt handlers -+ to check for kernel stack overflow on interrupts and system -+ calls. This is part of the kgdb code on x86 systems. 
-+ -+config KGDB_CONSOLE -+ bool "Enable serial console thru kgdb port" -+ depends on KGDB -+ default n -+ help -+ This option enables the command line "console=kgdb" option. -+ When the system is booted with this option in the command line -+ all kernel printk output is sent to gdb (as well as to other -+ consoles). For this to work gdb must be connected. For this -+ reason, this command line option will generate a breakpoint if -+ gdb has not yet connected. After the gdb continue command is -+ given all pent up console output will be printed by gdb on the -+ host machine. Neither this option, nor KGDB require the -+ serial driver to be configured. -+ -+config KGDB_SYSRQ -+ bool "Turn on SysRq 'G' command to do a break?" -+ depends on KGDB -+ default y -+ help -+ This option includes an option in the SysRq code that allows -+ you to enter SysRq G which generates a breakpoint to the KGDB -+ stub. This will work if the keyboard is alive and can -+ interrupt the system. Because of constraints on when the -+ serial port interrupt can be enabled, this code may allow you -+ to interrupt the system before the serial port control C is -+ available. Just say yes here. -+ - config FRAME_POINTER - bool "Compile the kernel with frame pointers" -+ default KGDB - help - If you say Y here the resulting kernel image will be slightly larger - and slower, but it will give very useful debugging information. - If you don't debug the kernel, you can say N, but we may not be able - to solve problems without frame pointers. 
- -+config MAGIC_SYSRQ -+ bool -+ depends on KGDB_SYSRQ -+ default y -+ - config X86_EXTRA_IRQS - bool - depends on X86_LOCAL_APIC || X86_VOYAGER -diff -puN arch/i386/kernel/entry.S~kgdb-ga arch/i386/kernel/entry.S ---- 25/arch/i386/kernel/entry.S~kgdb-ga 2003-06-25 23:14:17.000000000 -0700 -+++ 25-akpm/arch/i386/kernel/entry.S 2003-06-25 23:14:17.000000000 -0700 -@@ -48,6 +48,18 @@ - #include - #include - #include "irq_vectors.h" -+ /* We do not recover from a stack overflow, but at least -+ * we know it happened and should be able to track it down. -+ */ -+#ifdef CONFIG_STACK_OVERFLOW_TEST -+#define STACK_OVERFLOW_TEST \ -+ testl $7680,%esp; \ -+ jnz 10f; \ -+ call stack_overflow; \ -+10: -+#else -+#define STACK_OVERFLOW_TEST -+#endif - - EBX = 0x00 - ECX = 0x04 -@@ -98,7 +110,8 @@ TSS_ESP0_OFFSET = (4 - 0x200) - pushl %ebx; \ - movl $(__USER_DS), %edx; \ - movl %edx, %ds; \ -- movl %edx, %es; -+ movl %edx, %es; \ -+ STACK_OVERFLOW_TEST - - #define RESTORE_INT_REGS \ - popl %ebx; \ -@@ -298,6 +311,19 @@ syscall_exit: - testw $_TIF_ALLWORK_MASK, %cx # current->work - jne syscall_exit_work - restore_all: -+#ifdef CONFIG_TRAP_BAD_SYSCALL_EXITS -+ movl EFLAGS(%esp), %eax # mix EFLAGS and CS -+ movb CS(%esp), %al -+ testl $(VM_MASK | 3), %eax -+ jz resume_kernelX # returning to kernel or vm86-space -+ -+ cmpl $0,TI_PRE_COUNT(%ebx) # non-zero preempt_count ? -+ jz resume_kernelX -+ -+ int $3 -+ -+resume_kernelX: -+#endif - RESTORE_ALL - - # perform work that needs to be done immediately before resumption -diff -puN /dev/null arch/i386/kernel/kgdb_stub.c ---- /dev/null 2002-08-30 16:31:37.000000000 -0700 -+++ 25-akpm/arch/i386/kernel/kgdb_stub.c 2003-06-25 23:14:17.000000000 -0700 -@@ -0,0 +1,2214 @@ -+/* -+ * -+ * This program is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License as published by the -+ * Free Software Foundation; either version 2, or (at your option) any -+ * later version. 
-+ * -+ * This program is distributed in the hope that it will be useful, but -+ * WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * General Public License for more details. -+ * -+ */ -+ -+/* -+ * Copyright (c) 2000 VERITAS Software Corporation. -+ * -+ */ -+/**************************************************************************** -+ * Header: remcom.c,v 1.34 91/03/09 12:29:49 glenne Exp $ -+ * -+ * Module name: remcom.c $ -+ * Revision: 1.34 $ -+ * Date: 91/03/09 12:29:49 $ -+ * Contributor: Lake Stevens Instrument Division$ -+ * -+ * Description: low level support for gdb debugger. $ -+ * -+ * Considerations: only works on target hardware $ -+ * -+ * Written by: Glenn Engel $ -+ * Updated by: David Grothe -+ * ModuleState: Experimental $ -+ * -+ * NOTES: See Below $ -+ * -+ * Modified for 386 by Jim Kingdon, Cygnus Support. -+ * Compatibility with 2.1.xx kernel by David Grothe -+ * -+ * Changes to allow auto initilization. All that is needed is that it -+ * be linked with the kernel and a break point (int 3) be executed. -+ * The header file defines BREAKPOINT to allow one to do -+ * this. It should also be possible, once the interrupt system is up, to -+ * call putDebugChar("+"). Once this is done, the remote debugger should -+ * get our attention by sending a ^C in a packet. George Anzinger -+ * -+ * Integrated into 2.2.5 kernel by Tigran Aivazian -+ * Added thread support, support for multiple processors, -+ * support for ia-32(x86) hardware debugging. -+ * Amit S. Kale ( akale@veritas.com ) -+ * -+ * -+ * To enable debugger support, two things need to happen. One, a -+ * call to set_debug_traps() is necessary in order to allow any breakpoints -+ * or error conditions to be properly intercepted and reported to gdb. -+ * Two, a breakpoint needs to be generated to begin communication. This -+ * is most easily accomplished by a call to breakpoint(). 
Breakpoint() -+ * simulates a breakpoint by executing an int 3. -+ * -+ ************* -+ * -+ * The following gdb commands are supported: -+ * -+ * command function Return value -+ * -+ * g return the value of the CPU registers hex data or ENN -+ * G set the value of the CPU registers OK or ENN -+ * -+ * mAA..AA,LLLL Read LLLL bytes at address AA..AA hex data or ENN -+ * MAA..AA,LLLL: Write LLLL bytes at address AA.AA OK or ENN -+ * -+ * c Resume at current address SNN ( signal NN) -+ * cAA..AA Continue at address AA..AA SNN -+ * -+ * s Step one instruction SNN -+ * sAA..AA Step one instruction from AA..AA SNN -+ * -+ * k kill -+ * -+ * ? What was the last sigval ? SNN (signal NN) -+ * -+ * All commands and responses are sent with a packet which includes a -+ * checksum. A packet consists of -+ * -+ * $<packet info>#<checksum>. -+ * -+ * where -+ * <packet info> :: <characters representing the command or response> -+ * <checksum> :: < two hex digits computed as modulo 256 sum of <packetinfo>> -+ * -+ * When a packet is received, it is first acknowledged with either '+' or '-'. -+ * '+' indicates a successful transfer. '-' indicates a failed transfer.
-+ * -+ * Example: -+ * -+ * Host: Reply: -+ * $m0,10#2a +$00010203040506070809101112131415#42 -+ * -+ ****************************************************************************/ -+#define KGDB_VERSION "<20030530.0126.22>" -+#include -+#include -+#include /* for strcpy */ -+#include -+#include -+#include -+#include -+#include /* for linux pt_regs struct */ -+#include -+#include -+#include -+#include -+#include -+#include -+ -+/************************************************************************ -+ * -+ * external low-level support routines -+ */ -+typedef void (*Function) (void); /* pointer to a function */ -+ -+/* Thread reference */ -+typedef unsigned char threadref[8]; -+ -+extern void putDebugChar(int); /* write a single character */ -+extern int getDebugChar(void); /* read and return a single char */ -+ -+/************************************************************************/ -+/* BUFMAX defines the maximum number of characters in inbound/outbound buffers*/ -+/* at least NUMREGBYTES*2 are needed for register packets */ -+/* Longer buffer is needed to list all threads */ -+#define BUFMAX 1024 -+ -+char *kgdb_version = KGDB_VERSION; -+ -+/* debug > 0 prints ill-formed commands in valid packets & checksum errors */ -+int debug_regs = 0; /* set to non-zero to print registers */ -+ -+/* filled in by an external module */ -+char *gdb_module_offsets; -+ -+static const char hexchars[] = "0123456789abcdef"; -+ -+/* Number of bytes of registers. */ -+#define NUMREGBYTES 64 -+/* -+ * Note that this register image is in a different order than -+ * the register image that Linux produces at interrupt time. -+ * -+ * Linux's register image is defined by struct pt_regs in ptrace.h. -+ * Just why GDB uses a different order is a historical mystery. 
-+ */ -+enum regnames { _EAX, /* 0 */ -+ _ECX, /* 1 */ -+ _EDX, /* 2 */ -+ _EBX, /* 3 */ -+ _ESP, /* 4 */ -+ _EBP, /* 5 */ -+ _ESI, /* 6 */ -+ _EDI, /* 7 */ -+ _PC /* 8 also known as eip */ , -+ _PS /* 9 also known as eflags */ , -+ _CS, /* 10 */ -+ _SS, /* 11 */ -+ _DS, /* 12 */ -+ _ES, /* 13 */ -+ _FS, /* 14 */ -+ _GS /* 15 */ -+}; -+ -+/*************************** ASSEMBLY CODE MACROS *************************/ -+/* -+ * Put the error code here just in case the user cares. -+ * Likewise, the vector number here (since GDB only gets the signal -+ * number through the usual means, and that's not very specific). -+ * The called_from is the return address so he can tell how we entered kgdb. -+ * This will allow him to seperate out the various possible entries. -+ */ -+#define REMOTE_DEBUG 0 /* set != to turn on printing (also available in info) */ -+ -+#define PID_MAX PID_MAX_DEFAULT -+ -+#ifdef CONFIG_SMP -+void smp_send_nmi_allbutself(void); -+#define IF_SMP(x) x -+#undef MAX_NO_CPUS -+#ifndef CONFIG_NO_KGDB_CPUS -+#define CONFIG_NO_KGDB_CPUS 2 -+#endif -+#if CONFIG_NO_KGDB_CPUS > NR_CPUS -+#define MAX_NO_CPUS NR_CPUS -+#else -+#define MAX_NO_CPUS CONFIG_NO_KGDB_CPUS -+#endif -+#define hold_init hold_on_sstep: 1, -+#define MAX_CPU_MASK (unsigned long)((1LL << MAX_NO_CPUS) - 1LL) -+#define NUM_CPUS num_online_cpus() -+extern volatile unsigned long cpu_callout_map; -+#else -+#define IF_SMP(x) -+#define hold_init -+#undef MAX_NO_CPUS -+#define MAX_NO_CPUS 1 -+#define NUM_CPUS 1 -+#endif -+#define NOCPU (struct task_struct *)0xbad1fbad -+/* *INDENT-OFF* */ -+struct kgdb_info { -+ int used_malloc; -+ void *called_from; -+ long long entry_tsc; -+ int errcode; -+ int vector; -+ int print_debug_info; -+#ifdef CONFIG_SMP -+ int hold_on_sstep; -+ struct { -+ volatile struct task_struct *task; -+ int pid; -+ int hold; -+ struct pt_regs *regs; -+ } cpus_waiting[MAX_NO_CPUS]; -+#endif -+} kgdb_info = {hold_init print_debug_info:REMOTE_DEBUG, vector:-1}; -+ -+/* *INDENT-ON* */ 
-+ -+#define used_m kgdb_info.used_malloc -+/* -+ * This is little area we set aside to contain the stack we -+ * need to build to allow gdb to call functions. We use one -+ * per cpu to avoid locking issues. We will do all this work -+ * with interrupts off so that should take care of the protection -+ * issues. -+ */ -+#define LOOKASIDE_SIZE 200 /* should be more than enough */ -+#define MALLOC_MAX 200 /* Max malloc size */ -+struct { -+ unsigned int esp; -+ int array[LOOKASIDE_SIZE]; -+} fn_call_lookaside[MAX_NO_CPUS]; -+ -+static int trap_cpu; -+static unsigned int OLD_esp; -+ -+#define END_OF_LOOKASIDE &fn_call_lookaside[trap_cpu].array[LOOKASIDE_SIZE] -+#define IF_BIT 0x200 -+#define TF_BIT 0x100 -+ -+#define MALLOC_ROUND 8-1 -+ -+static char malloc_array[MALLOC_MAX]; -+IF_SMP(static void to_gdb(const char *mess)); -+void * -+malloc(int size) -+{ -+ -+ if (size <= (MALLOC_MAX - used_m)) { -+ int old_used = used_m; -+ used_m += ((size + MALLOC_ROUND) & (~MALLOC_ROUND)); -+ return &malloc_array[old_used]; -+ } else { -+ return NULL; -+ } -+} -+ -+/* -+ * Gdb calls functions by pushing agruments, including a return address -+ * on the stack and the adjusting EIP to point to the function. The -+ * whole assumption in GDB is that we are on a different stack than the -+ * one the "user" i.e. code that hit the break point, is on. This, of -+ * course is not true in the kernel. Thus various dodges are needed to -+ * do the call without directly messing with EIP (which we can not change -+ * as it is just a location and not a register. To adjust it would then -+ * require that we move every thing below EIP up or down as needed. This -+ * will not work as we may well have stack relative pointer on the stack -+ * (such as the pointer to regs, for example). 
-+ -+ * So here is what we do: -+ * We detect gdb attempting to store into the stack area and instead, store -+ * into the fn_call_lookaside.array at the same relative location as if it -+ * were the area ESP pointed at. We also trap ESP modifications -+ * and uses these to adjust fn_call_lookaside.esp. On entry -+ * fn_call_lookaside.esp will be set to point at the last entry in -+ * fn_call_lookaside.array. This allows us to check if it has changed, and -+ * if so, on exit, we add the registers we will use to do the move and a -+ * trap/ interrupt return exit sequence. We then adjust the eflags in the -+ * regs array (remember we now have a copy in the fn_call_lookaside.array) to -+ * kill the interrupt bit, AND we change EIP to point at our set up stub. -+ * As part of the register set up we preset the registers to point at the -+ * begining and end of the fn_call_lookaside.array, so all the stub needs to -+ * do is move words from the array to the stack until ESP= the desired value -+ * then do the rti. This will then transfer to the desired function with -+ * all the correct registers. Nifty huh? -+ */ -+extern asmlinkage void fn_call_stub(void); -+extern asmlinkage void fn_rtn_stub(void); -+/* *INDENT-OFF* */ -+__asm__("fn_rtn_stub:\n\t" -+ "movl %eax,%esp\n\t" -+ "fn_call_stub:\n\t" -+ "1:\n\t" -+ "addl $-4,%ebx\n\t" -+ "movl (%ebx), %eax\n\t" -+ "pushl %eax\n\t" -+ "cmpl %esp,%ecx\n\t" -+ "jne 1b\n\t" -+ "popl %eax\n\t" -+ "popl %ebx\n\t" -+ "popl %ecx\n\t" -+ "iret \n\t"); -+/* *INDENT-ON* */ -+#define gdb_i386vector kgdb_info.vector -+#define gdb_i386errcode kgdb_info.errcode -+#define waiting_cpus kgdb_info.cpus_waiting -+#define remote_debug kgdb_info.print_debug_info -+#define hold_cpu(cpu) kgdb_info.cpus_waiting[cpu].hold -+/* gdb locks */ -+ -+#ifdef CONFIG_SMP -+static int in_kgdb_called; -+static spinlock_t waitlocks[MAX_NO_CPUS] = -+ {[0 ... 
MAX_NO_CPUS - 1] = SPIN_LOCK_UNLOCKED }; -+/* -+ * The following array has the thread pointer of each of the "other" -+ * cpus. We make it global so it can be seen by gdb. -+ */ -+volatile int in_kgdb_entry_log[MAX_NO_CPUS]; -+volatile struct pt_regs *in_kgdb_here_log[MAX_NO_CPUS]; -+/* -+static spinlock_t continuelocks[MAX_NO_CPUS]; -+*/ -+spinlock_t kgdb_spinlock = SPIN_LOCK_UNLOCKED; -+/* waiters on our spinlock plus us */ -+static atomic_t spinlock_waiters = ATOMIC_INIT(1); -+static int spinlock_count = 0; -+static int spinlock_cpu = 0; -+/* -+ * Note we use nested spin locks to account for the case where a break -+ * point is encountered when calling a function by user direction from -+ * kgdb. Also there is the memory exception recursion to account for. -+ * Well, yes, but this lets other cpus thru too. Lets add a -+ * cpu id to the lock. -+ */ -+#define KGDB_SPIN_LOCK(x) if( spinlock_count == 0 || \ -+ spinlock_cpu != smp_processor_id()){\ -+ atomic_inc(&spinlock_waiters); \ -+ while (! 
spin_trylock(x)) {\ -+ in_kgdb(&regs);\ -+ }\ -+ atomic_dec(&spinlock_waiters); \ -+ spinlock_count = 1; \ -+ spinlock_cpu = smp_processor_id(); \ -+ }else{ \ -+ spinlock_count++; \ -+ } -+#define KGDB_SPIN_UNLOCK(x) if( --spinlock_count == 0) spin_unlock(x) -+extern volatile unsigned long cpu_callout_map; -+#else -+unsigned kgdb_spinlock = 0; -+#define KGDB_SPIN_LOCK(x) --*x -+#define KGDB_SPIN_UNLOCK(x) ++*x -+#endif -+ -+int -+hex(char ch) -+{ -+ if ((ch >= 'a') && (ch <= 'f')) -+ return (ch - 'a' + 10); -+ if ((ch >= '0') && (ch <= '9')) -+ return (ch - '0'); -+ if ((ch >= 'A') && (ch <= 'F')) -+ return (ch - 'A' + 10); -+ return (-1); -+} -+ -+/* scan for the sequence $<data>#<checksum> */ -+void -+getpacket(char *buffer) -+{ -+ unsigned char checksum; -+ unsigned char xmitcsum; -+ int i; -+ int count; -+ char ch; -+ -+ do { -+ /* wait around for the start character, ignore all other characters */ -+ while ((ch = (getDebugChar() & 0x7f)) != '$') ; -+ checksum = 0; -+ xmitcsum = -1; -+ -+ count = 0; -+ -+ /* now, read until a # or end of buffer is found */ -+ while (count < BUFMAX) { -+ ch = getDebugChar() & 0x7f; -+ if (ch == '#') -+ break; -+ checksum = checksum + ch; -+ buffer[count] = ch; -+ count = count + 1; -+ } -+ buffer[count] = 0; -+ -+ if (ch == '#') { -+ xmitcsum = hex(getDebugChar() & 0x7f) << 4; -+ xmitcsum += hex(getDebugChar() & 0x7f); -+ if ((remote_debug) && (checksum != xmitcsum)) { -+ printk -+ ("bad checksum. My count = 0x%x, sent=0x%x.
buf=%s\n", -+ checksum, xmitcsum, buffer); -+ } -+ -+ if (checksum != xmitcsum) -+ putDebugChar('-'); /* failed checksum */ -+ else { -+ putDebugChar('+'); /* successful transfer */ -+ /* if a sequence char is present, reply the sequence ID */ -+ if (buffer[2] == ':') { -+ putDebugChar(buffer[0]); -+ putDebugChar(buffer[1]); -+ /* remove sequence chars from buffer */ -+ count = strlen(buffer); -+ for (i = 3; i <= count; i++) -+ buffer[i - 3] = buffer[i]; -+ } -+ } -+ } -+ } while (checksum != xmitcsum); -+ -+ if (remote_debug) -+ printk("R:%s\n", buffer); -+} -+ -+/* send the packet in buffer. */ -+ -+void -+putpacket(char *buffer) -+{ -+ unsigned char checksum; -+ int count; -+ char ch; -+ -+ /* $<packet info>#<checksum>. */ -+ do { -+ if (remote_debug) -+ printk("T:%s\n", buffer); -+ putDebugChar('$'); -+ checksum = 0; -+ count = 0; -+ -+ while ((ch = buffer[count])) { -+ putDebugChar(ch); -+ checksum += ch; -+ count += 1; -+ } -+ -+ putDebugChar('#'); -+ putDebugChar(hexchars[checksum >> 4]); -+ putDebugChar(hexchars[checksum % 16]); -+ -+ } while ((getDebugChar() & 0x7f) != '+'); -+ -+} -+ -+static char remcomInBuffer[BUFMAX]; -+static char remcomOutBuffer[BUFMAX]; -+static short error; -+ -+void -+debug_error(char *format, char *parm) -+{ -+ if (remote_debug) -+ printk(format, parm); -+} -+ -+static void -+print_regs(struct pt_regs *regs) -+{ -+ printk("EAX=%08lx ", regs->eax); -+ printk("EBX=%08lx ", regs->ebx); -+ printk("ECX=%08lx ", regs->ecx); -+ printk("EDX=%08lx ", regs->edx); -+ printk("\n"); -+ printk("ESI=%08lx ", regs->esi); -+ printk("EDI=%08lx ", regs->edi); -+ printk("EBP=%08lx ", regs->ebp); -+ printk("ESP=%08lx ", (long) &regs->esp); -+ printk("\n"); -+ printk(" DS=%08x ", regs->xds); -+ printk(" ES=%08x ", regs->xes); -+ printk(" SS=%08x ", __KERNEL_DS); -+ printk(" FL=%08lx ", regs->eflags); -+ printk("\n"); -+ printk(" CS=%08x ", regs->xcs); -+ printk(" IP=%08lx ", regs->eip); -+#if 0 -+ printk(" FS=%08x ", regs->fs); -+ printk(" GS=%08x ", regs->gs); -+#endif -+
printk("\n"); -+ -+} /* print_regs */ -+ -+#define NEW_esp fn_call_lookaside[trap_cpu].esp -+ -+static void -+regs_to_gdb_regs(int *gdb_regs, struct pt_regs *regs) -+{ -+ gdb_regs[_EAX] = regs->eax; -+ gdb_regs[_EBX] = regs->ebx; -+ gdb_regs[_ECX] = regs->ecx; -+ gdb_regs[_EDX] = regs->edx; -+ gdb_regs[_ESI] = regs->esi; -+ gdb_regs[_EDI] = regs->edi; -+ gdb_regs[_EBP] = regs->ebp; -+ gdb_regs[_DS] = regs->xds; -+ gdb_regs[_ES] = regs->xes; -+ gdb_regs[_PS] = regs->eflags; -+ gdb_regs[_CS] = regs->xcs; -+ gdb_regs[_PC] = regs->eip; -+ /* Note, as we are a debugging the kernel, we will always -+ * trap in kernel code, this means no priviledge change, -+ * and so the pt_regs structure is not completely valid. In a non -+ * privilege change trap, only EFLAGS, CS and EIP are put on the stack, -+ * SS and ESP are not stacked, this means that the last 2 elements of -+ * pt_regs is not valid (they would normally refer to the user stack) -+ * also, using regs+1 is no good because you end up will a value that is -+ * 2 longs (8) too high. This used to cause stepping over functions -+ * to fail, so my fix is to use the address of regs->esp, which -+ * should point at the end of the stack frame. Note I have ignored -+ * completely exceptions that cause an error code to be stacked, such -+ * as double fault. Stuart Hughes, Zentropix. -+ * original code: gdb_regs[_ESP] = (int) (regs + 1) ; -+ -+ * this is now done on entry and moved to OLD_esp (as well as NEW_esp). 
-+ */ -+ gdb_regs[_ESP] = NEW_esp; -+ gdb_regs[_SS] = __KERNEL_DS; -+ gdb_regs[_FS] = 0xFFFF; -+ gdb_regs[_GS] = 0xFFFF; -+} /* regs_to_gdb_regs */ -+ -+static void -+gdb_regs_to_regs(int *gdb_regs, struct pt_regs *regs) -+{ -+ regs->eax = gdb_regs[_EAX]; -+ regs->ebx = gdb_regs[_EBX]; -+ regs->ecx = gdb_regs[_ECX]; -+ regs->edx = gdb_regs[_EDX]; -+ regs->esi = gdb_regs[_ESI]; -+ regs->edi = gdb_regs[_EDI]; -+ regs->ebp = gdb_regs[_EBP]; -+ regs->xds = gdb_regs[_DS]; -+ regs->xes = gdb_regs[_ES]; -+ regs->eflags = gdb_regs[_PS]; -+ regs->xcs = gdb_regs[_CS]; -+ regs->eip = gdb_regs[_PC]; -+ NEW_esp = gdb_regs[_ESP]; /* keep the value */ -+#if 0 /* can't change these */ -+ regs->esp = gdb_regs[_ESP]; -+ regs->xss = gdb_regs[_SS]; -+ regs->fs = gdb_regs[_FS]; -+ regs->gs = gdb_regs[_GS]; -+#endif -+ -+} /* gdb_regs_to_regs */ -+extern void scheduling_functions_start_here(void); -+extern void scheduling_functions_end_here(void); -+#define first_sched ((unsigned long) scheduling_functions_start_here) -+#define last_sched ((unsigned long) scheduling_functions_end_here) -+ -+int thread_list = 0; -+ -+void -+get_gdb_regs(struct task_struct *p, struct pt_regs *regs, int *gdb_regs) -+{ -+ unsigned long stack_page; -+ int count = 0; -+ IF_SMP(int i); -+ if (!p || p == current) { -+ regs_to_gdb_regs(gdb_regs, regs); -+ return; -+ } -+#ifdef CONFIG_SMP -+ for (i = 0; i < MAX_NO_CPUS; i++) { -+ if (p == kgdb_info.cpus_waiting[i].task) { -+ regs_to_gdb_regs(gdb_regs, -+ kgdb_info.cpus_waiting[i].regs); -+ gdb_regs[_ESP] = -+ (int) &kgdb_info.cpus_waiting[i].regs->esp; -+ -+ return; -+ } -+ } -+#endif -+ memset(gdb_regs, 0, NUMREGBYTES); -+ gdb_regs[_ESP] = p->thread.esp; -+ gdb_regs[_PC] = p->thread.eip; -+ gdb_regs[_EBP] = *(int *) gdb_regs[_ESP]; -+ gdb_regs[_EDI] = *(int *) (gdb_regs[_ESP] + 4); -+ gdb_regs[_ESI] = *(int *) (gdb_regs[_ESP] + 8); -+ -+/* -+ * This code is to give a more informative notion of where a process -+ * is waiting. 
It is used only when the user asks for a thread info -+ * list. If he then switches to the thread, s/he will find the task -+ * is in schedule, but a back trace should show the same info we come -+ * up with. This code was shamelessly purloined from process.c. It was -+ * then enhanced to provide more registers than simply the program -+ * counter. -+ */ -+ -+ if (!thread_list) { -+ return; -+ } -+ -+ if (p->state == TASK_RUNNING) -+ return; -+ stack_page = (unsigned long) p->thread_info; -+ if (gdb_regs[_ESP] < stack_page || gdb_regs[_ESP] > 8188 + stack_page) -+ return; -+ /* include/asm-i386/system.h:switch_to() pushes ebp last. */ -+ do { -+ if (gdb_regs[_EBP] < stack_page || -+ gdb_regs[_EBP] > 8184 + stack_page) -+ return; -+ gdb_regs[_PC] = *(unsigned long *) (gdb_regs[_EBP] + 4); -+ gdb_regs[_ESP] = gdb_regs[_EBP] + 8; -+ gdb_regs[_EBP] = *(unsigned long *) gdb_regs[_EBP]; -+ if (gdb_regs[_PC] < first_sched || gdb_regs[_PC] >= last_sched) -+ return; -+ } while (count++ < 16); -+ return; -+} -+ -+/* Indicate to caller of mem2hex or hex2mem that there has been an -+ error. 
*/ -+static volatile int mem_err = 0; -+static volatile int mem_err_expected = 0; -+static volatile int mem_err_cnt = 0; -+static int garbage_loc = -1; -+ -+int -+get_char(char *addr) -+{ -+ return *addr; -+} -+ -+void -+set_char(char *addr, int val, int may_fault) -+{ -+ /* -+ * This code traps references to the area mapped to the kernel -+ * stack as given by the regs and, instead, stores to the -+ * fn_call_lookaside[cpu].array -+ */ -+ if (may_fault && -+ (unsigned int) addr < OLD_esp && -+ ((unsigned int) addr > (OLD_esp - (unsigned int) LOOKASIDE_SIZE))) { -+ addr = (char *) END_OF_LOOKASIDE - ((char *) OLD_esp - addr); -+ } -+ *addr = val; -+} -+ -+/* convert the memory pointed to by mem into hex, placing result in buf */ -+/* return a pointer to the last char put in buf (null) */ -+/* If MAY_FAULT is non-zero, then we should set mem_err in response to -+ a fault; if zero treat a fault like any other fault in the stub. */ -+char * -+mem2hex(char *mem, char *buf, int count, int may_fault) -+{ -+ int i; -+ unsigned char ch; -+ -+ if (may_fault) { -+ mem_err_expected = 1; -+ mem_err = 0; -+ } -+ for (i = 0; i < count; i++) { -+ /* printk("%lx = ", mem) ; */ -+ -+ ch = get_char(mem++); -+ -+ /* printk("%02x\n", ch & 0xFF) ; */ -+ if (may_fault && mem_err) { -+ if (remote_debug) -+ printk("Mem fault fetching from addr %lx\n", -+ (long) (mem - 1)); -+ *buf = 0; /* truncate buffer */ -+ return (buf); -+ } -+ *buf++ = hexchars[ch >> 4]; -+ *buf++ = hexchars[ch % 16]; -+ } -+ *buf = 0; -+ if (may_fault) -+ mem_err_expected = 0; -+ return (buf); -+} -+ -+/* convert the hex array pointed to by buf into binary to be placed in mem */ -+/* return a pointer to the character AFTER the last byte written */ -+/* NOTE: We use the may fault flag to also indicate if the write is to -+ * the registers (0) or "other" memory (!=0) -+ */ -+char * -+hex2mem(char *buf, char *mem, int count, int may_fault) -+{ -+ int i; -+ unsigned char ch; -+ -+ if (may_fault) { -+ mem_err_expected = 
1; -+ mem_err = 0; -+ } -+ for (i = 0; i < count; i++) { -+ ch = hex(*buf++) << 4; -+ ch = ch + hex(*buf++); -+ set_char(mem++, ch, may_fault); -+ -+ if (may_fault && mem_err) { -+ if (remote_debug) -+ printk("Mem fault storing to addr %lx\n", -+ (long) (mem - 1)); -+ return (mem); -+ } -+ } -+ if (may_fault) -+ mem_err_expected = 0; -+ return (mem); -+} -+ -+/**********************************************/ -+/* WHILE WE FIND NICE HEX CHARS, BUILD AN INT */ -+/* RETURN NUMBER OF CHARS PROCESSED */ -+/**********************************************/ -+int -+hexToInt(char **ptr, int *intValue) -+{ -+ int numChars = 0; -+ int hexValue; -+ -+ *intValue = 0; -+ -+ while (**ptr) { -+ hexValue = hex(**ptr); -+ if (hexValue >= 0) { -+ *intValue = (*intValue << 4) | hexValue; -+ numChars++; -+ } else -+ break; -+ -+ (*ptr)++; -+ } -+ -+ return (numChars); -+} -+ -+#define stubhex(h) hex(h) -+ -+static int -+stub_unpack_int(char *buff, int fieldlength) -+{ -+ int nibble; -+ int retval = 0; -+ -+ while (fieldlength) { -+ nibble = stubhex(*buff++); -+ retval |= nibble; -+ fieldlength--; -+ if (fieldlength) -+ retval = retval << 4; -+ } -+ return retval; -+} -+ -+static char * -+pack_hex_byte(char *pkt, int byte) -+{ -+ *pkt++ = hexchars[(byte >> 4) & 0xf]; -+ *pkt++ = hexchars[(byte & 0xf)]; -+ return pkt; -+} -+ -+#define BUF_THREAD_ID_SIZE 16 -+ -+static char * -+pack_threadid(char *pkt, threadref * id) -+{ -+ char *limit; -+ unsigned char *altid; -+ -+ altid = (unsigned char *) id; -+ limit = pkt + BUF_THREAD_ID_SIZE; -+ while (pkt < limit) -+ pkt = pack_hex_byte(pkt, *altid++); -+ return pkt; -+} -+ -+static char * -+unpack_byte(char *buf, int *value) -+{ -+ *value = stub_unpack_int(buf, 2); -+ return buf + 2; -+} -+ -+static char * -+unpack_threadid(char *inbuf, threadref * id) -+{ -+ char *altref; -+ char *limit = inbuf + BUF_THREAD_ID_SIZE; -+ int x, y; -+ -+ altref = (char *) id; -+ -+ while (inbuf < limit) { -+ x = stubhex(*inbuf++); -+ y = stubhex(*inbuf++); -+ 
*altref++ = (x << 4) | y; -+ } -+ return inbuf; -+} -+ -+void -+int_to_threadref(threadref * id, int value) -+{ -+ unsigned char *scan; -+ -+ scan = (unsigned char *) id; -+ { -+ int i = 4; -+ while (i--) -+ *scan++ = 0; -+ } -+ *scan++ = (value >> 24) & 0xff; -+ *scan++ = (value >> 16) & 0xff; -+ *scan++ = (value >> 8) & 0xff; -+ *scan++ = (value & 0xff); -+} -+ -+static int -+threadref_to_int(threadref * ref) -+{ -+ int i, value = 0; -+ unsigned char *scan; -+ -+ scan = (char *) ref; -+ scan += 4; -+ i = 4; -+ while (i-- > 0) -+ value = (value << 8) | ((*scan++) & 0xff); -+ return value; -+} -+ -+#if 1 /* this is a hold over from 2.4 where O(1) was "sometimes" */ -+extern struct task_struct *kgdb_get_idle(int cpu); -+#define idle_task(cpu) kgdb_get_idle(cpu) -+#else -+#define idle_task(cpu) init_tasks[cpu] -+#endif -+ -+struct task_struct * -+getthread(int pid) -+{ -+ struct task_struct *thread; -+ if (pid >= PID_MAX && pid <= (PID_MAX + MAX_NO_CPUS)) { -+ -+ return idle_task(pid - PID_MAX); -+ } else { -+ /* -+ * find_task_by_pid is relatively safe all the time -+ * Other pid functions require lock downs which imply -+ * that we may be interrupting them (as we get here -+ * in the middle of most any lock down) -+ */ -+ thread = find_task_by_pid(pid); -+ if (thread) { -+ return thread; -+ } -+ } -+ return NULL; -+} -+/* *INDENT-OFF* */ -+struct hw_breakpoint { -+ unsigned enabled; -+ unsigned type; -+ unsigned len; -+ unsigned addr; -+} breakinfo[4] = { {enabled:0}, -+ {enabled:0}, -+ {enabled:0}, -+ {enabled:0}}; -+/* *INDENT-ON* */ -+unsigned hw_breakpoint_status; -+void -+correct_hw_break(void) -+{ -+ int breakno; -+ int correctit; -+ int breakbit; -+ unsigned dr7; -+ -+ asm volatile ("movl %%db7, %0\n":"=r" (dr7) -+ :); -+ /* *INDENT-OFF* */ -+ do { -+ unsigned addr0, addr1, addr2, addr3; -+ asm volatile ("movl %%db0, %0\n" -+ "movl %%db1, %1\n" -+ "movl %%db2, %2\n" -+ "movl %%db3, %3\n" -+ :"=r" (addr0), "=r"(addr1), -+ "=r"(addr2), "=r"(addr3) -+ :); -+ } 
while (0); -+ /* *INDENT-ON* */ -+ correctit = 0; -+ for (breakno = 0; breakno < 3; breakno++) { -+ breakbit = 2 << (breakno << 1); -+ if (!(dr7 & breakbit) && breakinfo[breakno].enabled) { -+ correctit = 1; -+ dr7 |= breakbit; -+ dr7 &= ~(0xf0000 << (breakno << 2)); -+ dr7 |= (((breakinfo[breakno].len << 2) | -+ breakinfo[breakno].type) << 16) << -+ (breakno << 2); -+ switch (breakno) { -+ case 0: -+ asm volatile ("movl %0, %%dr0\n"::"r" -+ (breakinfo[breakno].addr)); -+ break; -+ -+ case 1: -+ asm volatile ("movl %0, %%dr1\n"::"r" -+ (breakinfo[breakno].addr)); -+ break; -+ -+ case 2: -+ asm volatile ("movl %0, %%dr2\n"::"r" -+ (breakinfo[breakno].addr)); -+ break; -+ -+ case 3: -+ asm volatile ("movl %0, %%dr3\n"::"r" -+ (breakinfo[breakno].addr)); -+ break; -+ } -+ } else if ((dr7 & breakbit) && !breakinfo[breakno].enabled) { -+ correctit = 1; -+ dr7 &= ~breakbit; -+ dr7 &= ~(0xf0000 << (breakno << 2)); -+ } -+ } -+ if (correctit) { -+ asm volatile ("movl %0, %%db7\n"::"r" (dr7)); -+ } -+} -+ -+int -+remove_hw_break(unsigned breakno) -+{ -+ if (!breakinfo[breakno].enabled) { -+ return -1; -+ } -+ breakinfo[breakno].enabled = 0; -+ return 0; -+} -+ -+int -+set_hw_break(unsigned breakno, unsigned type, unsigned len, unsigned addr) -+{ -+ if (breakinfo[breakno].enabled) { -+ return -1; -+ } -+ breakinfo[breakno].enabled = 1; -+ breakinfo[breakno].type = type; -+ breakinfo[breakno].len = len; -+ breakinfo[breakno].addr = addr; -+ return 0; -+} -+ -+#ifdef CONFIG_SMP -+static int in_kgdb_console = 0; -+ -+int -+in_kgdb(struct pt_regs *regs) -+{ -+ unsigned flags; -+ int cpu = smp_processor_id(); -+ in_kgdb_called = 1; -+ if (!spin_is_locked(&kgdb_spinlock)) { -+ if (in_kgdb_here_log[cpu] || /* we are holding this cpu */ -+ in_kgdb_console) { /* or we are doing slow i/o */ -+ return 1; -+ } -+ return 0; -+ } -+ -+ /* As I see it the only reason not to let all cpus spin on -+ * the same spin_lock is to allow selected ones to proceed. 
-+ * This would be a good thing, so we leave it this way. -+ * Maybe someday.... Done ! -+ -+ * in_kgdb() is called from an NMI so we don't pretend -+ * to have any resources, like printk() for example. -+ */ -+ -+ kgdb_local_irq_save(flags); /* only local here, to avoid hanging */ -+ /* -+ * log arival of this cpu -+ * The NMI keeps on ticking. Protect against recurring more -+ * than once, and ignor the cpu that has the kgdb lock -+ */ -+ in_kgdb_entry_log[cpu]++; -+ in_kgdb_here_log[cpu] = regs; -+ if (cpu == spinlock_cpu || waiting_cpus[cpu].task) { -+ goto exit_in_kgdb; -+ } -+ /* -+ * For protection of the initilization of the spin locks by kgdb -+ * it locks the kgdb spinlock before it gets the wait locks set -+ * up. We wait here for the wait lock to be taken. If the -+ * kgdb lock goes away first?? Well, it could be a slow exit -+ * sequence where the wait lock is removed prior to the kgdb lock -+ * so if kgdb gets unlocked, we just exit. -+ */ -+ while (spin_is_locked(&kgdb_spinlock) && -+ !spin_is_locked(waitlocks + cpu)) ; -+ if (!spin_is_locked(&kgdb_spinlock)) { -+ goto exit_in_kgdb; -+ } -+ waiting_cpus[cpu].task = current; -+ waiting_cpus[cpu].pid = (current->pid) ? 
: (PID_MAX + cpu); -+ waiting_cpus[cpu].regs = regs; -+ -+ spin_unlock_wait(waitlocks + cpu); -+ /* -+ * log departure of this cpu -+ */ -+ waiting_cpus[cpu].task = 0; -+ waiting_cpus[cpu].pid = 0; -+ waiting_cpus[cpu].regs = 0; -+ correct_hw_break(); -+ exit_in_kgdb: -+ in_kgdb_here_log[cpu] = 0; -+ kgdb_local_irq_restore(flags); -+ return 1; -+ /* -+ spin_unlock(continuelocks + smp_processor_id()); -+ */ -+} -+ -+void -+smp__in_kgdb(struct pt_regs regs) -+{ -+ ack_APIC_irq(); -+ in_kgdb(&regs); -+} -+#else -+int -+in_kgdb(struct pt_regs *regs) -+{ -+ return (kgdb_spinlock); -+} -+#endif -+ -+void -+printexceptioninfo(int exceptionNo, int errorcode, char *buffer) -+{ -+ unsigned dr6; -+ int i; -+ switch (exceptionNo) { -+ case 1: /* debug exception */ -+ break; -+ case 3: /* breakpoint */ -+ sprintf(buffer, "Software breakpoint"); -+ return; -+ default: -+ sprintf(buffer, "Details not available"); -+ return; -+ } -+ asm volatile ("movl %%db6, %0\n":"=r" (dr6) -+ :); -+ if (dr6 & 0x4000) { -+ sprintf(buffer, "Single step"); -+ return; -+ } -+ for (i = 0; i < 4; ++i) { -+ if (dr6 & (1 << i)) { -+ sprintf(buffer, "Hardware breakpoint %d", i); -+ return; -+ } -+ } -+ sprintf(buffer, "Unknown trap"); -+ return; -+} -+ -+/* -+ * This function does all command procesing for interfacing to gdb. -+ * -+ * NOTE: The INT nn instruction leaves the state of the interrupt -+ * enable flag UNCHANGED. That means that when this routine -+ * is entered via a breakpoint (INT 3) instruction from code -+ * that has interrupts enabled, then interrupts will STILL BE -+ * enabled when this routine is entered. The first thing that -+ * we do here is disable interrupts so as to prevent recursive -+ * entries and bothersome serial interrupts while we are -+ * trying to run the serial port in polled mode. -+ * -+ * For kernel version 2.1.xx the kgdb_cli() actually gets a spin lock so -+ * it is always necessary to do a restore_flags before returning -+ * so as to let go of that lock.
-+ */ -+int -+kgdb_handle_exception(int exceptionVector, -+ int signo, int err_code, struct pt_regs *linux_regs) -+{ -+ struct task_struct *usethread = NULL; -+ struct task_struct *thread_list_start = 0, *thread = NULL; -+ int addr, length; -+ int breakno, breaktype; -+ char *ptr; -+ int newPC; -+ threadref thref; -+ int threadid; -+ int thread_min = PID_MAX + MAX_NO_CPUS; -+ int maxthreads; -+ int nothreads; -+ unsigned long flags; -+ int gdb_regs[NUMREGBYTES / 4]; -+ int dr6; -+ IF_SMP(int entry_state = 0); /* 0, ok, 1, no nmi, 2 sync failed */ -+#define NO_NMI 1 -+#define NO_SYNC 2 -+#define regs (*linux_regs) -+#define NUMREGS NUMREGBYTES/4 -+ /* -+ * If the entry is not from the kernel then return to the Linux -+ * trap handler and let it process the interrupt normally. -+ */ -+ if ((linux_regs->eflags & VM_MASK) || (3 & linux_regs->xcs)) { -+ printk("ignoring non-kernel exception\n"); -+ print_regs(&regs); -+ return (0); -+ } -+ -+ kgdb_local_irq_save(flags); -+ -+ /* Get kgdb spinlock */ -+ -+ KGDB_SPIN_LOCK(&kgdb_spinlock); -+ rdtscll(kgdb_info.entry_tsc); -+ /* -+ * We depend on this spinlock and the NMI watch dog to control the -+ * other cpus. They will arrive at "in_kgdb()" as a result of the -+ * NMI and will wait there for the following spin locks to be -+ * released. -+ */ -+#ifdef CONFIG_SMP -+ -+ if (cpu_callout_map & ~MAX_CPU_MASK) { -+ printk("kgdb : too many cpus, possibly not mapped" -+ " in contiguous space, change MAX_NO_CPUS" -+ " in kgdb_stub and make new kernel.\n" -+ " cpu_callout_map is %lx\n", cpu_callout_map); -+ goto exit_just_unlock; -+ } -+ -+ if (spinlock_count == 1) { -+ int time, end_time, dum; -+ int i; -+ int cpu_logged_in[MAX_NO_CPUS] = {[0 ... MAX_NO_CPUS - 1] = (0) -+ }; -+ if (remote_debug) { -+ printk("kgdb : cpu %d entry, syncing others\n", -+ smp_processor_id()); -+ } -+ for (i = 0; i < MAX_NO_CPUS; i++) { -+ /* -+ * Use trylock as we may already hold the lock if -+ * we are holding the cpu. Net result is all -+ * locked.
-+ */ -+ spin_trylock(&waitlocks[i]); -+ } -+ for (i = 0; i < MAX_NO_CPUS; i++) -+ cpu_logged_in[i] = 0; -+ /* -+ * Wait for their arrival. We know the watch dog is active if -+ * in_kgdb() has ever been called, as it is always called on a -+ * watchdog tick. -+ */ -+ rdtsc(dum, time); -+ end_time = time + 2; /* Note: we use the High order bits! */ -+ i = 1; -+ if (num_online_cpus() > 1) { -+ int me_in_kgdb = in_kgdb_entry_log[smp_processor_id()]; -+ smp_send_nmi_allbutself(); -+ while (i < num_online_cpus() && time != end_time) { -+ int j; -+ for (j = 0; j < MAX_NO_CPUS; j++) { -+ if (waiting_cpus[j].task && -+ !cpu_logged_in[j]) { -+ i++; -+ cpu_logged_in[j] = 1; -+ if (remote_debug) { -+ printk -+ ("kgdb : cpu %d arrived at kgdb\n", -+ j); -+ } -+ break; -+ } else if (!waiting_cpus[j].task && -+ !cpu_online(j)) { -+ waiting_cpus[j].task = NOCPU; -+ cpu_logged_in[j] = 1; -+ waiting_cpus[j].hold = 1; -+ break; -+ } -+ if (!waiting_cpus[j].task && -+ in_kgdb_here_log[j]) { -+ -+ int wait = 100000; -+ while (wait--) ; -+ if (!waiting_cpus[j].task && -+ in_kgdb_here_log[j]) { -+ printk -+ ("kgdb : cpu %d stall" -+ " in in_kgdb\n", -+ j); -+ i++; -+ cpu_logged_in[j] = 1; -+ waiting_cpus[j].task = -+ (struct task_struct -+ *) 1; -+ } -+ } -+ } -+ -+ if (in_kgdb_entry_log[smp_processor_id()] > -+ (me_in_kgdb + 10)) { -+ break; -+ } -+ -+ rdtsc(dum, time); -+ } -+ if (i < num_online_cpus()) { -+ printk -+ ("kgdb : time out, proceeding without sync\n"); -+#if 0 -+ printk("kgdb : Waiting_cpus: 0 = %d, 1 = %d\n", -+ waiting_cpus[0].task != 0, -+ waiting_cpus[1].task != 0); -+ printk("kgdb : Cpu_logged in: 0 = %d, 1 = %d\n", -+ cpu_logged_in[0], cpu_logged_in[1]); -+ printk -+ ("kgdb : in_kgdb_here_log in: 0 = %d, 1 = %d\n", -+ in_kgdb_here_log[0] != 0, -+ in_kgdb_here_log[1] != 0); -+#endif -+ entry_state = NO_SYNC; -+ } else { -+#if 0 -+ int ent = -+ in_kgdb_entry_log[smp_processor_id()] - -+ me_in_kgdb; -+ printk("kgdb : sync after %d entries\n", ent); -+#endif -+ } -+ } 
else { -+ if (remote_debug) { -+ printk -+ ("kgdb : %d cpus, but watchdog not active\n" -+ "proceeding without locking down other cpus\n", -+ num_online_cpus()); -+ entry_state = NO_NMI; -+ } -+ } -+ } -+#endif -+ -+ if (remote_debug) { -+ unsigned long *lp = (unsigned long *) &linux_regs; -+ -+ printk("handle_exception(exceptionVector=%d, " -+ "signo=%d, err_code=%d, linux_regs=%p)\n", -+ exceptionVector, signo, err_code, linux_regs); -+ if (debug_regs) { -+ print_regs(&regs); -+ printk("Stk: %8lx %8lx %8lx %8lx" -+ " %8lx %8lx %8lx %8lx\n", -+ lp[0], lp[1], lp[2], lp[3], -+ lp[4], lp[5], lp[6], lp[7]); -+ printk(" %8lx %8lx %8lx %8lx" -+ " %8lx %8lx %8lx %8lx\n", -+ lp[8], lp[9], lp[10], lp[11], -+ lp[12], lp[13], lp[14], lp[15]); -+ printk(" %8lx %8lx %8lx %8lx " -+ "%8lx %8lx %8lx %8lx\n", -+ lp[16], lp[17], lp[18], lp[19], -+ lp[20], lp[21], lp[22], lp[23]); -+ printk(" %8lx %8lx %8lx %8lx " -+ "%8lx %8lx %8lx %8lx\n", -+ lp[24], lp[25], lp[26], lp[27], -+ lp[28], lp[29], lp[30], lp[31]); -+ } -+ } -+ -+ /* Disable hardware debugging while we are in kgdb */ -+ /* Get the debug register status register */ -+/* *INDENT-OFF* */ -+ __asm__("movl %0,%%db7" -+ : /* no output */ -+ :"r"(0)); -+ -+ asm volatile ("movl %%db6, %0\n" -+ :"=r" (hw_breakpoint_status) -+ :); -+ -+/* *INDENT-ON* */ -+ switch (exceptionVector) { -+ case 0: /* divide error */ -+ case 1: /* debug exception */ -+ case 2: /* NMI */ -+ case 3: /* breakpoint */ -+ case 4: /* overflow */ -+ case 5: /* bounds check */ -+ case 6: /* invalid opcode */ -+ case 7: /* device not available */ -+ case 8: /* double fault (errcode) */ -+ case 10: /* invalid TSS (errcode) */ -+ case 12: /* stack fault (errcode) */ -+ case 16: /* floating point error */ -+ case 17: /* alignment check (errcode) */ -+ default: /* any undocumented */ -+ break; -+ case 11: /* segment not present (errcode) */ -+ case 13: /* general protection (errcode) */ -+ case 14: /* page fault (special errcode) */ -+ case 19: /* cache flush denied
*/ -+ if (mem_err_expected) { -+ /* -+ * This fault occured because of the -+ * get_char or set_char routines. These -+ * two routines use either eax of edx to -+ * indirectly reference the location in -+ * memory that they are working with. -+ * For a page fault, when we return the -+ * instruction will be retried, so we -+ * have to make sure that these -+ * registers point to valid memory. -+ */ -+ mem_err = 1; /* set mem error flag */ -+ mem_err_expected = 0; -+ mem_err_cnt++; /* helps in debugging */ -+ /* make valid address */ -+ regs.eax = (long) &garbage_loc; -+ /* make valid address */ -+ regs.edx = (long) &garbage_loc; -+ if (remote_debug) -+ printk("Return after memory error: " -+ "mem_err_cnt=%d\n", mem_err_cnt); -+ if (debug_regs) -+ print_regs(&regs); -+ goto exit_kgdb; -+ } -+ break; -+ } -+ if (remote_debug) -+ printk("kgdb : entered kgdb on cpu %d\n", smp_processor_id()); -+ -+ gdb_i386vector = exceptionVector; -+ gdb_i386errcode = err_code; -+ kgdb_info.called_from = __builtin_return_address(0); -+#ifdef CONFIG_SMP -+ /* -+ * OK, we can now communicate, lets tell gdb about the sync. -+ * but only if we had a problem. -+ */ -+ switch (entry_state) { -+ case NO_NMI: -+ to_gdb("NMI not active, other cpus not stopped\n"); -+ break; -+ case NO_SYNC: -+ to_gdb("Some cpus not stopped, see 'kgdb_info' for details\n"); -+ default:; -+ } -+ -+#endif -+/* -+ * Set up the gdb function call area.
-+ */ -+ trap_cpu = smp_processor_id(); -+ OLD_esp = NEW_esp = (int) (&linux_regs->esp); -+ -+ IF_SMP(once_again:) -+ /* reply to host that an exception has occurred */ -+ remcomOutBuffer[0] = 'S'; -+ remcomOutBuffer[1] = hexchars[signo >> 4]; -+ remcomOutBuffer[2] = hexchars[signo % 16]; -+ remcomOutBuffer[3] = 0; -+ -+ putpacket(remcomOutBuffer); -+ -+ while (1 == 1) { -+ error = 0; -+ remcomOutBuffer[0] = 0; -+ getpacket(remcomInBuffer); -+ switch (remcomInBuffer[0]) { -+ case '?': -+ remcomOutBuffer[0] = 'S'; -+ remcomOutBuffer[1] = hexchars[signo >> 4]; -+ remcomOutBuffer[2] = hexchars[signo % 16]; -+ remcomOutBuffer[3] = 0; -+ break; -+ case 'd': -+ remote_debug = !(remote_debug); /* toggle debug flag */ -+ printk("Remote debug %s\n", -+ remote_debug ? "on" : "off"); -+ break; -+ case 'g': /* return the value of the CPU registers */ -+ get_gdb_regs(usethread, &regs, gdb_regs); -+ mem2hex((char *) gdb_regs, -+ remcomOutBuffer, NUMREGBYTES, 0); -+ break; -+ case 'G': /* set the value of the CPU registers - return OK */ -+ hex2mem(&remcomInBuffer[1], -+ (char *) gdb_regs, NUMREGBYTES, 0); -+ if (!usethread || usethread == current) { -+ gdb_regs_to_regs(gdb_regs, &regs); -+ strcpy(remcomOutBuffer, "OK"); -+ } else { -+ strcpy(remcomOutBuffer, "E00"); -+ } -+ break; -+ -+ case 'P':{ /* set the value of a single CPU register - -+ return OK */ -+ /* -+ * For some reason, gdb wants to talk about psudo -+ * registers (greater than 15). These may have -+ * meaning for ptrace, but for us it is safe to -+ * ignor them. We do this by dumping them into -+ * _GS which we also ignor, but do have memory for. -+ */ -+ int regno; -+ -+ ptr = &remcomInBuffer[1]; -+ regs_to_gdb_regs(gdb_regs, &regs); -+ if ((!usethread || usethread == current) && -+ hexToInt(&ptr, &regno) && -+ *ptr++ == '=' && (regno >= 0)) { -+ regno = -+ (regno >= NUMREGS ?
_GS : regno); -+ hex2mem(ptr, (char *) &gdb_regs[regno], -+ 4, 0); -+ gdb_regs_to_regs(gdb_regs, &regs); -+ strcpy(remcomOutBuffer, "OK"); -+ break; -+ } -+ strcpy(remcomOutBuffer, "E01"); -+ break; -+ } -+ -+ /* mAA..AA,LLLL Read LLLL bytes at address AA..AA */ -+ case 'm': -+ /* TRY TO READ %x,%x. IF SUCCEED, SET PTR = 0 */ -+ ptr = &remcomInBuffer[1]; -+ if (hexToInt(&ptr, &addr) && -+ (*(ptr++) == ',') && (hexToInt(&ptr, &length))) { -+ ptr = 0; -+ /* -+ * hex doubles the byte count -+ */ -+ if (length > (BUFMAX / 2)) -+ length = BUFMAX / 2; -+ mem2hex((char *) addr, -+ remcomOutBuffer, length, 1); -+ if (mem_err) { -+ strcpy(remcomOutBuffer, "E03"); -+ debug_error("memory fault\n", NULL); -+ } -+ } -+ -+ if (ptr) { -+ strcpy(remcomOutBuffer, "E01"); -+ debug_error -+ ("malformed read memory command: %s\n", -+ remcomInBuffer); -+ } -+ break; -+ -+ /* MAA..AA,LLLL: -+ Write LLLL bytes at address AA.AA return OK */ -+ case 'M': -+ /* TRY TO READ '%x,%x:'. IF SUCCEED, SET PTR = 0 */ -+ ptr = &remcomInBuffer[1]; -+ if (hexToInt(&ptr, &addr) && -+ (*(ptr++) == ',') && -+ (hexToInt(&ptr, &length)) && (*(ptr++) == ':')) { -+ hex2mem(ptr, (char *) addr, length, 1); -+ -+ if (mem_err) { -+ strcpy(remcomOutBuffer, "E03"); -+ debug_error("memory fault\n", NULL); -+ } else { -+ strcpy(remcomOutBuffer, "OK"); -+ } -+ -+ ptr = 0; -+ } -+ if (ptr) { -+ strcpy(remcomOutBuffer, "E02"); -+ debug_error -+ ("malformed write memory command: %s\n", -+ remcomInBuffer); -+ } -+ break; -+ -+ /* cAA..AA Continue at address AA..AA(optional) */ -+ /* sAA..AA Step one instruction from AA..AA(optional) */ -+ /* D detach, reply OK and then continue */ -+ case 'c': -+ case 's': -+ case 'D': -+ -+ /* try to read optional parameter, -+ pc unchanged if no parm */ -+ ptr = &remcomInBuffer[1]; -+ if (hexToInt(&ptr, &addr)) { -+ if (remote_debug) -+ printk("Changing EIP to 0x%x\n", addr); -+ -+ regs.eip = addr; -+ } -+ -+ newPC = regs.eip; -+ -+ /* clear the trace bit */ -+ regs.eflags &= 0xfffffeff;
-+ -+ /* set the trace bit if we're stepping */ -+ if (remcomInBuffer[0] == 's') -+ regs.eflags |= 0x100; -+ -+ /* detach is a friendly version of continue. Note that -+ debugging is still enabled (e.g hit control C) -+ until the process that issued an ioctl TIOCGDB -+ terminates -+ */ -+ if (remcomInBuffer[0] == 'D') { -+ strcpy(remcomOutBuffer, "OK"); -+ putpacket(remcomOutBuffer); -+ } -+ -+ if (remote_debug) { -+ printk("Resuming execution\n"); -+ print_regs(&regs); -+ } -+ asm volatile ("movl %%db6, %0\n":"=r" (dr6) -+ :); -+ if (!(dr6 & 0x4000)) { -+ for (breakno = 0; breakno < 4; ++breakno) { -+ if (dr6 & (1 << breakno) && -+ (breakinfo[breakno].type == 0)) { -+ /* Set restore flag */ -+ regs.eflags |= 0x10000; -+ break; -+ } -+ } -+ } -+ correct_hw_break(); -+ asm volatile ("movl %0, %%db6\n"::"r" (0)); -+ goto exit_kgdb; -+ -+ /* kill the program */ -+ case 'k': /* do nothing */ -+ break; -+ -+ /* query */ -+ case 'q': -+ switch (remcomInBuffer[1]) { -+ case 'L': -+ /* List threads */ -+ thread_list = 2; -+ thread_list_start = (usethread ?
: current); -+ unpack_byte(remcomInBuffer + 3, &maxthreads); -+ unpack_threadid(remcomInBuffer + 5, &thref); -+ do { -+ int buf_thread_limit = -+ (BUFMAX - 22) / BUF_THREAD_ID_SIZE; -+ if (maxthreads > buf_thread_limit) { -+ maxthreads = buf_thread_limit; -+ } -+ } while (0); -+ remcomOutBuffer[0] = 'q'; -+ remcomOutBuffer[1] = 'M'; -+ remcomOutBuffer[4] = '0'; -+ pack_threadid(remcomOutBuffer + 5, &thref); -+ -+ threadid = threadref_to_int(&thref); -+ for (nothreads = 0; -+ nothreads < maxthreads && -+ threadid < PID_MAX + MAX_NO_CPUS; -+ threadid++) { -+ thread = getthread(threadid); -+ if (thread) { -+ int_to_threadref(&thref, -+ threadid); -+ pack_threadid(remcomOutBuffer + -+ 21 + -+ nothreads * 16, -+ &thref); -+ nothreads++; -+ if (thread_min > threadid) -+ thread_min = threadid; -+ } -+ } -+ -+ if (threadid == PID_MAX + MAX_NO_CPUS) { -+ remcomOutBuffer[4] = '1'; -+ } -+ pack_hex_byte(remcomOutBuffer + 2, nothreads); -+ remcomOutBuffer[21 + nothreads * 16] = '\0'; -+ break; -+ -+ case 'C': -+ /* Current thread id */ -+ remcomOutBuffer[0] = 'Q'; -+ remcomOutBuffer[1] = 'C'; -+ threadid = current->pid; -+ if (!threadid) { -+ /* -+ * idle thread -+ */ -+ for (threadid = PID_MAX; -+ threadid < PID_MAX + MAX_NO_CPUS; -+ threadid++) { -+ if (current == -+ idle_task(threadid - -+ PID_MAX)) -+ break; -+ } -+ } -+ int_to_threadref(&thref, threadid); -+ pack_threadid(remcomOutBuffer + 2, &thref); -+ remcomOutBuffer[18] = '\0'; -+ break; -+ -+ case 'E': -+ /* Print exception info */ -+ printexceptioninfo(exceptionVector, -+ err_code, remcomOutBuffer); -+ break; -+ } -+ break; -+ -+ /* task related */ -+ case 'H': -+ switch (remcomInBuffer[1]) { -+ case 'g': -+ ptr = &remcomInBuffer[2]; -+ hexToInt(&ptr, &threadid); -+ thread = getthread(threadid); -+ if (!thread) { -+ remcomOutBuffer[0] = 'E'; -+ remcomOutBuffer[1] = '\0'; -+ break; -+ } -+ /* -+ * Just in case I forget what this is all about, -+ * the "thread info" command to gdb causes it -+ * to ask for a thread 
list. It then switches -+ * to each thread and asks for the registers. -+ * For this (and only this) usage, we want to -+ * fudge the registers of tasks not on the run -+ * list (i.e. waiting) to show the routine that -+ * called schedule. Also, gdb, is a minimalist -+ * in that if the current thread is the last -+ * it will not re-read the info when done. -+ * This means that in this case we must show -+ * the real registers. So here is how we do it: -+ * Each entry we keep track of the min -+ * thread in the list (the last that gdb will) -+ * get info for. We also keep track of the -+ * starting thread. -+ * "thread_list" is cleared when switching back -+ * to the min thread if it is was current, or -+ * if it was not current, thread_list is set -+ * to 1. When the switch to current comes, -+ * if thread_list is 1, clear it, else do -+ * nothing. -+ */ -+ usethread = thread; -+ if ((thread_list == 1) && -+ (thread == thread_list_start)) { -+ thread_list = 0; -+ } -+ if (thread_list && (threadid == thread_min)) { -+ if (thread == thread_list_start) { -+ thread_list = 0; -+ } else { -+ thread_list = 1; -+ } -+ } -+ /* follow through */ -+ case 'c': -+ remcomOutBuffer[0] = 'O'; -+ remcomOutBuffer[1] = 'K'; -+ remcomOutBuffer[2] = '\0'; -+ break; -+ } -+ break; -+ -+ /* Query thread status */ -+ case 'T': -+ ptr = &remcomInBuffer[1]; -+ hexToInt(&ptr, &threadid); -+ thread = getthread(threadid); -+ if (thread) { -+ remcomOutBuffer[0] = 'O'; -+ remcomOutBuffer[1] = 'K'; -+ remcomOutBuffer[2] = '\0'; -+ if (thread_min > threadid) -+ thread_min = threadid; -+ } else { -+ remcomOutBuffer[0] = 'E'; -+ remcomOutBuffer[1] = '\0'; -+ } -+ break; -+ -+ case 'Y': -+ ptr = &remcomInBuffer[1]; -+ hexToInt(&ptr, &breakno); -+ ptr++; -+ hexToInt(&ptr, &breaktype); -+ ptr++; -+ hexToInt(&ptr, &length); -+ ptr++; -+ hexToInt(&ptr, &addr); -+ if (set_hw_break(breakno & 0x3, -+ breaktype & 0x3, -+ length & 0x3, addr) == 0) { -+ strcpy(remcomOutBuffer, "OK"); -+ } else { -+ 
strcpy(remcomOutBuffer, "ERROR"); -+ } -+ break; -+ -+ /* Remove hardware breakpoint */ -+ case 'y': -+ ptr = &remcomInBuffer[1]; -+ hexToInt(&ptr, &breakno); -+ if (remove_hw_break(breakno & 0x3) == 0) { -+ strcpy(remcomOutBuffer, "OK"); -+ } else { -+ strcpy(remcomOutBuffer, "ERROR"); -+ } -+ break; -+ -+ case 'r': /* reboot */ -+ strcpy(remcomOutBuffer, "OK"); -+ putpacket(remcomOutBuffer); -+ /*to_gdb("Rebooting\n"); */ -+ /* triplefault no return from here */ -+ { -+ static long no_idt[2]; -+ __asm__ __volatile__("lidt %0"::"m"(no_idt)); -+ BREAKPOINT; -+ } -+ -+ } /* switch */ -+ -+ /* reply to the request */ -+ putpacket(remcomOutBuffer); -+ } /* while(1==1) */ -+ /* -+ * reached by goto only. -+ */ -+ exit_kgdb: -+ /* -+ * Here is where we set up to trap a gdb function call. NEW_esp -+ * will be changed if we are trying to do this. We handle both -+ * adding and subtracting, thus allowing gdb to put grung on -+ * the stack which it removes later. -+ */ -+ if (NEW_esp != OLD_esp) { -+ int *ptr = END_OF_LOOKASIDE; -+ if (NEW_esp < OLD_esp) -+ ptr -= (OLD_esp - NEW_esp) / sizeof (int); -+ *--ptr = linux_regs->eflags; -+ *--ptr = linux_regs->xcs; -+ *--ptr = linux_regs->eip; -+ *--ptr = linux_regs->ecx; -+ *--ptr = linux_regs->ebx; -+ *--ptr = linux_regs->eax; -+ linux_regs->ecx = NEW_esp - (sizeof (int) * 6); -+ linux_regs->ebx = (unsigned int) END_OF_LOOKASIDE; -+ if (NEW_esp < OLD_esp) { -+ linux_regs->eip = (unsigned int) fn_call_stub; -+ } else { -+ linux_regs->eip = (unsigned int) fn_rtn_stub; -+ linux_regs->eax = NEW_esp; -+ } -+ linux_regs->eflags &= ~(IF_BIT | TF_BIT); -+ } -+#ifdef CONFIG_SMP -+ /* -+ * Release gdb wait locks -+ * Sanity check time. Must have at least one cpu to run. Also single -+ * step must not be done if the current cpu is on hold. 
-+ */ -+ if (spinlock_count == 1) { -+ int ss_hold = (regs.eflags & 0x100) && kgdb_info.hold_on_sstep; -+ int cpu_avail = 0; -+ int i; -+ -+ for (i = 0; i < MAX_NO_CPUS; i++) { -+ if (!cpu_online(i)) -+ break; -+ if (!hold_cpu(i)) { -+ cpu_avail = 1; -+ } -+ } -+ /* -+ * Early in the bring up there will be NO cpus on line... -+ */ -+ if (!cpu_avail && cpu_online_map) { -+ to_gdb("No cpus unblocked, see 'kgdb_info.hold_cpu'\n"); -+ goto once_again; -+ } -+ if (hold_cpu(smp_processor_id()) && (regs.eflags & 0x100)) { -+ to_gdb -+ ("Current cpu must be unblocked to single step\n"); -+ goto once_again; -+ } -+ if (!(ss_hold)) { -+ int i; -+ for (i = 0; i < MAX_NO_CPUS; i++) { -+ if (!hold_cpu(i)) { -+ spin_unlock(&waitlocks[i]); -+ } -+ } -+ } else { -+ spin_unlock(&waitlocks[smp_processor_id()]); -+ } -+ /* Release kgdb spinlock */ -+ KGDB_SPIN_UNLOCK(&kgdb_spinlock); -+ /* -+ * If this cpu is on hold, this is where we -+ * do it. Note, the NMI will pull us out of here, -+ * but will return as the above lock is not held. -+ * We will stay here till another cpu releases the lock for us. -+ */ -+ spin_unlock_wait(waitlocks + smp_processor_id()); -+ kgdb_local_irq_restore(flags); -+ return (0); -+ } -+ exit_just_unlock: -+#endif -+ /* Release kgdb spinlock */ -+ KGDB_SPIN_UNLOCK(&kgdb_spinlock); -+ kgdb_local_irq_restore(flags); -+ return (0); -+} -+ -+/* this function is used to set up exception handlers for tracing and -+ * breakpoints. -+ * This function is not needed as the above line does all that is needed. -+ * We leave it for backward compatitability... -+ */ -+void -+set_debug_traps(void) -+{ -+ /* -+ * linux_debug_hook is defined in traps.c. We store a pointer -+ * to our own exception handler into it. -+ -+ * But really folks, every hear of labeled common, an old Fortran -+ * concept. Lots of folks can reference it and it is define if -+ * anyone does. Only one can initialize it at link time. We do -+ * this with the hook. See the statement above. 
No need for any -+ * executable code and it is ready as soon as the kernel is -+ * loaded. Very desirable in kernel debugging. -+ -+ linux_debug_hook = handle_exception ; -+ */ -+ -+ /* In case GDB is started before us, ack any packets (presumably -+ "$?#xx") sitting there. -+ putDebugChar ('+'); -+ -+ initialized = 1; -+ */ -+} -+ -+/* This function will generate a breakpoint exception. It is used at the -+ beginning of a program to sync up with a debugger and can be used -+ otherwise as a quick means to stop program execution and "break" into -+ the debugger. */ -+/* But really, just use the BREAKPOINT macro. We will handle the int stuff -+ */ -+ -+#ifdef later -+/* -+ * possibly we should not go thru the traps.c code at all? Someday. -+ */ -+void -+do_kgdb_int3(struct pt_regs *regs, long error_code) -+{ -+ kgdb_handle_exception(3, 5, error_code, regs); -+ return; -+} -+#endif -+#undef regs -+#ifdef CONFIG_TRAP_BAD_SYSCALL_EXITS -+asmlinkage void -+bad_sys_call_exit(int stuff) -+{ -+ struct pt_regs *regs = (struct pt_regs *) &stuff; -+ printk("Sys call %d return with %x preempt_count\n", -+ (int) regs->orig_eax, preempt_count()); -+} -+#endif -+#ifdef CONFIG_STACK_OVERFLOW_TEST -+#include -+asmlinkage void -+stack_overflow(void) -+{ -+#ifdef BREAKPOINT -+ BREAKPOINT; -+#else -+ printk("Kernel stack overflow, looping forever\n"); -+#endif -+ while (1) { -+ } -+} -+#endif -+ -+#if defined(CONFIG_SMP) || defined(CONFIG_KGDB_CONSOLE) -+char gdbconbuf[BUFMAX]; -+ -+static void -+kgdb_gdb_message(const char *s, unsigned count) -+{ -+ int i; -+ int wcount; -+ char *bufptr; -+ /* -+ * This takes care of NMI while spining out chars to gdb -+ */ -+ IF_SMP(in_kgdb_console = 1); -+ gdbconbuf[0] = 'O'; -+ bufptr = gdbconbuf + 1; -+ while (count > 0) { -+ if ((count << 1) > (BUFMAX - 2)) { -+ wcount = (BUFMAX - 2) >> 1; -+ } else { -+ wcount = count; -+ } -+ count -= wcount; -+ for (i = 0; i < wcount; i++) { -+ bufptr = pack_hex_byte(bufptr, s[i]); -+ } -+ *bufptr = '\0'; -+ s 
+= wcount; -+ -+ putpacket(gdbconbuf); -+ -+ } -+ IF_SMP(in_kgdb_console = 0); -+} -+#endif -+#ifdef CONFIG_SMP -+static void -+to_gdb(const char *s) -+{ -+ int count = 0; -+ while (s[count] && (count++ < BUFMAX)) ; -+ kgdb_gdb_message(s, count); -+} -+#endif -+#ifdef CONFIG_KGDB_CONSOLE -+#include -+#include -+#include -+#include -+#include -+ -+void -+kgdb_console_write(struct console *co, const char *s, unsigned count) -+{ -+ -+ if (gdb_i386vector == -1) { -+ /* -+ * We have not yet talked to gdb. What to do... -+ * lets break, on continue we can do the write. -+ * But first tell him whats up. Uh, well no can do, -+ * as this IS the console. Oh well... -+ * We do need to wait or the messages will be lost. -+ * Other option would be to tell the above code to -+ * ignore this breakpoint and do an auto return, -+ * but that might confuse gdb. Also this happens -+ * early enough in boot up that we don't have the traps -+ * set up yet, so... -+ */ -+ breakpoint(); -+ } -+ kgdb_gdb_message(s, count); -+} -+ -+/* -+ * ------------------------------------------------------------ -+ * Serial KGDB driver -+ * ------------------------------------------------------------ -+ */ -+ -+static struct console kgdbcons = { -+ name:"kgdb", -+ write:kgdb_console_write, -+#ifdef CONFIG_KGDB_USER_CONSOLE -+ device:kgdb_console_device, -+#endif -+ flags:CON_PRINTBUFFER | CON_ENABLED, -+ index:-1, -+}; -+ -+/* -+ * The trick here is that this file gets linked before printk.o -+ * That means we get to peer at the console info in the command -+ * line before it does. If we are up, we register, otherwise, -+ * do nothing. By returning 0, we allow printk to look also. 
-+ */ -+static int kgdb_console_enabled; -+ -+int __init -+kgdb_console_init(char *str) -+{ -+ if ((strncmp(str, "kgdb", 4) == 0) || (strncmp(str, "gdb", 3) == 0)) { -+ register_console(&kgdbcons); -+ kgdb_console_enabled = 1; -+ } -+ return 0; /* let others look at the string */ -+} -+ -+__setup("console=", kgdb_console_init); -+ -+#ifdef CONFIG_KGDB_USER_CONSOLE -+static kdev_t kgdb_console_device(struct console *c); -+/* This stuff sort of works, but it knocks out telnet devices -+ * we are leaving it here in case we (or you) find time to figure it out -+ * better.. -+ */ -+ -+/* -+ * We need a real char device as well for when the console is opened for user -+ * space activities. -+ */ -+ -+static int -+kgdb_consdev_open(struct inode *inode, struct file *file) -+{ -+ return 0; -+} -+ -+static ssize_t -+kgdb_consdev_write(struct file *file, const char *buf, -+ size_t count, loff_t * ppos) -+{ -+ int size, ret = 0; -+ static char kbuf[128]; -+ static DECLARE_MUTEX(sem); -+ -+ /* We are not reentrant... */ -+ if (down_interruptible(&sem)) -+ return -ERESTARTSYS; -+ -+ while (count > 0) { -+ /* need to copy the data from user space */ -+ size = count; -+ if (size > sizeof (kbuf)) -+ size = sizeof (kbuf); -+ if (copy_from_user(kbuf, buf, size)) { -+ ret = -EFAULT; -+ break;; -+ } -+ kgdb_console_write(&kgdbcons, kbuf, size); -+ count -= size; -+ ret += size; -+ buf += size; -+ } -+ -+ up(&sem); -+ -+ return ret; -+} -+ -+struct file_operations kgdb_consdev_fops = { -+ open:kgdb_consdev_open, -+ write:kgdb_consdev_write -+}; -+static kdev_t -+kgdb_console_device(struct console *c) -+{ -+ return MKDEV(TTYAUX_MAJOR, 1); -+} -+ -+/* -+ * This routine gets called from the serial stub in the i386/lib -+ * This is so it is done late in bring up (just before the console open). 
-+ */ -+void -+kgdb_console_finit(void) -+{ -+ if (kgdb_console_enabled) { -+ char *cptr = cdevname(MKDEV(TTYAUX_MAJOR, 1)); -+ char *cp = cptr; -+ while (*cptr && *cptr != '(') -+ cptr++; -+ *cptr = 0; -+ unregister_chrdev(TTYAUX_MAJOR, cp); -+ register_chrdev(TTYAUX_MAJOR, "kgdb", &kgdb_consdev_fops); -+ } -+} -+#endif -+#endif -+#ifdef CONFIG_KGDB_TS -+#include /* time stamp code */ -+#include /* in_interrupt */ -+#ifdef CONFIG_KGDB_TS_64 -+#define DATA_POINTS 64 -+#endif -+#ifdef CONFIG_KGDB_TS_128 -+#define DATA_POINTS 128 -+#endif -+#ifdef CONFIG_KGDB_TS_256 -+#define DATA_POINTS 256 -+#endif -+#ifdef CONFIG_KGDB_TS_512 -+#define DATA_POINTS 512 -+#endif -+#ifdef CONFIG_KGDB_TS_1024 -+#define DATA_POINTS 1024 -+#endif -+#ifndef DATA_POINTS -+#define DATA_POINTS 128 /* must be a power of two */ -+#endif -+#define INDEX_MASK (DATA_POINTS - 1) -+#if (INDEX_MASK & DATA_POINTS) -+#error "CONFIG_KGDB_TS_COUNT must be a power of 2" -+#endif -+struct kgdb_and_then_struct { -+#ifdef CONFIG_SMP -+ int on_cpu; -+#endif -+ struct task_struct *task; -+ long long at_time; -+ int from_ln; -+ char *in_src; -+ void *from; -+ int *with_shpf; -+ int data0; -+ int data1; -+}; -+struct kgdb_and_then_struct2 { -+#ifdef CONFIG_SMP -+ int on_cpu; -+#endif -+ struct task_struct *task; -+ long long at_time; -+ int from_ln; -+ char *in_src; -+ void *from; -+ int *with_shpf; -+ struct task_struct *t1; -+ struct task_struct *t2; -+}; -+struct kgdb_and_then_struct kgdb_data[DATA_POINTS]; -+ -+struct kgdb_and_then_struct *kgdb_and_then = &kgdb_data[0]; -+int kgdb_and_then_count; -+ -+void -+kgdb_tstamp(int line, char *source, int data0, int data1) -+{ -+ static spinlock_t ts_spin = SPIN_LOCK_UNLOCKED; -+ int flags; -+ kgdb_local_irq_save(flags); -+ spin_lock(&ts_spin); -+ rdtscll(kgdb_and_then->at_time); -+#ifdef CONFIG_SMP -+ kgdb_and_then->on_cpu = smp_processor_id(); -+#endif -+ kgdb_and_then->task = current; -+ kgdb_and_then->from_ln = line; -+ kgdb_and_then->in_src = source; -+ 
kgdb_and_then->from = __builtin_return_address(0); -+ kgdb_and_then->with_shpf = (int *) (((flags & IF_BIT) >> 9) | -+ (preempt_count() << 8)); -+ kgdb_and_then->data0 = data0; -+ kgdb_and_then->data1 = data1; -+ kgdb_and_then = &kgdb_data[++kgdb_and_then_count & INDEX_MASK]; -+ spin_unlock(&ts_spin); -+ kgdb_local_irq_restore(flags); -+#ifdef CONFIG_PREEMPT -+ -+#endif -+ return; -+} -+#endif -+typedef int gdb_debug_hook(int exceptionVector, -+ int signo, int err_code, struct pt_regs *linux_regs); -+gdb_debug_hook *linux_debug_hook = &kgdb_handle_exception; /* histerical reasons... */ -diff -puN arch/i386/kernel/Makefile~kgdb-ga arch/i386/kernel/Makefile ---- 25/arch/i386/kernel/Makefile~kgdb-ga 2003-06-25 23:14:17.000000000 -0700 -+++ 25-akpm/arch/i386/kernel/Makefile 2003-06-25 23:14:17.000000000 -0700 -@@ -14,6 +14,7 @@ obj-y += timers/ - obj-$(CONFIG_ACPI) += acpi/ - obj-$(CONFIG_X86_BIOS_REBOOT) += reboot.o - obj-$(CONFIG_MCA) += mca.o -+obj-$(CONFIG_KGDB) += kgdb_stub.o - obj-$(CONFIG_X86_MSR) += msr.o - obj-$(CONFIG_X86_CPUID) += cpuid.o - obj-$(CONFIG_MICROCODE) += microcode.o -diff -puN arch/i386/kernel/nmi.c~kgdb-ga arch/i386/kernel/nmi.c ---- 25/arch/i386/kernel/nmi.c~kgdb-ga 2003-06-25 23:14:17.000000000 -0700 -+++ 25-akpm/arch/i386/kernel/nmi.c 2003-06-25 23:14:17.000000000 -0700 -@@ -31,7 +31,17 @@ - #include - #include - -+#ifdef CONFIG_KGDB -+#include -+#ifdef CONFIG_SMP -+unsigned int nmi_watchdog = NMI_IO_APIC; -+#else -+unsigned int nmi_watchdog = NMI_LOCAL_APIC; -+#endif -+#else - unsigned int nmi_watchdog = NMI_NONE; -+#endif -+ - static unsigned int nmi_hz = HZ; - unsigned int nmi_perfctr_msr; /* the MSR to reset in NMI handler */ - extern void show_registers(struct pt_regs *regs); -@@ -398,6 +408,9 @@ void touch_nmi_watchdog (void) - for (i = 0; i < NR_CPUS; i++) - alert_counter[i] = 0; - } -+#ifdef CONFIG_KGDB -+int tune_watchdog = 5*HZ; -+#endif - - void nmi_watchdog_tick (struct pt_regs * regs) - { -@@ -411,12 +424,24 @@ void 
nmi_watchdog_tick (struct pt_regs * - - sum = irq_stat[cpu].apic_timer_irqs; - -+#ifdef CONFIG_KGDB -+ if (! in_kgdb(regs) && last_irq_sums[cpu] == sum ) { -+ -+#else - if (last_irq_sums[cpu] == sum) { -+#endif - /* - * Ayiee, looks like this CPU is stuck ... - * wait a few IRQs (5 seconds) before doing the oops ... - */ - alert_counter[cpu]++; -+#ifdef CONFIG_KGDB -+ if (alert_counter[cpu] == tune_watchdog) { -+ kgdb_handle_exception(2, SIGPWR, 0, regs); -+ last_irq_sums[cpu] = sum; -+ alert_counter[cpu] = 0; -+ } -+#endif - if (alert_counter[cpu] == 5*nmi_hz) { - spin_lock(&nmi_print_lock); - /* -diff -puN arch/i386/kernel/smp.c~kgdb-ga arch/i386/kernel/smp.c ---- 25/arch/i386/kernel/smp.c~kgdb-ga 2003-06-25 23:14:17.000000000 -0700 -+++ 25-akpm/arch/i386/kernel/smp.c 2003-06-25 23:14:17.000000000 -0700 -@@ -459,7 +459,17 @@ void smp_send_reschedule(int cpu) - { - send_IPI_mask(1 << cpu, RESCHEDULE_VECTOR); - } -- -+#ifdef CONFIG_KGDB -+/* -+ * By using the NMI code instead of a vector we just sneak thru the -+ * word generator coming out with just what we want. AND it does -+ * not matter if clustered_apic_mode is set or not. -+ */ -+void smp_send_nmi_allbutself(void) -+{ -+ send_IPI_allbutself(APIC_DM_NMI); -+} -+#endif - /* - * Structure and data for smp_call_function(). This is designed to minimise - * static memory requirements. It also looks cleaner. 
-diff -puN arch/i386/kernel/traps.c~kgdb-ga arch/i386/kernel/traps.c ---- 25/arch/i386/kernel/traps.c~kgdb-ga 2003-06-25 23:14:17.000000000 -0700 -+++ 25-akpm/arch/i386/kernel/traps.c 2003-06-25 23:14:17.000000000 -0700 -@@ -90,6 +90,42 @@ asmlinkage void alignment_check(void); - asmlinkage void spurious_interrupt_bug(void); - asmlinkage void machine_check(void); - -+#ifdef CONFIG_KGDB -+extern void sysenter_entry(void); -+#include -+#include -+extern void int3(void); -+extern void debug(void); -+void set_intr_gate(unsigned int n, void *addr); -+static void set_intr_usr_gate(unsigned int n, void *addr); -+/* -+ * Should be able to call this breakpoint() very early in -+ * bring up. Just hard code the call where needed. -+ * The breakpoint() code is here because set_?_gate() functions -+ * are local (static) to trap.c. They need be done only once, -+ * but it does not hurt to do them over. -+ */ -+void breakpoint(void) -+{ -+ set_intr_usr_gate(3,&int3); /* disable ints on trap */ -+ set_intr_gate(1,&debug); -+ set_intr_gate(14,&page_fault); -+ -+ BREAKPOINT; -+} -+#define CHK_REMOTE_DEBUG(trapnr,signr,error_code,regs,after) \ -+ { \ -+ if (!user_mode(regs) ) \ -+ { \ -+ kgdb_handle_exception(trapnr, signr, error_code, regs); \ -+ after; \ -+ } else if ((trapnr == 3) && (regs->eflags &0x200)) local_irq_enable(); \ -+ } -+#else -+#define CHK_REMOTE_DEBUG(trapnr,signr,error_code,regs,after) -+#endif -+ -+ - static int kstack_depth_to_print = 24; - - void show_trace(struct task_struct *task, unsigned long * stack) -@@ -258,6 +294,15 @@ void die(const char * str, struct pt_reg - bust_spinlocks(1); - handle_BUG(regs); - printk("%s: %04lx [#%d]\n", str, err & 0xffff, ++die_counter); -+#ifdef CONFIG_KGDB -+ /* This is about the only place we want to go to kgdb even if in -+ * user mode. But we must go in via a trap so within kgdb we will -+ * always be in kernel mode. 
-+ */ -+ if (user_mode(regs)) -+ BREAKPOINT; -+#endif -+ CHK_REMOTE_DEBUG(0,SIGTRAP,err,regs,) - show_registers(regs); - bust_spinlocks(0); - spin_unlock_irq(&die_lock); -@@ -327,6 +372,7 @@ static inline void do_trap(int trapnr, i - #define DO_ERROR(trapnr, signr, str, name) \ - asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ - { \ -+ CHK_REMOTE_DEBUG(trapnr,signr,error_code,regs,)\ - do_trap(trapnr, signr, str, 0, regs, error_code, NULL); \ - } - -@@ -344,7 +390,9 @@ asmlinkage void do_##name(struct pt_regs - #define DO_VM86_ERROR(trapnr, signr, str, name) \ - asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ - { \ -+ CHK_REMOTE_DEBUG(trapnr, signr, error_code,regs, return)\ - do_trap(trapnr, signr, str, 1, regs, error_code, NULL); \ -+ return; \ - } - - #define DO_VM86_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \ -@@ -387,8 +435,10 @@ gp_in_vm86: - return; - - gp_in_kernel: -- if (!fixup_exception(regs)) -+ if (!fixup_exception(regs)){ -+ CHK_REMOTE_DEBUG(13,SIGSEGV,error_code,regs,) - die("general protection fault", regs, error_code); -+ } - } - - static void mem_parity_error(unsigned char reason, struct pt_regs * regs) -@@ -550,8 +600,18 @@ asmlinkage void do_debug(struct pt_regs - * allowing programs to debug themselves without the ptrace() - * interface. - */ -- if ((regs->xcs & 3) == 0) -+#ifdef CONFIG_KGDB -+ /* -+ * I think this is the only "real" case of a TF in the kernel -+ * that really belongs to user space. Others are -+ * "Ours all ours!" 
-+ */ -+ if (((regs->xcs & 3) == 0) && ((void *)regs->eip == sysenter_entry)) - goto clear_TF_reenable; -+#else -+ if ((regs->xcs & 3) == 0) -+ goto clear_TF_reenable; -+#endif - if ((tsk->ptrace & (PT_DTRACE|PT_PTRACED)) == PT_DTRACE) - goto clear_TF; - } -@@ -563,6 +623,17 @@ asmlinkage void do_debug(struct pt_regs - info.si_errno = 0; - info.si_code = TRAP_BRKPT; - -+#ifdef CONFIG_KGDB -+ /* -+ * If this is a kernel mode trap, we need to reset db7 to allow us -+ * to continue sanely ALSO skip the signal delivery -+ */ -+ if ((regs->xcs & 3) == 0) -+ goto clear_dr7; -+ -+ /* if not kernel, allow ints but only if they were on */ -+ if ( regs->eflags & 0x200) local_irq_enable(); -+#endif - /* If this is a kernel mode trap, save the user PC on entry to - * the kernel, that's what the debugger can make sense of. - */ -@@ -577,6 +648,7 @@ clear_dr7: - __asm__("movl %0,%%db7" - : /* no output */ - : "r" (0)); -+ CHK_REMOTE_DEBUG(1,SIGTRAP,error_code,regs,) - return; - - debug_vm86: -@@ -823,6 +895,12 @@ static void __init set_call_gate(void *a - { - _set_gate(a,12,3,addr,__KERNEL_CS); - } -+#ifdef CONFIG_KGDB -+void set_intr_usr_gate(unsigned int n, void *addr) -+{ -+ _set_gate(idt_table+n,14,3,addr,__KERNEL_CS); -+} -+#endif - - static void __init set_task_gate(unsigned int n, unsigned int gdt_entry) - { -@@ -849,7 +927,11 @@ void __init trap_init(void) - set_trap_gate(0,÷_error); - set_intr_gate(1,&debug); - set_intr_gate(2,&nmi); -+#ifndef CONFIG_KGDB - set_system_gate(3,&int3); /* int3-5 can be called from all */ -+#else -+ set_intr_usr_gate(3,&int3); /* int3-5 can be called from all */ -+#endif - set_system_gate(4,&overflow); - set_system_gate(5,&bounds); - set_trap_gate(6,&invalid_op); -diff -puN /dev/null arch/i386/lib/kgdb_serial.c ---- /dev/null 2002-08-30 16:31:37.000000000 -0700 -+++ 25-akpm/arch/i386/lib/kgdb_serial.c 2003-06-25 23:14:17.000000000 -0700 -@@ -0,0 +1,485 @@ -+/* -+ * Serial interface GDB stub -+ * -+ * Written (hacked together) by David 
Grothe (dave@gcom.com) -+ * Modified to allow invokation early in boot see also -+ * kgdb.h for instructions by George Anzinger(george@mvista.com) -+ * -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#ifdef CONFIG_KGDB_USER_CONSOLE -+extern void kgdb_console_finit(void); -+#endif -+#define PRNT_off -+#define TEST_EXISTANCE -+#ifdef PRNT -+#define dbprintk(s) printk s -+#else -+#define dbprintk(s) -+#endif -+#define TEST_INTERRUPT_off -+#ifdef TEST_INTERRUPT -+#define intprintk(s) printk s -+#else -+#define intprintk(s) -+#endif -+ -+#define IRQ_T(info) ((info->flags & ASYNC_SHARE_IRQ) ? SA_SHIRQ : SA_INTERRUPT) -+ -+#define GDB_BUF_SIZE 512 /* power of 2, please */ -+ -+static char gdb_buf[GDB_BUF_SIZE]; -+static int gdb_buf_in_inx; -+static atomic_t gdb_buf_in_cnt; -+static int gdb_buf_out_inx; -+ -+struct async_struct *gdb_async_info; -+static int gdb_async_irq; -+ -+#define outb_px(a,b) outb_p(b,a) -+ -+static void program_uart(struct async_struct *info); -+static void write_char(struct async_struct *info, int chr); -+/* -+ * Get a byte from the hardware data buffer and return it -+ */ -+static int -+read_data_bfr(struct async_struct *info) -+{ -+ char it = inb_p(info->port + UART_LSR); -+ -+ if (it & UART_LSR_DR) -+ return (inb_p(info->port + UART_RX)); -+ /* -+ * If we have a framing error assume somebody messed with -+ * our uart. Reprogram it and send '-' both ways... -+ */ -+ if (it & 0xc) { -+ program_uart(info); -+ write_char(info, '-'); -+ return ('-'); -+ } -+ return (-1); -+ -+} /* read_data_bfr */ -+ -+/* -+ * Get a char if available, return -1 if nothing available. -+ * Empty the receive buffer first, then look at the interface hardware. -+ -+ * Locking here is a bit of a problem. 
We MUST not lock out communication -+ * if we are trying to talk to gdb about a kgdb entry. ON the other hand -+ * we can loose chars in the console pass thru if we don't lock. It is also -+ * possible that we could hold the lock or be waiting for it when kgdb -+ * NEEDS to talk. Since kgdb locks down the world, it does not need locks. -+ * We do, of course have possible issues with interrupting a uart operation, -+ * but we will just depend on the uart status to help keep that straight. -+ -+ */ -+static spinlock_t uart_interrupt_lock = SPIN_LOCK_UNLOCKED; -+#ifdef CONFIG_SMP -+extern spinlock_t kgdb_spinlock; -+#endif -+ -+static int -+read_char(struct async_struct *info) -+{ -+ int chr; -+ unsigned long flags; -+ local_irq_save(flags); -+#ifdef CONFIG_SMP -+ if (!spin_is_locked(&kgdb_spinlock)) { -+ spin_lock(&uart_interrupt_lock); -+ } -+#endif -+ if (atomic_read(&gdb_buf_in_cnt) != 0) { /* intr routine has q'd chars */ -+ chr = gdb_buf[gdb_buf_out_inx++]; -+ gdb_buf_out_inx &= (GDB_BUF_SIZE - 1); -+ atomic_dec(&gdb_buf_in_cnt); -+ } else { -+ chr = read_data_bfr(info); -+ } -+#ifdef CONFIG_SMP -+ if (!spin_is_locked(&kgdb_spinlock)) { -+ spin_unlock(&uart_interrupt_lock); -+ } -+#endif -+ local_irq_restore(flags); -+ return (chr); -+} -+ -+/* -+ * Wait until the interface can accept a char, then write it. -+ */ -+static void -+write_char(struct async_struct *info, int chr) -+{ -+ while (!(inb_p(info->port + UART_LSR) & UART_LSR_THRE)) ; -+ -+ outb_p(chr, info->port + UART_TX); -+ -+} /* write_char */ -+ -+/* -+ * Mostly we don't need a spinlock, but since the console goes -+ * thru here with interrutps on, well, we need to catch those -+ * chars. -+ */ -+/* -+ * This is the receiver interrupt routine for the GDB stub. -+ * It will receive a limited number of characters of input -+ * from the gdb host machine and save them up in a buffer. 
-+ * -+ * When the gdb stub routine getDebugChar() is called it -+ * draws characters out of the buffer until it is empty and -+ * then reads directly from the serial port. -+ * -+ * We do not attempt to write chars from the interrupt routine -+ * since the stubs do all of that via putDebugChar() which -+ * writes one byte after waiting for the interface to become -+ * ready. -+ * -+ * The debug stubs like to run with interrupts disabled since, -+ * after all, they run as a consequence of a breakpoint in -+ * the kernel. -+ * -+ * Perhaps someone who knows more about the tty driver than I -+ * care to learn can make this work for any low level serial -+ * driver. -+ */ -+static irqreturn_t -+gdb_interrupt(int irq, void *dev_id, struct pt_regs *regs) -+{ -+ struct async_struct *info; -+ unsigned long flags; -+ -+ info = gdb_async_info; -+ if (!info || !info->tty || irq != gdb_async_irq) -+ return IRQ_NONE; -+ -+ local_irq_save(flags); -+ spin_lock(&uart_interrupt_lock); -+ do { -+ int chr = read_data_bfr(info); -+ intprintk(("Debug char on int: %x hex\n", chr)); -+ if (chr < 0) -+ continue; -+ -+ if (chr == 3) { /* Ctrl-C means remote interrupt */ -+ BREAKPOINT; -+ continue; -+ } -+ -+ if (atomic_read(&gdb_buf_in_cnt) >= GDB_BUF_SIZE) { -+ /* buffer overflow tosses early char */ -+ read_char(info); -+ } -+ gdb_buf[gdb_buf_in_inx++] = chr; -+ gdb_buf_in_inx &= (GDB_BUF_SIZE - 1); -+ } while (inb_p(info->port + UART_IIR) & UART_IIR_RDI); -+ spin_unlock(&uart_interrupt_lock); -+ local_irq_restore(flags); -+ return IRQ_HANDLED; -+} /* gdb_interrupt */ -+ -+/* -+ * Just a NULL routine for testing. -+ */ -+void -+gdb_null(void) -+{ -+} /* gdb_null */ -+ -+/* These structure are filled in with values defined in asm/kgdb_local.h -+ */ -+static struct serial_state state = SB_STATE; -+static struct async_struct local_info = SB_INFO; -+static int ok_to_enable_ints = 0; -+static void kgdb_enable_ints_now(void); -+ -+extern char *kgdb_version; -+/* -+ * Hook an IRQ for KGDB. 
-+ * -+ * This routine is called from putDebugChar, below. -+ */ -+static int ints_disabled = 1; -+int -+gdb_hook_interrupt(struct async_struct *info, int verb) -+{ -+ struct serial_state *state = info->state; -+ unsigned long flags; -+ int port; -+#ifdef TEST_EXISTANCE -+ int scratch, scratch2; -+#endif -+ -+ /* The above fails if memory managment is not set up yet. -+ * Rather than fail the set up, just keep track of the fact -+ * and pick up the interrupt thing later. -+ */ -+ gdb_async_info = info; -+ port = gdb_async_info->port; -+ gdb_async_irq = state->irq; -+ if (verb) { -+ printk("kgdb %s : port =%x, IRQ=%d, divisor =%d\n", -+ kgdb_version, -+ port, -+ gdb_async_irq, gdb_async_info->state->custom_divisor); -+ } -+ local_irq_save(flags); -+#ifdef TEST_EXISTANCE -+ /* Existance test */ -+ /* Should not need all this, but just in case.... */ -+ -+ scratch = inb_p(port + UART_IER); -+ outb_px(port + UART_IER, 0); -+ outb_px(0xff, 0x080); -+ scratch2 = inb_p(port + UART_IER); -+ outb_px(port + UART_IER, scratch); -+ if (scratch2) { -+ printk -+ ("gdb_hook_interrupt: Could not clear IER, not a UART!\n"); -+ local_irq_restore(flags); -+ return 1; /* We failed; there's nothing here */ -+ } -+ scratch2 = inb_p(port + UART_LCR); -+ outb_px(port + UART_LCR, 0xBF); /* set up for StarTech test */ -+ outb_px(port + UART_EFR, 0); /* EFR is the same as FCR */ -+ outb_px(port + UART_LCR, 0); -+ outb_px(port + UART_FCR, UART_FCR_ENABLE_FIFO); -+ scratch = inb_p(port + UART_IIR) >> 6; -+ if (scratch == 1) { -+ printk("gdb_hook_interrupt: Undefined UART type!" -+ " Not a UART! 
\n"); -+ local_irq_restore(flags); -+ return 1; -+ } else { -+ dbprintk(("gdb_hook_interrupt: UART type " -+ "is %d where 0=16450, 2=16550 3=16550A\n", scratch)); -+ } -+ scratch = inb_p(port + UART_MCR); -+ outb_px(port + UART_MCR, UART_MCR_LOOP | scratch); -+ outb_px(port + UART_MCR, UART_MCR_LOOP | 0x0A); -+ scratch2 = inb_p(port + UART_MSR) & 0xF0; -+ outb_px(port + UART_MCR, scratch); -+ if (scratch2 != 0x90) { -+ printk("gdb_hook_interrupt: " -+ "Loop back test failed! Not a UART!\n"); -+ local_irq_restore(flags); -+ return scratch2 + 1000; /* force 0 to fail */ -+ } -+#endif /* test existance */ -+ program_uart(info); -+ local_irq_restore(flags); -+ -+ return (0); -+ -+} /* gdb_hook_interrupt */ -+ -+static void -+program_uart(struct async_struct *info) -+{ -+ int port = info->port; -+ -+ (void) inb_p(port + UART_RX); -+ outb_px(port + UART_IER, 0); -+ -+ (void) inb_p(port + UART_RX); /* serial driver comments say */ -+ (void) inb_p(port + UART_IIR); /* this clears the interrupt regs */ -+ (void) inb_p(port + UART_MSR); -+ outb_px(port + UART_LCR, UART_LCR_WLEN8 | UART_LCR_DLAB); -+ outb_px(port + UART_DLL, info->state->custom_divisor & 0xff); /* LS */ -+ outb_px(port + UART_DLM, info->state->custom_divisor >> 8); /* MS */ -+ outb_px(port + UART_MCR, info->MCR); -+ -+ outb_px(port + UART_FCR, UART_FCR_ENABLE_FIFO | UART_FCR_TRIGGER_1 | UART_FCR_CLEAR_XMIT | UART_FCR_CLEAR_RCVR); /* set fcr */ -+ outb_px(port + UART_LCR, UART_LCR_WLEN8); /* reset DLAB */ -+ outb_px(port + UART_FCR, UART_FCR_ENABLE_FIFO | UART_FCR_TRIGGER_1); /* set fcr */ -+ if (!ints_disabled) { -+ intprintk(("KGDB: Sending %d to port %x offset %d\n", -+ gdb_async_info->IER, -+ (int) gdb_async_info->port, UART_IER)); -+ outb_px(gdb_async_info->port + UART_IER, gdb_async_info->IER); -+ } -+ return; -+} -+ -+/* -+ * getDebugChar -+ * -+ * This is a GDB stub routine. It waits for a character from the -+ * serial interface and then returns it. 
If there is no serial -+ * interface connection then it returns a bogus value which will -+ * almost certainly cause the system to hang. In the -+ */ -+int kgdb_in_isr = 0; -+int kgdb_in_lsr = 0; -+extern spinlock_t kgdb_spinlock; -+ -+/* Caller takes needed protections */ -+ -+int -+getDebugChar(void) -+{ -+ volatile int chr, dum, time, end_time; -+ -+ dbprintk(("getDebugChar(port %x): ", gdb_async_info->port)); -+ -+ if (gdb_async_info == NULL) { -+ gdb_hook_interrupt(&local_info, 0); -+ } -+ /* -+ * This trick says if we wait a very long time and get -+ * no char, return the -1 and let the upper level deal -+ * with it. -+ */ -+ rdtsc(dum, time); -+ end_time = time + 2; -+ while (((chr = read_char(gdb_async_info)) == -1) && -+ (end_time - time) > 0) { -+ rdtsc(dum, time); -+ }; -+ /* -+ * This covers our butts if some other code messes with -+ * our uart, hay, it happens :o) -+ */ -+ if (chr == -1) -+ program_uart(gdb_async_info); -+ -+ dbprintk(("%c\n", chr > ' ' && chr < 0x7F ? chr : ' ')); -+ return (chr); -+ -+} /* getDebugChar */ -+ -+static int count = 3; -+static spinlock_t one_at_atime = SPIN_LOCK_UNLOCKED; -+ -+static int __init -+kgdb_enable_ints(void) -+{ -+ if (gdb_async_info == NULL) { -+ gdb_hook_interrupt(&local_info, 1); -+ } -+ ok_to_enable_ints = 1; -+ kgdb_enable_ints_now(); -+#ifdef CONFIG_KGDB_USER_CONSOLE -+ kgdb_console_finit(); -+#endif -+ return 0; -+} -+ -+#ifdef CONFIG_SERIAL_8250 -+void shutdown_for_kgdb(struct async_struct *gdb_async_info); -+#endif -+ -+#ifdef CONFIG_DISCONTIGMEM -+static inline int kgdb_mem_init_done(void) -+{ -+ return highmem_start_page != NULL; -+} -+#else -+static inline int kgdb_mem_init_done(void) -+{ -+ return max_mapnr != 0; -+} -+#endif -+ -+static void -+kgdb_enable_ints_now(void) -+{ -+ if (!spin_trylock(&one_at_atime)) -+ return; -+ if (!ints_disabled) -+ goto exit; -+ if (kgdb_mem_init_done() && -+ ints_disabled) { /* don't try till mem init */ -+#ifdef CONFIG_SERIAL_8250 -+ /* -+ * The ifdef here 
allows the system to be configured -+ * without the serial driver. -+ * Don't make it a module, however, it will steal the port -+ */ -+ shutdown_for_kgdb(gdb_async_info); -+#endif -+ ints_disabled = request_irq(gdb_async_info->state->irq, -+ gdb_interrupt, -+ IRQ_T(gdb_async_info), -+ "KGDB-stub", NULL); -+ intprintk(("KGDB: request_irq returned %d\n", ints_disabled)); -+ } -+ if (!ints_disabled) { -+ intprintk(("KGDB: Sending %d to port %x offset %d\n", -+ gdb_async_info->IER, -+ (int) gdb_async_info->port, UART_IER)); -+ outb_px(gdb_async_info->port + UART_IER, gdb_async_info->IER); -+ } -+ exit: -+ spin_unlock(&one_at_atime); -+} -+ -+/* -+ * putDebugChar -+ * -+ * This is a GDB stub routine. It waits until the interface is ready -+ * to transmit a char and then sends it. If there is no serial -+ * interface connection then it simply returns to its caller, having -+ * pretended to send the char. Caller takes needed protections. -+ */ -+void -+putDebugChar(int chr) -+{ -+ dbprintk(("putDebugChar(port %x): chr=%02x '%c', ints_on=%d\n", -+ gdb_async_info->port, -+ chr, -+ chr > ' ' && chr < 0x7F ? chr : ' ', ints_disabled ? 0 : 1)); -+ -+ if (gdb_async_info == NULL) { -+ gdb_hook_interrupt(&local_info, 0); -+ } -+ -+ write_char(gdb_async_info, chr); /* this routine will wait */ -+ count = (chr == '#') ? 0 : count + 1; -+ if ((count == 2)) { /* try to enable after */ -+ if (ints_disabled & ok_to_enable_ints) -+ kgdb_enable_ints_now(); /* try to enable after */ -+ -+ /* We do this a lot because, well we really want to get these -+ * interrupts. The serial driver will clear these bits when it -+ * initializes the chip. Every thing else it does is ok, -+ * but this. 
-+ */ -+ if (!ints_disabled) { -+ outb_px(gdb_async_info->port + UART_IER, -+ gdb_async_info->IER); -+ } -+ } -+ -+} /* putDebugChar */ -+ -+module_init(kgdb_enable_ints); -diff -puN arch/i386/lib/Makefile~kgdb-ga arch/i386/lib/Makefile ---- 25/arch/i386/lib/Makefile~kgdb-ga 2003-06-25 23:14:17.000000000 -0700 -+++ 25-akpm/arch/i386/lib/Makefile 2003-06-25 23:14:17.000000000 -0700 -@@ -9,4 +9,5 @@ lib-y = checksum.o delay.o \ - - lib-$(CONFIG_X86_USE_3DNOW) += mmx.o - lib-$(CONFIG_HAVE_DEC_LOCK) += dec_and_lock.o -+lib-$(CONFIG_KGDB) += kgdb_serial.o - lib-$(CONFIG_DEBUG_IOVIRT) += iodebug.o -diff -puN arch/i386/Makefile~kgdb-ga arch/i386/Makefile ---- 25/arch/i386/Makefile~kgdb-ga 2003-06-25 23:14:17.000000000 -0700 -+++ 25-akpm/arch/i386/Makefile 2003-06-25 23:14:17.000000000 -0700 -@@ -85,6 +85,9 @@ mcore-$(CONFIG_X86_ES7000) := mach-es700 - # default subarch .h files - mflags-y += -Iinclude/asm-i386/mach-default - -+mflags-$(CONFIG_KGDB) += -g -+mflags-$(CONFIG_KGDB_MORE) += $(shell echo $(CONFIG_KGDB_OPTIONS) | sed -e 's/"//g') -+ - head-y := arch/i386/kernel/head.o arch/i386/kernel/init_task.o - - libs-y += arch/i386/lib/ -diff -puN arch/i386/mm/fault.c~kgdb-ga arch/i386/mm/fault.c ---- 25/arch/i386/mm/fault.c~kgdb-ga 2003-06-25 23:14:17.000000000 -0700 -+++ 25-akpm/arch/i386/mm/fault.c 2003-06-25 23:14:17.000000000 -0700 -@@ -236,6 +236,12 @@ no_context: - * Oops. The kernel tried to access some bad page. We'll have to - * terminate things with extreme prejudice. 
- */ -+#ifdef CONFIG_KGDB -+ if (!user_mode(regs)){ -+ kgdb_handle_exception(14,SIGBUS, error_code, regs); -+ return; -+ } -+#endif - - bust_spinlocks(1); - -diff -puN /dev/null Documentation/i386/kgdb/andthen ---- /dev/null 2002-08-30 16:31:37.000000000 -0700 -+++ 25-akpm/Documentation/i386/kgdb/andthen 2003-06-25 23:14:17.000000000 -0700 -@@ -0,0 +1,100 @@ -+ -+define set_andthen -+ set var $thp=0 -+ set var $thp=(struct kgdb_and_then_struct *)&kgdb_data[0] -+ set var $at_size = (sizeof kgdb_data)/(sizeof *$thp) -+ set var $at_oc=kgdb_and_then_count -+ set var $at_cc=$at_oc -+end -+ -+define andthen_next -+ set var $at_cc=$arg0 -+end -+ -+define andthen -+ andthen_set_edge -+ if ($at_cc >= $at_oc) -+ printf "Outside window. Window size is %d\n",($at_oc-$at_low) -+ else -+ printf "%d: ",$at_cc -+ output *($thp+($at_cc++ % $at_size )) -+ printf "\n" -+ end -+end -+define andthen_set_edge -+ set var $at_oc=kgdb_and_then_count -+ set var $at_low = $at_oc - $at_size -+ if ($at_low < 0 ) -+ set var $at_low = 0 -+ end -+ if (( $at_cc > $at_oc) || ($at_cc < $at_low)) -+ printf "Count outside of window, setting count to " -+ if ($at_cc >= $at_oc) -+ set var $at_cc = $at_oc -+ else -+ set var $at_cc = $at_low -+ end -+ printf "%d\n",$at_cc -+ end -+end -+ -+define beforethat -+ andthen_set_edge -+ if ($at_cc <= $at_low) -+ printf "Outside window. Window size is %d\n",($at_oc-$at_low) -+ else -+ printf "%d: ",$at_cc-1 -+ output *($thp+(--$at_cc % $at_size )) -+ printf "\n" -+ end -+end -+ -+document andthen_next -+ andthen_next -+ . sets the number of the event to display next. If this event -+ . is not in the event pool, either andthen or beforethat will -+ . correct it to the nearest event pool edge. The event pool -+ . ends at the last event recorded and begins -+ . prior to that. If beforethat is used next, it will display -+ . event -1. -+. -+ andthen commands are: set_andthen, andthen_next, andthen and beforethat -+end -+ -+ -+document andthen -+ andthen -+. 
displays the next event in the list. sets up to display -+. the oldest saved event first. -+. (optional) count of the event to display. -+. note the number of events saved is specified at configure time. -+. if events are saved between calls to andthen the index will change -+. but the displayed event will be the next one (unless the event buffer -+. is overrun). -+. -+. andthen commands are: set_andthen, andthen_next, andthen and beforethat -+end -+ -+document set_andthen -+ set_andthen -+. sets up to use the and commands. -+. if you have defined your own struct, use the above and -+. then enter the following: -+. p $thp=(struct kgdb_and_then_structX *)&kgdb_data[0] -+. where is the name of your structure. -+. -+. andthen commands are: set_andthen, andthen_next, andthen and beforethat -+end -+ -+document beforethat -+ beforethat -+. displays the next prior event in the list. sets up to -+. display the last occuring event first. -+. -+. note the number of events saved is specified at configure time. -+. if events are saved between calls to beforethat the index will change -+. but the displayed event will be the next one (unless the event buffer -+. is overrun). -+. -+. andthen commands are: set_andthen, andthen_next, andthen and beforethat -+end -diff -puN /dev/null Documentation/i386/kgdb/debug-nmi.txt ---- /dev/null 2002-08-30 16:31:37.000000000 -0700 -+++ 25-akpm/Documentation/i386/kgdb/debug-nmi.txt 2003-06-25 23:14:17.000000000 -0700 -@@ -0,0 +1,37 @@ -+Subject: Debugging with NMI -+Date: Mon, 12 Jul 1999 11:28:31 -0500 -+From: David Grothe -+Organization: Gcom, Inc -+To: David Grothe -+ -+Kernel hackers: -+ -+Maybe this is old hat, but it is new to me -- -+ -+On an ISA bus machine, if you short out the A1 and B1 pins of an ISA -+slot you will generate an NMI to the CPU. This interrupts even a -+machine that is hung in a loop with interrupts disabled. 
Used in -+conjunction with kgdb < -+ftp://ftp.gcom.com/pub/linux/src/kgdb-2.3.35/kgdb-2.3.35.tgz > you can -+gain debugger control of a machine that is hung in the kernel! Even -+without kgdb the kernel will print a stack trace so you can find out -+where it was hung. -+ -+The A1/B1 pins are directly opposite one another and the farthest pins -+towards the bracket end of the ISA bus socket. You can stick a paper -+clip or multi-meter probe between them to short them out. -+ -+I had a spare ISA bus to PC104 bus adapter around. The PC104 end of the -+board consists of two rows of wire wrap pins. So I wired a push button -+between the A1/B1 pins and now have an ISA board that I can stick into -+any ISA bus slot for debugger entry. -+ -+Microsoft has a circuit diagram of a PCI card at -+http://www.microsoft.com/hwdev/DEBUGGING/DMPSW.HTM. If you want to -+build one you will have to mail them and ask for the PAL equations. -+Nobody makes one comercially. -+ -+[THIS TIP COMES WITH NO WARRANTY WHATSOEVER. It works for me, but if -+your machine catches fire, it is your problem, not mine.] -+ -+-- Dave (the kgdb guy) -diff -puN /dev/null Documentation/i386/kgdb/gdb-globals.txt ---- /dev/null 2002-08-30 16:31:37.000000000 -0700 -+++ 25-akpm/Documentation/i386/kgdb/gdb-globals.txt 2003-06-25 23:14:17.000000000 -0700 -@@ -0,0 +1,71 @@ -+Sender: akale@veritas.com -+Date: Fri, 23 Jun 2000 19:26:35 +0530 -+From: "Amit S. Kale" -+Organization: Veritas Software (India) -+To: Dave Grothe , linux-kernel@vger.rutgers.edu -+CC: David Milburn , -+ "Edouard G. Parmelan" , -+ ezannoni@cygnus.com, Keith Owens -+Subject: Re: Module debugging using kgdb -+ -+Dave Grothe wrote: -+> -+> Amit: -+> -+> There is a 2.4.0 version of kgdb on our ftp site: -+> ftp://ftp.gcom.com/pub/linux/src/kgdb. I mirrored your version of gdb -+> and loadmodule.sh there. -+> -+> Have a look at the README file and see if I go it right. If not, send -+> me some corrections and I will update it. 
-+> -+> Does your version of gdb solve the global variable problem? -+ -+Yes. -+Thanks to Elena Zanoni, gdb (developement version) can now calculate -+correctly addresses of dynamically loaded object files. I have not been -+following gdb developement for sometime and am not sure when symbol -+address calculation fix is going to appear in a gdb stable version. -+ -+Elena, any idea when the fix will make it to a prebuilt gdb from a -+redhat release? -+ -+For the time being I have built a gdb developement version. It can be -+used for module debugging with loadmodule.sh script. -+ -+The problem with calculating of module addresses with previous versions -+of gdb was as follows: -+gdb did not use base address of a section while calculating address of -+a symbol in the section in an object file loaded via 'add-symbol-file'. -+It used address of .text segment instead. Due to this addresses of -+symbols in .data, .bss etc. (e.g. global variables) were calculated incorrectly. -+ -+Above mentioned fix allow gdb to use base address of a segment while -+calculating address of a symbol in it. It adds a parameter '-s' to -+'add-symbol-file' command for specifying base address of a segment. -+ -+loadmodule.sh script works as follows. -+ -+1. Copy a module file to target machine. -+2. Load the module on the target machine using insmod with -m parameter. -+insmod produces a module load map which contains base addresses of all -+sections in the module and addresses of symbols in the module file. -+3. Find all sections and their base addresses in the module from -+the module map. -+4. Generate a script that loads the module file. The script uses -+'add-symbol-file' and specifies address of text segment followed by -+addresses of all segments in the module. -+ -+Here is an example gdb script produced by loadmodule.sh script. 
-+ -+add-symbol-file foo 0xd082c060 -s .text.lock 0xd08cbfb5 -+-s .fixup 0xd08cfbdf -s .rodata 0xd08cfde0 -s __ex_table 0xd08e3b38 -+-s .data 0xd08e3d00 -s .bss 0xd08ec8c0 -s __ksymtab 0xd08ee838 -+ -+With this command gdb can calculate addresses of symbols in ANY segment -+in a module file. -+ -+Regards. -+-- -+Amit Kale -+Veritas Software ( http://www.veritas.com ) -diff -puN /dev/null Documentation/i386/kgdb/gdbinit ---- /dev/null 2002-08-30 16:31:37.000000000 -0700 -+++ 25-akpm/Documentation/i386/kgdb/gdbinit 2003-06-25 23:14:17.000000000 -0700 -@@ -0,0 +1,14 @@ -+shell echo -e "\003" >/dev/ttyS0 -+set remotebaud 38400 -+target remote /dev/ttyS0 -+define si -+stepi -+printf "EAX=%08x EBX=%08x ECX=%08x EDX=%08x\n", $eax, $ebx, $ecx, $edx -+printf "ESI=%08x EDI=%08x EBP=%08x ESP=%08x\n", $esi, $edi, $ebp, $esp -+x/i $eip -+end -+define ni -+nexti -+printf "EAX=%08x EBX=%08x ECX=%08x EDX=%08x\n", $eax, $ebx, $ecx, $edx -+printf "ESI=%08x EDI=%08x EBP=%08x ESP=%08x\n", $esi, $edi, $ebp, $esp -+x/i $eip -diff -puN /dev/null Documentation/i386/kgdb/gdbinit.hw ---- /dev/null 2002-08-30 16:31:37.000000000 -0700 -+++ 25-akpm/Documentation/i386/kgdb/gdbinit.hw 2003-06-25 23:14:17.000000000 -0700 -@@ -0,0 +1,117 @@ -+ -+#Using ia-32 hardware breakpoints. -+# -+#4 hardware breakpoints are available in ia-32 processors. These breakpoints -+#do not need code modification. They are set using debug registers. -+# -+#Each hardware breakpoint can be of one of the -+#three types: execution, write, access. -+#1. An Execution breakpoint is triggered when code at the breakpoint address is -+#executed. -+#2. A write breakpoint ( aka watchpoints ) is triggered when memory location -+#at the breakpoint address is written. -+#3. An access breakpoint is triggered when memory location at the breakpoint -+#address is either read or written. 
-+# -+#As hardware breakpoints are available in limited number, use software -+#breakpoints ( br command in gdb ) instead of execution hardware breakpoints. -+# -+#Length of an access or a write breakpoint defines length of the datatype to -+#be watched. Length is 1 for char, 2 short , 3 int. -+# -+#For placing execution, write and access breakpoints, use commands -+#hwebrk, hwwbrk, hwabrk -+#To remove a breakpoint use hwrmbrk command. -+# -+#These commands take following types of arguments. For arguments associated -+#with each command, use help command. -+#1. breakpointno: 0 to 3 -+#2. length: 1 to 3 -+#3. address: Memory location in hex ( without 0x ) e.g c015e9bc -+# -+#Use the command exinfo to find which hardware breakpoint occured. -+ -+#hwebrk breakpointno address -+define hwebrk -+ maintenance packet Y$arg0,0,0,$arg1 -+end -+document hwebrk -+ hwebrk
-+ Places a hardware execution breakpoint -+ = 0 - 3 -+
= Hex digits without leading "0x". -+end -+ -+#hwwbrk breakpointno length address -+define hwwbrk -+ maintenance packet Y$arg0,1,$arg1,$arg2 -+end -+document hwwbrk -+ hwwbrk
-+ Places a hardware write breakpoint -+ = 0 - 3 -+ = 1 (1 byte), 2 (2 byte), 3 (4 byte) -+
= Hex digits without leading "0x". -+end -+ -+#hwabrk breakpointno length address -+define hwabrk -+ maintenance packet Y$arg0,1,$arg1,$arg2 -+end -+document hwabrk -+ hwabrk
-+ Places a hardware access breakpoint -+ = 0 - 3 -+ = 1 (1 byte), 2 (2 byte), 3 (4 byte) -+
= Hex digits without leading "0x". -+end -+ -+#hwrmbrk breakpointno -+define hwrmbrk -+ maintenance packet y$arg0 -+end -+document hwrmbrk -+ hwrmbrk -+ = 0 - 3 -+ Removes a hardware breakpoint -+end -+ -+define reboot -+ maintenance packet r -+end -+#exinfo -+define exinfo -+ maintenance packet qE -+end -+document exinfo -+ exinfo -+ Gives information about a breakpoint. -+end -+define get_th -+ p $th=(struct thread_info *)((int)$esp & ~8191) -+end -+document get_th -+ get_tu -+ Gets and prints the current thread_info pointer, Defines th to be it. -+end -+define get_cu -+ p $cu=(struct thread_info *)((int)$esp & ~8191)->task -+end -+document get_cu -+ get_cu -+ Gets and print the "current" value. Defines $cu to be it. -+end -+define int_off -+ set var $flags=$eflags -+ set $eflags=$eflags&~0x200 -+ end -+define int_on -+ set var $eflags|=$flags&0x200 -+ end -+document int_off -+ saves the current interrupt state and clears the processor interrupt -+ flag. Use int_on to restore the saved flag. -+end -+document int_on -+ Restores the interrupt flag saved by int_off. -+end -diff -puN /dev/null Documentation/i386/kgdb/gdbinit-modules ---- /dev/null 2002-08-30 16:31:37.000000000 -0700 -+++ 25-akpm/Documentation/i386/kgdb/gdbinit-modules 2003-06-25 23:14:17.000000000 -0700 -@@ -0,0 +1,146 @@ -+# -+# Usefull GDB user-command to debug Linux Kernel Modules with gdbstub. -+# -+# This don't work for Linux-2.0 or older. -+# -+# Author Edouard G. Parmelan -+# -+# -+# Fri Apr 30 20:33:29 CEST 1999 -+# First public release. -+# -+# Major cleanup after experiment Linux-2.0 kernel without success. -+# Symbols of a module are not in the correct order, I can't explain -+# why :( -+# -+# Fri Mar 19 15:41:40 CET 1999 -+# Initial version. 
-+# -+# Thu Jan 6 16:29:03 CST 2000 -+# A little fixing by Dave Grothe -+# -+# Mon Jun 19 09:33:13 CDT 2000 -+# Alignment changes from Edouard Parmelan -+# -+# The basic idea is to find where insmod load the module and inform -+# GDB to load the symbol table of the module with the GDB command -+# ``add-symbol-file <object> <address>
''. -+# -+# The Linux kernel holds the list of all loaded modules in module_list, -+# this list end with &kernel_module (exactly with module->next == NULL, -+# but the last module is not a real module). -+# -+# Insmod allocates the struct module before the object file. Since -+# Linux-2.1, this structure contain his size. The real address of -+# the object file is then (char*)module + module->size_of_struct. -+# -+# You can use three user functions ``mod-list'', ``mod-print-symbols'' -+# and ``add-module-symbols''. -+# -+# mod-list list all loaded modules with the format: -+# <module-address> <module-name> -+# -+# As soon as you have found the address of your module, you can -+# print its exported symbols (mod-print-symbols) or inform GDB to add -+# symbols from your module file (mod-add-symbols). -+# -+# The argument that you give to mod-print-symbols or mod-add-symbols -+# is the <module-address> from the mod-list command. -+# -+# When using the mod-add-symbols command you must also give the full -+# pathname of the modules object code file. -+# -+# The command mod-add-lis is an example of how to make this easier. -+# You can edit this macro to contain the path name of your own -+# favorite module and then use it as a shorthand to load it. You -+# still need the module-address, however. -+# -+# The internal function ``mod-validate'' set the GDB variable $mod -+# as a ``struct module*'' if the kernel known the module otherwise -+# $mod is set to NULL. This ensure to not add symbols for a wrong -+# address. -+# -+# Have a nice hacking day ! -+# -+# -+define mod-list -+ set $mod = (struct module*)module_list -+ # the last module is the kernel, ignore it -+ while $mod != &kernel_module -+ printf "%p\t%s\n", (long)$mod, ($mod)->name -+ set $mod = $mod->next -+ end -+end -+document mod-list -+List all modules in the form: <module-address> <module-name> -+Use the <module-address> as the argument for the other -+mod-commands: mod-print-symbols, mod-add-symbols.
-+end -+ -+define mod-validate -+ set $mod = (struct module*)module_list -+ while ($mod != $arg0) && ($mod != &kernel_module) -+ set $mod = $mod->next -+ end -+ if $mod == &kernel_module -+ set $mod = 0 -+ printf "%p is not a module\n", $arg0 -+ end -+end -+document mod-validate -+mod-validate <module-address> -+Internal user-command used to validate the module parameter. -+If <module-address> is a real loaded module, set $mod to it otherwise set $mod to 0. -+end -+ -+ -+define mod-print-symbols -+ mod-validate $arg0 -+ if $mod != 0 -+ set $i = 0 -+ while $i < $mod->nsyms -+ set $sym = $mod->syms[$i] -+ printf "%p\t%s\n", $sym->value, $sym->name -+ set $i = $i + 1 -+ end -+ end -+end -+document mod-print-symbols -+mod-print-symbols <module-address> -+Print all exported symbols of the module. see mod-list -+end -+ -+ -+define mod-add-symbols-align -+ mod-validate $arg0 -+ if $mod != 0 -+ set $mod_base = ($mod->size_of_struct + (long)$mod) -+ if ($arg2 != 0) && (($mod_base & ($arg2 - 1)) != 0) -+ set $mod_base = ($mod_base | ($arg2 - 1)) + 1 -+ end -+ add-symbol-file $arg1 $mod_base -+ end -+end -+document mod-add-symbols-align -+mod-add-symbols-align <module-address> <object-file-path-name> <align> -+Load the symbols table of the module from the object file where -+first section aligment is <align>. -+To retreive alignment, use `objdump -h <object-file-path-name>'. -+end -+ -+define mod-add-symbols -+ mod-add-symbols-align $arg0 $arg1 sizeof(long) -+end -+document mod-add-symbols -+mod-add-symbols <module-address> <object-file-path-name> -+Load the symbols table of the module from the object file. -+Default alignment is 4. See mod-add-symbols-align. -+end -+ -+define mod-add-lis -+ mod-add-symbols-align $arg0 /usr/src/LiS/streams.o 16 -+end -+document mod-add-lis -+mod-add-lis <module-address> -+Does mod-add-symbols <module-address> /usr/src/LiS/streams.o -+end -diff -puN /dev/null Documentation/i386/kgdb/kgdb.txt ---- /dev/null 2002-08-30 16:31:37.000000000 -0700 -+++ 25-akpm/Documentation/i386/kgdb/kgdb.txt 2003-06-25 23:14:17.000000000 -0700 -@@ -0,0 +1,715 @@ -+Last edit: <20030506.1615.42> -+This file has information specific to the i386 kgdb option.
Other -+platforms with the kgdb option may behave in a similar fashion. -+ -+New features: -+============ -+20030505.1827.27 -+We are starting to align with the sourceforge version, at least in -+commands. To this end, the boot command sting to start kgdb at -+boot time has been changed from "kgdb" to "gdb". -+ -+Andrew Morton sent a couple of patchs which are now included as follows: -+1.) We now return a flag to the interrupt handler. -+2.) We no longer use smp_num_cpus (a conflict with the lock meter). -+3.) And from William Lee Irwin III code to make -+ sure high-mem is set up before we attempt to register our interrupt -+ handler. -+We now include asm/kgdb.h from config.h so you will most likely never -+have to include it. It also 'NULLS' the kgdb macros you might have in -+your code when CONFIG_KGDB is not defined. This allows you to just -+turn off CONFIG_KGDB to turn off all the kgdb_ts() calls and such. -+This include is conditioned on the machine being an x86 so as to not -+mess with other archs. -+ -+20020801.1129.03 -+This is currently the version for the 2.4.18 (and beyond?) kernel. -+ -+We have several new "features" beginning with this version: -+ -+1.) Kgdb now syncs the "other" cpus with a cross cpu NMI. No more -+ waiting and it will pull that guy out of an irq off spin lock :) -+ -+2.) We doctored up the code that tells where a task is waiting and -+ included it so that the "info thread" command will show a bit more -+ than "schedule()". Try it... -+ -+3.) Added the ability to call a function from gdb. All the standard gdb -+ issues apply, i.e. if you hit a break point in the function you are -+ not allowed to call another (gdb limitation, not kgdb). T0 help -+ this capability we added a memory allocation function. Gdb does not -+ return this memory (it is used for stings you pass to that function -+ you are calling from gdb) so we fixed up a way to allow you to -+ manually return the memory (see below). -+ -+4.) 
Kgdb time stamps (kgdb_ts()) are enhanced to expand what was the -+ interrupt flag to now also include the preemption count and the -+ "in_interrupt" info. The flag is now called "with_pif" to indicate -+ the order, preempt_count, in_interrupt, flag. The preempt_count is -+ shifted left by 4 bits so you can read the count in hex by dropping -+ the low order digit. In_interrupt is in bit 1, and the flag is in -+ bit 0. -+ -+5.) The command: "p kgdb_info" is now expanded and prints something -+ like: -+(gdb) p kgdb_info -+$2 = {used_malloc = 0, called_from = 0xc0107506, entry_tsc = 67468627259, -+ errcode = 0, vector = 3, print_debug_info = 0, hold_on_sstep = 1, -+ cpus_waiting = {{task = 0xc027a000, pid = 32768, hold = 0, -+ regs = 0xc027bf84}, {task = 0x0, pid = 0, hold = 0, regs = 0x0}}} -+ -+ Things to note here: a.) used_malloc is the amount of memory that -+ has been malloc'ed to do calls from gdb. You can reclaim this -+ memory like this: "p kgdb_info.used_malloc=0" Cool, huh? b.) -+ cpus_waiting is now "sized" by the number of cpus you enter at -+ configure time in the kgdb configure section. This is NOT used any -+ where else in the system, but it is "nice" here. c.) The tasks -+ "pid" is now in the structure. This is the pid you will need to use -+ to decode to the thread id to get gdb to look at that thread. -+ Remember that the "info thread" command prints a list of threads -+ where in it numbers each thread with its reference number followed -+ by the threads pid. Note that the per cpu idle threads actually -+ have pids of 0 (yes there is more than one pid 0 in an SMP system). -+ To avoid confusion, kgdb numbers these threads with numbers beyond -+ the MAX_PID. That is why you see 32768 above. -+ -+6.) A subtle change, we now provide the complete register set for tasks -+ that are active on the other cpus. This allows better trace back on -+ those tasks. -+ -+ And, lets mention what we could not fix. 
Back-trace from all but the -+ thread that we trapped will, most likely, have a bogus entry in it. -+ The problem is that gdb does not recognize the entry code for -+ functions that use "current" near (at all?) the entry. The compiler -+ is putting the "current" decode as the first two instructions of the -+ function where gdb expects to find %ebp changing code. Back trace -+ also has trouble with interrupt frames. I am talking with Daniel -+ Jacobowitz about some way to fix this, but don't hold your breath. -+ -+20011220.0050.35 -+Major enhancement with this version is the ability to hold one or more -+cpus in an SMP system while allowing the others to continue. Also, by -+default only the current cpu is enabled on single step commands (please -+note that gdb issues single step commands at times other than when you -+use the si command). -+ -+Another change is to collect some useful information in -+a global structure called "kgdb_info". You should be able to just: -+ -+p kgdb_info -+ -+although I have seen cases where the first time this is done gdb just -+prints the first member but prints the whole structure if you then enter -+CR (carriage return or enter). This also works: -+ -+p *&kgdb_info -+ -+Here is a sample: -+(gdb) p kgdb_info -+$4 = {called_from = 0xc010732c, entry_tsc = 32804123790856, errcode = 0, -+ vector = 3, print_debug_info = 0} -+ -+"Called_from" is the return address from the current entry into kgdb. -+Sometimes it is useful to know why you are in kgdb, for example, was -+it an NMI or a real break point? The simple way to interrogate this -+return address is: -+ -+l *0xc010732c -+ -+which will print the surrounding few lines of source code. -+ -+"Entry_tsc" is the cpu TSC on entry to kgdb (useful to compare to the -+kgdb_ts entries). -+ -+"errcode" and "vector" are other entry parameters which may be helpful on -+some traps. -+ -+"print_debug_info" is the internal debugging kgdb print enable flag. Yes, -+you can modify it. 
-+ -+In SMP systems kgdb_info also includes the "cpus_waiting" structure and -+"hold_on_step": -+ -+(gdb) p kgdb_info -+$7 = {called_from = 0xc0112739, entry_tsc = 1034936624074, errcode = 0, -+ vector = 2, print_debug_info = 0, hold_on_sstep = 1, cpus_waiting = {{ -+ task = 0x0, hold = 0, regs = 0x0}, {task = 0xc71b8000, hold = 0, -+ regs = 0xc71b9f70}, {task = 0x0, hold = 0, regs = 0x0}, {task = 0x0, -+ hold = 0, regs = 0x0}, {task = 0x0, hold = 0, regs = 0x0}, {task = 0x0, -+ hold = 0, regs = 0x0}, {task = 0x0, hold = 0, regs = 0x0}, {task = 0x0, -+ hold = 0, regs = 0x0}}} -+ -+"Cpus_waiting" has an entry for each cpu other than the current one that -+has been stopped. Each entry contains the task_struct address for that -+cpu, the address of the regs for that task and a hold flag. All these -+have the proper typing so that, for example: -+ -+p *kgdb_info.cpus_waiting[1].regs -+ -+will print the registers for cpu 1. -+ -+"Hold_on_sstep" is a new feature with this version and comes up set or -+true. What is means is that whenever kgdb is asked to single step all -+other cpus are held (i.e. not allowed to execute). The flag applies to -+all but the current cpu and, again, can be changed: -+ -+p kgdb_info.hold_on_sstep=0 -+ -+restores the old behavior of letting all cpus run during single stepping. -+ -+Likewise, each cpu has a "hold" flag, which if set, locks that cpu out -+of execution. Note that this has some risk in cases where the cpus need -+to communicate with each other. If kgdb finds no cpu available on exit, -+it will push a message thru gdb and stay in kgdb. Note that it is legal -+to hold the current cpu as long as at least one cpu can execute. -+ -+20010621.1117.09 -+This version implements an event queue. Events are signaled by calling -+a function in the kgdb stub and may be examined from gdb. See EVENTS -+below for details. 
This version also tighten up the interrupt and SMP -+handling to not allow interrupts on the way to kgdb from a breakpoint -+trap. It is fine to allow these interrupts for user code, but not -+system debugging. -+ -+Version -+======= -+ -+This version of the kgdb package was developed and tested on -+kernel version 2.4.16. It will not install on any earlier kernels. -+It is possible that it will continue to work on later versions -+of 2.4 and then versions of 2.5 (I hope). -+ -+ -+Debugging Setup -+=============== -+ -+Designate one machine as the "development" machine. This is the -+machine on which you run your compiles and which has your source -+code for the kernel. Designate a second machine as the "target" -+machine. This is the machine that will run your experimental -+kernel. -+ -+The two machines will be connected together via a serial line out -+one or the other of the COM ports of the PC. You will need a modem -+eliminator and the appropriate cables. -+ -+Decide on which tty port you want the machines to communicate, then -+cable them up back-to-back using the null modem. COM1 is /dev/ttyS0 and -+COM2 is /dev/ttyS1. You should test this connection with the two -+machines prior to trying to debug a kernel. Once you have it working, -+on the TARGET machine, enter: -+ -+setserial /dev/ttyS0 (or what ever tty you are using) -+ -+and record the port and the irq addresses. -+ -+On the DEVELOPMENT machine you need to apply the patch for the kgdb -+hooks. You have probably already done that if you are reading this -+file. -+ -+On your DEVELOPMENT machine, go to your kernel source directory and do -+"make Xconfig" where X is one of "x", "menu", or "". If you are -+configuring in the standard serial driver, it must not be a module. -+Either yes or no is ok, but making the serial driver a module means it -+will initialize after kgdb has set up the UART interrupt code and may -+cause a failure of the control C option discussed below. 
The configure -+question for the serial driver is under the "Character devices" heading -+and is: -+ -+"Standard/generic (8250/16550 and compatible UARTs) serial support" -+ -+Go down to the kernel debugging menu item and open it up. Enable the -+kernel kgdb stub code by selecting that item. You can also choose to -+turn on the "-ggdb -O1" compile options. The -ggdb causes the compiler -+to put more debug info (like local symbols) in the object file. On the -+i386 -g and -ggdb are the same so this option just reduces to "O1". The -+-O1 reduces the optimization level. This may be helpful in some cases, -+be aware, however, that this may also mask the problem you are looking -+for. -+ -+The baud rate. Default is 115200. What ever you choose be sure that -+the host machine is set to the same speed. I recommend the default. -+ -+The port. This is the I/O address of the serial UART that you should -+have gotten using setserial as described above. The standard com1 port -+(3f8) using irq 4 is default . Com2 is 2f8 which by convention uses irq -+3. -+ -+The port irq (see above). -+ -+Stack overflow test. This option makes a minor change in the trap, -+system call and interrupt code to detect stack overflow and transfer -+control to kgdb if it happens. (Some platforms have this in the base -+line code, but the i386 does not.) -+ -+You can also configure the system to recognize the boot option -+"console=kgdb" which if given will cause all console output during -+booting to be put thru gdb as well as other consoles. This option -+requires that gdb and kgdb be connected prior to sending console output -+so, if they are not, a breakpoint is executed to force the connection. -+This will happen before any kernel output (it is going thru gdb, right), -+and will stall the boot until the connection is made. -+ -+You can also configure in a patch to SysRq to enable the kGdb SysRq. -+This request generates a breakpoint. 
Since the serial port irq line is -+set up after any serial drivers, it is possible that this command will -+work when the control C will not. -+ -+Save and exit the Xconfig program. Then do "make clean" , "make dep" -+and "make bzImage" (or whatever target you want to make). This gets the -+kernel compiled with the "-g" option set -- necessary for debugging. -+ -+You have just built the kernel on your DEVELOPMENT machine that you -+intend to run on your TARGET machine. -+ -+To install this new kernel, use the following installation procedure. -+Remember, you are on the DEVELOPMENT machine patching the kernel source -+for the kernel that you intend to run on the TARGET machine. -+ -+Copy this kernel to your target machine using your usual procedures. I -+usually arrange to copy development: -+/usr/src/linux/arch/i386/boot/bzImage to /vmlinuz on the TARGET machine -+via a LAN based NFS access. That is, I run the cp command on the target -+and copy from the development machine via the LAN. Run Lilo (see "man -+lilo" for details on how to set this up) on the new kernel on the target -+machine so that it will boot! Then boot the kernel on the target -+machine. -+ -+On the DEVELOPMENT machine, create a file called .gdbinit in the -+directory /usr/src/linux. An example .gdbinit file looks like this: -+ -+shell echo -e "\003" >/dev/ttyS0 -+set remotebaud 38400 (or what ever speed you have chosen) -+target remote /dev/ttyS0 -+ -+ -+Change the "echo" and "target" definition so that it specifies the tty -+port that you intend to use. Change the "remotebaud" definition to -+match the data rate that you are going to use for the com line. -+ -+You are now ready to try it out. -+ -+Boot your target machine with "kgdb" in the boot command i.e. something -+like: -+ -+lilo> test kgdb -+ -+or if you also want console output thru gdb: -+ -+lilo> test kgdb console=kgdb -+ -+You should see the lilo message saying it has loaded the kernel and then -+all output stops. 
The kgdb stub is trying to connect with gdb. Start -+gdb something like this: -+ -+ -+On your DEVELOPMENT machine, cd /usr/src/linux and enter "gdb vmlinux". -+When gdb gets the symbols loaded it will read your .gdbinit file and, if -+everything is working correctly, you should see gdb print out a few -+lines indicating that a breakpoint has been taken. It will actually -+show a line of code in the target kernel inside the kgdb activation -+code. -+ -+The gdb interaction should look something like this: -+ -+ linux-dev:/usr/src/linux# gdb vmlinux -+ GDB is free software and you are welcome to distribute copies of it -+ under certain conditions; type "show copying" to see the conditions. -+ There is absolutely no warranty for GDB; type "show warranty" for details. -+ GDB 4.15.1 (i486-slackware-linux), -+ Copyright 1995 Free Software Foundation, Inc... -+ breakpoint () at i386-stub.c:750 -+ 750 } -+ (gdb) -+ -+You can now use whatever gdb commands you like to set breakpoints. -+Enter "continue" to start your target machine executing again. At this -+point the target system will run at full speed until it encounters -+your breakpoint or gets a segment violation in the kernel, or whatever. -+ -+If you have the kgdb console enabled when you continue, gdb will print -+out all the console messages. -+ -+The above example caused a breakpoint relatively early in the boot -+process. For the i386 kgdb it is possible to code a break instruction -+as the first C-language point in init/main.c, i.e. as the first instruction -+in start_kernel(). This could be done as follows: -+ -+#include <asm/kgdb.h> -+ breakpoint(); -+ -+This breakpoint() is really a function that sets up the breakpoint and -+single-step hardware trap cells and then executes a breakpoint. Any -+early hard coded breakpoint will need to use this function.
Once the -+trap cells are set up they need not be set again, but doing it again -+does not hurt anything, so you don't need to be concerned about which -+breakpoint is hit first. Once the trap cells are set up (and the kernel -+sets them up in due course even if breakpoint() is never called) the -+macro: -+ -+BREAKPOINT; -+ -+will generate an inline breakpoint. This may be more useful as it stops -+the processor at the instruction instead of in a function a step removed -+from the location of interest. In either case <asm/kgdb.h> must be -+included to define both breakpoint() and BREAKPOINT. -+ -+Triggering kgdbstub at other times -+================================== -+ -+Often you don't need to enter the debugger until much later in the boot -+or even after the machine has been running for some time. Once the -+kernel is booted and interrupts are on, you can force the system to -+enter the debugger by sending a control C to the debug port. This is -+what the first line of the recommended .gdbinit file does. This allows -+you to start gdb any time after the system is up as well as when the -+system is already at a break point. (In the case where the system is -+already at a break point the control C is not needed, however, it will -+be ignored by the target so no harm is done. Also note the the echo -+command assumes that the port speed is already set. This will be true -+once gdb has connected, but it is best to set the port speed before you -+run gdb.) -+ -+Another simple way to do this is to put the following file in you ~/bin -+directory: -+ -+#!/bin/bash -+echo -e "\003" > /dev/ttyS0 -+ -+Here, the ttyS0 should be replaced with what ever port you are using. -+The "\003" is control-C. Once you are connected with gdb, you can enter -+control-C at the command prompt. -+ -+An alternative way to get control to the debugger is to enable the kGdb -+SysRq command. Then you would enter Alt-SysRq-g (all three keys at the -+same time, but push them down in the order given).
To refresh your -+memory of the available SysRq commands try Alt-SysRq-=. Actually any -+undefined command could replace the "=", but I like to KNOW that what I -+am pushing will never be defined. -+ -+Debugging hints -+=============== -+ -+You can break into the target machine at any time from the development -+machine by typing ^C (see above paragraph). If the target machine has -+interrupts enabled this will stop it in the kernel and enter the -+debugger. -+ -+There is unfortunately no way of breaking into the kernel if it is -+in a loop with interrupts disabled, so if this happens to you then -+you need to place exploratory breakpoints or printk's into the kernel -+to find out where it is looping. The exploratory breakpoints can be -+entered either thru gdb or hard coded into the source. This is very -+handy if you do something like: -+ -+if (<condition>) BREAKPOINT; -+ -+ -+There is a copy of an e-mail in the Documentation/i386/kgdb/ directory -+(debug-nmi.txt) which describes how to create an NMI on an ISA bus -+machine using a paper clip. I have a sophisticated version of this made -+by wiring a push button switch into a PC104/ISA bus adapter card. The -+adapter card nicely furnishes wire wrap pins for all the ISA bus -+signals. -+ -+When you are done debugging the kernel on the target machine it is a -+good idea to leave it in a running state. This makes reboots faster, -+bypassing the fsck. So do a gdb "continue" as the last gdb command if -+this is possible. To terminate gdb itself on the development machine -+and leave the target machine running, first clear all breakpoints and -+continue, then type ^Z to suspend gdb and then kill it with "kill %1" or -+something similar. -+ -+If gdbstub Does Not Work -+======================== -+ -+If it doesn't work, you will have to troubleshoot it. Do the easy -+things first like double checking your cabling and data rates. You -+might try some non-kernel based programs to see if the back-to-back -+connection works properly.
Just something simple like cat /etc/hosts -+>/dev/ttyS0 on one machine and cat /dev/ttyS0 on the other will tell you -+if you can send data from one machine to the other. Make sure it works -+in both directions. There is no point in tearing out your hair in the -+kernel if the line doesn't work. -+ -+All of the real action takes place in the file -+/usr/src/linux/arch/i386/kernel/kgdb_stub.c. That is the code on the target -+machine that interacts with gdb on the development machine. In gdb you can -+turn on a debug switch with the following command: -+ -+ set remotedebug -+ -+This will print out the protocol messages that gdb is exchanging with -+the target machine. -+ -+Another place to look is /usr/src/arch/i386/lib/kgdb_serial.c This is -+the code that talks to the serial port on the target side. There might -+be a problem there. In particular there is a section of this code that -+tests the UART which will tell you what UART you have if you define -+"PRNT" (just remove "_off" from the #define PRNT_off). To view this -+report you will need to boot the system without any beakpoints. This -+allows the kernel to run to the point where it calls kgdb to set up -+interrupts. At this time kgdb will test the UART and print out the type -+it finds. (You need to wait so that the printks are actually being -+printed. Early in the boot they are cached, waiting for the console to -+be enabled. Also, if kgdb is entered thru a breakpoint it is possible -+to cause a dead lock by calling printk when the console is locked. The -+stub, thus avoids doing printks from break points especially in the -+serial code.) At this time, if the UART fails to do the expected thing, -+kgdb will print out (using printk) information on what failed. (These -+messages will be buried in all the other boot up messages. Look for -+lines that start with "gdb_hook_interrupt:". You may want to use dmesg -+once the system is up to view the log. 
If this fails or if you still -+don't connect, review your answers for the port address. Use: -+ -+setserial /dev/ttyS0 -+ -+to get the current port and irq information. This command will also -+tell you what the system found for the UART type. The stub recognizes -+the following UART types: -+ -+16450, 16550, and 16550A -+ -+If you are really desperate you can use printk debugging in the -+kgdbstub code in the target kernel until you get it working. In particular, -+there is a global variable in /usr/src/linux/arch/i386/kernel/kgdb_stub.c -+named "remote_debug". Compile your kernel with this set to 1, rather -+than 0 and the debug stub will print out lots of stuff as it does -+what it does. Likewise there are debug printks in the kgdb_serial.c -+code that can be turned on with simple changes in the macro defines. -+ -+ -+Debugging Loadable Modules -+========================== -+ -+This technique comes courtesy of Edouard Parmelan -+ -+ -+When you run gdb, enter the command -+ -+source gdbinit-modules -+ -+This will read in a file of gdb macros that was installed in your -+kernel source directory when kgdb was installed. This file implements -+the following commands: -+ -+mod-list -+ Lists the loaded modules in the form <module-address> <module-name> -+ -+mod-print-symbols <module-address> -+ Prints all the symbols in the indicated module. -+ -+mod-add-symbols <module-address> <object-file-path-name> -+ Loads the symbols from the object file and associates them -+ with the indicated module. -+ -+After you have loaded the module that you want to debug, use the command -+mod-list to find the <module-address> of your module. Then use that -+address in the mod-add-symbols command to load your module's symbols. -+From that point onward you can debug your module as if it were a part -+of the kernel. -+ -+The file gdbinit-modules also contains a command named mod-add-lis as -+an example of how to construct a command of your own to load your -+favorite module. The idea is to "can" the pathname of the module -+in the command so you don't have to type so much.
-+ -+Threads -+======= -+ -+Each process in a target machine is seen as a gdb thread. gdb thread -+related commands (info threads, thread n) can be used. -+ -+ia-32 hardware breakpoints -+========================== -+ -+kgdb stub contains support for hardware breakpoints using debugging features -+of ia-32(x86) processors. These breakpoints do not need code modification. -+They use debugging registers. 4 hardware breakpoints are available in ia-32 -+processors. -+ -+Each hardware breakpoint can be of one of the following three types. -+ -+1. Execution breakpoint - An Execution breakpoint is triggered when code -+ at the breakpoint address is executed. -+ -+ As limited number of hardware breakpoints are available, it is -+ advisable to use software breakpoints ( break command ) instead -+ of execution hardware breakpoints, unless modification of code -+ is to be avoided. -+ -+2. Write breakpoint - A write breakpoint is triggered when memory -+ location at the breakpoint address is written. -+ -+ A write or can be placed for data of variable length. Length of -+ a write breakpoint indicates length of the datatype to be -+ watched. Length is 1 for 1 byte data , 2 for 2 byte data, 3 for -+ 4 byte data. -+ -+3. Access breakpoint - An access breakpoint is triggered when memory -+ location at the breakpoint address is either read or written. -+ -+ Access breakpoints also have lengths similar to write breakpoints. -+ -+IO breakpoints in ia-32 are not supported. -+ -+Since gdb stub at present does not use the protocol used by gdb for hardware -+breakpoints, hardware breakpoints are accessed through gdb macros. gdb macros -+for hardware breakpoints are described below. 
-+ -+hwebrk - Places an execution breakpoint -+ hwebrk breakpointno address -+hwwbrk - Places a write breakpoint -+ hwwbrk breakpointno length address -+hwabrk - Places an access breakpoint -+ hwabrk breakpointno length address -+hwrmbrk - Removes a breakpoint -+ hwrmbrk breakpointno -+exinfo - Tells whether a software or hardware breakpoint has occurred. -+ Prints number of the hardware breakpoint if a hardware breakpoint has -+ occurred. -+ -+Arguments required by these commands are as follows -+breakpointno - 0 to 3 -+length - 1 to 3 -+address - Memory location in hex digits ( without 0x ) e.g c015e9bc -+ -+SMP support -+========== -+ -+When a breakpoint occurs or user issues a break ( Ctrl + C ) to gdb -+client, all the processors are forced to enter the debugger. Current -+thread corresponds to the thread running on the processor where -+breakpoint occurred. Threads running on other processor(s) appear -+similar to other non running threads in the 'info threads' output. With -+in the kgdb stub there is a structure "waiting_cpus" in which kgdb -+records the values of "current" and "regs" for each cpu other than the -+one that hit the breakpoint. "current" is a pointer to the task -+structure for the task that cpu is running, while "regs" points to the -+saved registers for the task. This structure can be examined with the -+gdb "p" command. -+ -+ia-32 hardware debugging registers on all processors are set to same -+values. Hence any hardware breakpoints may occur on any processor. -+ -+gdb troubleshooting -+=================== -+ -+1. gdb hangs -+Kill it. restart gdb. Connect to target machine. -+ -+2. gdb cannot connect to target machine (after killing a gdb and -+restarting another) If the target machine was not inside debugger when -+you killed gdb, gdb cannot connect because the target machine won't -+respond. In this case echo "Ctrl+C"(ASCII 3) in the serial line. -+e.g. 
echo -e "\003" > /dev/ttyS1 This forces that target machine into -+debugger after which you can connect. -+ -+3. gdb cannot connect even after echoing Ctrl+C into serial line -+Try changing serial line settings min to 1 and time to 0 -+e.g. stty min 1 time 0 < /dev/ttyS1 -+Try echoing again -+ -+check serial line speed and set it to correct value if required -+e.g. stty ispeed 115200 ospeed 115200 < /dev/ttyS1 -+ -+EVENTS -+====== -+ -+Ever want to know the order of things happening? Which cpu did what and -+when? How did the spinlock get the way it is? Then events are for -+you. Events are defined by calls to an event collection interface and -+saved for later examination. In this case, kgdb events are saved by a -+very fast bit of code in kgdb which is fully SMP and interrupt protected -+and they are examined by using gdb to display them. Kgdb keeps only -+the last N events, where N must be a power of two and is defined at -+configure time. -+ -+ -+Events are signaled to kgdb by calling: -+ -+kgdb_ts(data0,data1) -+ -+For each call kgdb records each call in an array along with other info. -+Here is the array def: -+ -+struct kgdb_and_then_struct { -+#ifdef CONFIG_SMP -+ int on_cpu; -+#endif -+ long long at_time; -+ int from_ln; -+ char * in_src; -+ void *from; -+ int with_if; -+ int data0; -+ int data1; -+}; -+ -+For SMP machines the cpu is recorded, for all machines the TSC is -+recorded (gets a time stamp) as well as the line number and source file -+the call was made from. The address of the (from), the "if" (interrupt -+flag) and the two data items are also recorded. The macro kgdb_ts casts -+the types to int, so you can put any 32-bit values here. There is a -+configure option to select the number of events you want to keep. A -+nice number might be 128, but you can keep up to 1024 if you want. The -+number must be a power of two. An "andthen" macro library is provided -+for gdb to help you look at these events. 
It is also possible to define -+a different structure for the event storage and cast the data to this -+structure. For example the following structure is defined in kgdb: -+ -+struct kgdb_and_then_struct2 { -+#ifdef CONFIG_SMP -+ int on_cpu; -+#endif -+ long long at_time; -+ int from_ln; -+ char * in_src; -+ void *from; -+ int with_if; -+ struct task_struct *t1; -+ struct task_struct *t2; -+}; -+ -+If you use this for display, the data elements will be displayed as -+pointers to task_struct entries. You may want to define your own -+structure to use in casting. You should only change the last two items -+and you must keep the structure size the same. Kgdb will handle these -+as 32-bit ints, but within that constraint you can define a structure to -+cast to any 32-bit quantity. This need only be available to gdb and is -+only used for casting in the display code. -+ -+Final Items -+=========== -+ -+I picked up this code from Amit S. Kale and enhanced it. -+ -+If you make some really cool modification to this stuff, or if you -+fix a bug, please let me know. -+ -+George Anzinger -+ -+ -+Amit S. Kale -+ -+ -+(First kgdb by David Grothe ) -+ -+(modified by Tigran Aivazian ) -+ Putting gdbstub into the kernel config menu. -+ -+(modified by Scott Foehner ) -+ Hooks for entering gdbstub at boot time. -+ -+(modified by Amit S. Kale ) -+ Threads, ia-32 hw debugging, mp support, console support, -+ nmi watchdog handling. -+ -+(modified by George Anzinger ) -+ Extended threads to include the idle threads. -+ Enhancements to allow breakpoint() at first C code. -+ Use of module_init() and __setup() to automate the configure. -+ Enhanced the cpu "collection" code to work in early bring up. -+ Added ability to call functions from gdb -+ Print info thread stuff without going back to schedule() -+ Now collect the "other" cpus with a IPI/ NMI. 
-\ No newline at end of file -diff -puN /dev/null Documentation/i386/kgdb/loadmodule.sh ---- /dev/null 2002-08-30 16:31:37.000000000 -0700 -+++ 25-akpm/Documentation/i386/kgdb/loadmodule.sh 2003-06-25 23:14:17.000000000 -0700 -@@ -0,0 +1,78 @@ -+#/bin/sh -+# This script loads a module on a target machine and generates a gdb script. -+# source generated gdb script to load the module file at appropriate addresses -+# in gdb. -+# -+# Usage: -+# Loading the module on target machine and generating gdb script) -+# [foo]$ loadmodule.sh -+# -+# Loading the module file into gdb -+# (gdb) source -+# -+# Modify following variables according to your setup. -+# TESTMACHINE - Name of the target machine -+# GDBSCRIPTS - The directory where a gdb script will be generated -+# -+# Author: Amit S. Kale (akale@veritas.com). -+# -+# If you run into problems, please check files pointed to by following -+# variables. -+# ERRFILE - /tmp/.errs contains stderr output of insmod -+# MAPFILE - /tmp/.map contains stdout output of insmod -+# GDBSCRIPT - $GDBSCRIPTS/load gdb script. 
-+ -+TESTMACHINE=foo -+GDBSCRIPTS=/home/bar -+ -+if [ $# -lt 1 ] ; then { -+ echo Usage: $0 modulefile -+ exit -+} ; fi -+ -+MODULEFILE=$1 -+MODULEFILEBASENAME=`basename $1` -+ -+if [ $MODULEFILE = $MODULEFILEBASENAME ] ; then { -+ MODULEFILE=`pwd`/$MODULEFILE -+} fi -+ -+ERRFILE=/tmp/$MODULEFILEBASENAME.errs -+MAPFILE=/tmp/$MODULEFILEBASENAME.map -+GDBSCRIPT=$GDBSCRIPTS/load$MODULEFILEBASENAME -+ -+function findaddr() { -+ local ADDR=0x$(echo "$SEGMENTS" | \ -+ grep "$1" | sed 's/^[^ ]*[ ]*[^ ]*[ ]*//' | \ -+ sed 's/[ ]*[^ ]*$//') -+ echo $ADDR -+} -+ -+function checkerrs() { -+ if [ "`cat $ERRFILE`" != "" ] ; then { -+ cat $ERRFILE -+ exit -+ } fi -+} -+ -+#load the module -+echo Copying $MODULEFILE to $TESTMACHINE -+rcp $MODULEFILE root@${TESTMACHINE}: -+ -+echo Loading module $MODULEFILE -+rsh -l root $TESTMACHINE /sbin/insmod -m ./`basename $MODULEFILE` \ -+ > $MAPFILE 2> $ERRFILE -+checkerrs -+ -+SEGMENTS=`head -n 11 $MAPFILE | tail -n 10` -+TEXTADDR=$(findaddr "\\.text[^.]") -+LOADSTRING="add-symbol-file $MODULEFILE $TEXTADDR" -+SEGADDRS=`echo "$SEGMENTS" | awk '//{ -+ if ($1 != ".text" && $1 != ".this" && -+ $1 != ".kstrtab" && $1 != ".kmodtab") { -+ print " -s " $1 " 0x" $3 " " -+ } -+}'` -+LOADSTRING="$LOADSTRING $SEGADDRS" -+echo Generating script $GDBSCRIPT -+echo $LOADSTRING > $GDBSCRIPT -diff -puN drivers/char/keyboard.c~kgdb-ga drivers/char/keyboard.c ---- 25/drivers/char/keyboard.c~kgdb-ga 2003-06-25 23:14:17.000000000 -0700 -+++ 25-akpm/drivers/char/keyboard.c 2003-06-25 23:14:17.000000000 -0700 -@@ -1055,6 +1055,9 @@ void kbd_keycode(unsigned int keycode, i - } - if (sysrq_down && down && !rep) { - handle_sysrq(kbd_sysrq_xlate[keycode], regs, tty); -+#ifdef CONFIG_KGDB_SYSRQ -+ sysrq_down = 0; /* in case we miss the "up" event */ -+#endif - return; - } - #endif -diff -puN drivers/char/sysrq.c~kgdb-ga drivers/char/sysrq.c ---- 25/drivers/char/sysrq.c~kgdb-ga 2003-06-25 23:14:17.000000000 -0700 -+++ 25-akpm/drivers/char/sysrq.c 2003-06-25 
23:14:17.000000000 -0700 -@@ -35,6 +35,19 @@ - #include - - #include -+#ifdef CONFIG_KGDB_SYSRQ -+ -+#define GDB_OP &kgdb_op -+static struct sysrq_key_op kgdb_op={ -+ handler: (void*)breakpoint, -+ help_msg: "kGdb ", -+ action_msg: "Debug breakpoint\n", -+}; -+ -+#else -+#define GDB_OP NULL -+#endif -+ - - extern void reset_vc(unsigned int); - extern struct list_head super_blocks; -@@ -240,7 +253,7 @@ static struct sysrq_key_op *sysrq_key_ta - /* d */ NULL, - /* e */ &sysrq_term_op, - /* f */ NULL, --/* g */ NULL, -+/* g */ GDB_OP, - /* h */ NULL, - /* i */ &sysrq_kill_op, - /* j */ NULL, -diff -puN drivers/serial/8250.c~kgdb-ga drivers/serial/8250.c ---- 25/drivers/serial/8250.c~kgdb-ga 2003-06-25 23:14:17.000000000 -0700 -+++ 25-akpm/drivers/serial/8250.c 2003-06-25 23:14:17.000000000 -0700 -@@ -823,7 +823,7 @@ receive_chars(struct uart_8250_port *up, - if (unlikely(tty->flip.count >= TTY_FLIPBUF_SIZE)) { - tty->flip.work.func((void *)tty); - if (tty->flip.count >= TTY_FLIPBUF_SIZE) -- return; // if TTY_DONT_FLIP is set -+ return; /* if TTY_DONT_FLIP is set */ - } - ch = serial_inp(up, UART_RX); - *tty->flip.char_buf_ptr = ch; -@@ -1183,13 +1183,20 @@ static void serial8250_break_ctl(struct - serial_out(up, UART_LCR, up->lcr); - spin_unlock_irqrestore(&up->port.lock, flags); - } -+#ifdef CONFIG_KGDB -+static int kgdb_irq = -1; -+#endif - - static int serial8250_startup(struct uart_port *port) - { - struct uart_8250_port *up = (struct uart_8250_port *)port; - unsigned long flags; - int retval; -- -+#ifdef CONFIG_KGDB -+ if ( up->port.irq == kgdb_irq){ -+ return -EBUSY; -+ } -+#endif - if (up->port.type == PORT_16C950) { - /* Wake up and initialize UART */ - up->acr = 0; -@@ -1853,6 +1860,11 @@ static void __init serial8250_register_p - for (i = 0; i < UART_NR; i++) { - struct uart_8250_port *up = &serial8250_ports[i]; - -+#ifdef CONFIG_KGDB -+ if(up->port.irq == kgdb_irq){ -+ up->port.iobase = up->port.mapbase = 0; -+ } -+#endif - up->port.line = i; - up->port.ops 
= &serial8250_pops; - init_timer(&up->timer); -@@ -2116,7 +2128,31 @@ void serial8250_resume_port(int line, u3 - { - uart_resume_port(&serial8250_reg, &serial8250_ports[line].port, level); - } -- -+#ifdef CONFIG_KGDB -+/* -+ * Find all the ports using the given irq and shut them down. -+ * Result should be that the irq will be released. -+ */ -+void shutdown_for_kgdb(struct async_struct * info) -+{ -+ int irq = info->state->irq; -+ struct uart_8250_port *up; -+ int ttyS; -+ -+ kgdb_irq = irq; /* save for later init */ -+ for (ttyS = 0; ttyS < UART_NR; ttyS++){ -+ up = &serial8250_ports[ttyS]; -+ if( up->port.irq == irq && (irq_lists + irq)->head){ -+#ifdef CONFIG_DEBUG_SPINLOCK /* ugly business... */ -+ if(up->port.lock.magic != SPINLOCK_MAGIC){ -+ spin_lock_init(&up->port.lock); -+ } -+#endif -+ serial8250_shutdown(&up->port); -+ } -+ } -+} -+#endif - static int __init serial8250_init(void) - { - int ret, i; -diff -puN include/asm-i386/bugs.h~kgdb-ga include/asm-i386/bugs.h ---- 25/include/asm-i386/bugs.h~kgdb-ga 2003-06-25 23:14:17.000000000 -0700 -+++ 25-akpm/include/asm-i386/bugs.h 2003-06-25 23:14:17.000000000 -0700 -@@ -1,11 +1,11 @@ - /* - * include/asm-i386/bugs.h - * -- * Copyright (C) 1994 Linus Torvalds -+ * Copyright (C) 1994 Linus Torvalds - * - * Cyrix stuff, June 1998 by: - * - Rafael R. Reilova (moved everything from head.S), -- * -+ * - * - Channing Corn (tests & fixes), - * - Andrew D. Balsa (code cleanup). - * -@@ -25,7 +25,20 @@ - #include - #include - #include -- -+#ifdef CONFIG_KGDB -+/* -+ * Provied the command line "gdb" initial break -+ */ -+int __init kgdb_initial_break(char * str) -+{ -+ if (*str == '\0'){ -+ breakpoint(); -+ return 1; -+ } -+ return 0; -+} -+__setup("gdb",kgdb_initial_break); -+#endif - static int __init no_halt(char *s) - { - boot_cpu_data.hlt_works_ok = 0; -@@ -140,7 +153,7 @@ static void __init check_popad(void) - : "ecx", "edi" ); - /* If this fails, it means that any user program may lock the CPU hard. Too bad. 
*/ - if (res != 12345678) printk( "Buggy.\n" ); -- else printk( "OK.\n" ); -+ else printk( "OK.\n" ); - #endif - } - -diff -puN /dev/null include/asm-i386/kgdb.h ---- /dev/null 2002-08-30 16:31:37.000000000 -0700 -+++ 25-akpm/include/asm-i386/kgdb.h 2003-06-25 23:14:17.000000000 -0700 -@@ -0,0 +1,59 @@ -+#ifndef __KGDB -+#define __KGDB -+ -+/* -+ * This file should not include ANY others. This makes it usable -+ * most anywhere without the fear of include order or inclusion. -+ * Make it so! -+ * -+ * This file may be included all the time. It is only active if -+ * CONFIG_KGDB is defined, otherwise it stubs out all the macros -+ * and entry points. -+ */ -+#if defined(CONFIG_KGDB) && !defined(__ASSEMBLY__) -+ -+extern void breakpoint(void); -+#define INIT_KGDB_INTS kgdb_enable_ints() -+ -+#ifndef BREAKPOINT -+#define BREAKPOINT asm(" int $3") -+#endif -+/* -+ * GDB debug stub (or any debug stub) can point the 'linux_debug_hook' -+ * pointer to its routine and it will be entered as the first thing -+ * when a trap occurs. -+ * -+ * Return values are, at present, undefined. -+ * -+ * The debug hook routine does not necessarily return to its caller. -+ * It has the register image and thus may choose to resume execution -+ * anywhere it pleases. -+ */ -+struct pt_regs; -+ -+extern int kgdb_handle_exception(int trapno, -+ int signo, int err_code, struct pt_regs *regs); -+extern int in_kgdb(struct pt_regs *regs); -+ -+#ifdef CONFIG_KGDB_TS -+void kgdb_tstamp(int line, char *source, int data0, int data1); -+/* -+ * This is the time stamp function. The macro adds the source info and -+ * does a cast on the data to allow most any 32-bit value. -+ */ -+ -+#define kgdb_ts(data0,data1) kgdb_tstamp(__LINE__,__FILE__,(int)data0,(int)data1) -+#else -+#define kgdb_ts(data0,data1) -+#endif -+#else /* CONFIG_KGDB && ! __ASSEMBLY__ ,stubs follow... 
*/ -+#ifndef BREAKPOINT -+#define BREAKPOINT -+#endif -+#define kgdb_ts(data0,data1) -+#define in_kgdb -+#define kgdb_handle_exception -+#define breakpoint -+#define INIT_KGDB_INTS -+#endif -+#endif /* __KGDB */ -diff -puN /dev/null include/asm-i386/kgdb_local.h ---- /dev/null 2002-08-30 16:31:37.000000000 -0700 -+++ 25-akpm/include/asm-i386/kgdb_local.h 2003-06-25 23:14:17.000000000 -0700 -@@ -0,0 +1,102 @@ -+#ifndef __KGDB_LOCAL -+#define ___KGDB_LOCAL -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#define PORT 0x3f8 -+#ifdef CONFIG_KGDB_PORT -+#undef PORT -+#define PORT CONFIG_KGDB_PORT -+#endif -+#define IRQ 4 -+#ifdef CONFIG_KGDB_IRQ -+#undef IRQ -+#define IRQ CONFIG_KGDB_IRQ -+#endif -+#define SB_CLOCK 1843200 -+#define SB_BASE (SB_CLOCK/16) -+#define SB_BAUD9600 SB_BASE/9600 -+#define SB_BAUD192 SB_BASE/19200 -+#define SB_BAUD384 SB_BASE/38400 -+#define SB_BAUD576 SB_BASE/57600 -+#define SB_BAUD1152 SB_BASE/115200 -+#ifdef CONFIG_KGDB_9600BAUD -+#define SB_BAUD SB_BAUD9600 -+#endif -+#ifdef CONFIG_KGDB_19200BAUD -+#define SB_BAUD SB_BAUD192 -+#endif -+#ifdef CONFIG_KGDB_38400BAUD -+#define SB_BAUD SB_BAUD384 -+#endif -+#ifdef CONFIG_KGDB_57600BAUD -+#define SB_BAUD SB_BAUD576 -+#endif -+#ifdef CONFIG_KGDB_115200BAUD -+#define SB_BAUD SB_BAUD1152 -+#endif -+#ifndef SB_BAUD -+#define SB_BAUD SB_BAUD1152 /* Start with this if not given */ -+#endif -+ -+#ifndef CONFIG_X86_TSC -+#undef rdtsc -+#define rdtsc(a,b) if (a++ > 10000){a = 0; b++;} -+#undef rdtscll -+#define rdtscll(s) s++ -+#endif -+ -+#ifdef _raw_read_unlock /* must use a name that is "define"ed, not an inline */ -+#undef spin_lock -+#undef spin_trylock -+#undef spin_unlock -+#define spin_lock _raw_spin_lock -+#define spin_trylock _raw_spin_trylock -+#define spin_unlock _raw_spin_unlock -+#else -+#endif -+#undef spin_unlock_wait -+#define spin_unlock_wait(x) do { cpu_relax(); barrier();} \ -+ while(spin_is_locked(x)) -+ -+#define SB_IER 1 -+#define SB_MCR 
UART_MCR_OUT2 | UART_MCR_DTR | UART_MCR_RTS -+ -+#define FLAGS 0 -+#define SB_STATE { \ -+ magic: SSTATE_MAGIC, \ -+ baud_base: SB_BASE, \ -+ port: PORT, \ -+ irq: IRQ, \ -+ flags: FLAGS, \ -+ custom_divisor:SB_BAUD} -+#define SB_INFO { \ -+ magic: SERIAL_MAGIC, \ -+ port: PORT,0,FLAGS, \ -+ state: &state, \ -+ tty: (struct tty_struct *)&state, \ -+ IER: SB_IER, \ -+ MCR: SB_MCR} -+extern void putDebugChar(int); -+/* RTAI support needs us to really stop/start interrupts */ -+ -+#define kgdb_sti() __asm__ __volatile__("sti": : :"memory") -+#define kgdb_cli() __asm__ __volatile__("cli": : :"memory") -+#define kgdb_local_save_flags(x) __asm__ __volatile__(\ -+ "pushfl ; popl %0":"=g" (x): /* no input */) -+#define kgdb_local_irq_restore(x) __asm__ __volatile__(\ -+ "pushl %0 ; popfl": \ -+ /* no output */ :"g" (x):"memory", "cc") -+#define kgdb_local_irq_save(x) kgdb_local_save_flags(x); kgdb_cli() -+ -+#ifdef CONFIG_SERIAL -+extern void shutdown_for_kgdb(struct async_struct *info); -+#endif -+#define INIT_KDEBUG putDebugChar("+"); -+#endif /* __KGDB_LOCAL */ -diff -puN include/linux/config.h~kgdb-ga include/linux/config.h ---- 25/include/linux/config.h~kgdb-ga 2003-06-25 23:14:17.000000000 -0700 -+++ 25-akpm/include/linux/config.h 2003-06-25 23:14:17.000000000 -0700 -@@ -2,5 +2,8 @@ - #define _LINUX_CONFIG_H - - #include -+#ifdef CONFIG_X86 -+#include -+#endif - - #endif -diff -puN kernel/sched.c~kgdb-ga kernel/sched.c ---- 25/kernel/sched.c~kgdb-ga 2003-06-25 23:14:17.000000000 -0700 -+++ 25-akpm/kernel/sched.c 2003-06-25 23:14:17.000000000 -0700 -@@ -1604,6 +1604,13 @@ out_unlock: - task_rq_unlock(rq, &flags); - } - -+#if defined( CONFIG_KGDB) -+struct task_struct * kgdb_get_idle(int this_cpu) -+{ -+ return runqueues[this_cpu].idle; -+} -+#endif -+ - #ifndef __alpha__ - - /* -diff -puN MAINTAINERS~kgdb-ga MAINTAINERS ---- 25/MAINTAINERS~kgdb-ga 2003-06-25 23:14:17.000000000 -0700 -+++ 25-akpm/MAINTAINERS 2003-06-25 23:14:17.000000000 -0700 -@@ -1059,6 +1059,12 @@ 
L: kbuild-devel@lists.sourceforge.net - W: http://kbuild.sourceforge.net - S: Maintained - -+KGDB FOR I386 PLATFORM -+P: George Anzinger -+M: george@mvista.com -+L: linux-net@vger.kernel.org -+S: Supported -+ - KERNEL NFSD - P: Neil Brown - M: neilb@cse.unsw.edu.au - -_ diff --git a/lustre/kernel_patches/patches/kgdb-ga-docco-fixes-2.5.73.patch b/lustre/kernel_patches/patches/kgdb-ga-docco-fixes-2.5.73.patch deleted file mode 100644 index 47d451c..0000000 --- a/lustre/kernel_patches/patches/kgdb-ga-docco-fixes-2.5.73.patch +++ /dev/null @@ -1,347 +0,0 @@ - -From: "Randy.Dunlap" - -Just some readability fixes. - - - - Documentation/i386/kgdb/kgdb.txt | 135 +++++++++++++++++++-------------------- - 1 files changed, 68 insertions(+), 67 deletions(-) - -diff -puN Documentation/i386/kgdb/kgdb.txt~kgdb-gs-docco-fixes Documentation/i386/kgdb/kgdb.txt ---- 25/Documentation/i386/kgdb/kgdb.txt~kgdb-gs-docco-fixes 2003-06-26 17:32:25.000000000 -0700 -+++ 25-akpm/Documentation/i386/kgdb/kgdb.txt 2003-06-26 17:32:25.000000000 -0700 -@@ -6,10 +6,10 @@ New features: - ============ - 20030505.1827.27 - We are starting to align with the sourceforge version, at least in --commands. To this end, the boot command sting to start kgdb at -+commands. To this end, the boot command string to start kgdb at - boot time has been changed from "kgdb" to "gdb". - --Andrew Morton sent a couple of patchs which are now included as follows: -+Andrew Morton sent a couple of patches which are now included as follows: - 1.) We now return a flag to the interrupt handler. - 2.) We no longer use smp_num_cpus (a conflict with the lock meter). - 3.) And from William Lee Irwin III code to make -@@ -27,18 +27,18 @@ This is currently the version for the 2. - - We have several new "features" beginning with this version: - --1.) Kgdb now syncs the "other" cpus with a cross cpu NMI. No more -- waiting and it will pull that guy out of an irq off spin lock :) -+1.) 
Kgdb now syncs the "other" CPUs with a cross-CPU NMI. No more -+ waiting and it will pull that guy out of an IRQ off spin lock :) - - 2.) We doctored up the code that tells where a task is waiting and - included it so that the "info thread" command will show a bit more - than "schedule()". Try it... - - 3.) Added the ability to call a function from gdb. All the standard gdb -- issues apply, i.e. if you hit a break point in the function you are -- not allowed to call another (gdb limitation, not kgdb). T0 help -+ issues apply, i.e. if you hit a breakpoint in the function, you are -+ not allowed to call another (gdb limitation, not kgdb). To help - this capability we added a memory allocation function. Gdb does not -- return this memory (it is used for stings you pass to that function -+ return this memory (it is used for strings that you pass to that function - you are calling from gdb) so we fixed up a way to allow you to - manually return the memory (see below). - -@@ -61,23 +61,23 @@ $2 = {used_malloc = 0, called_from = 0xc - Things to note here: a.) used_malloc is the amount of memory that - has been malloc'ed to do calls from gdb. You can reclaim this - memory like this: "p kgdb_info.used_malloc=0" Cool, huh? b.) -- cpus_waiting is now "sized" by the number of cpus you enter at -- configure time in the kgdb configure section. This is NOT used any -- where else in the system, but it is "nice" here. c.) The tasks -+ cpus_waiting is now "sized" by the number of CPUs you enter at -+ configure time in the kgdb configure section. This is NOT used -+ anywhere else in the system, but it is "nice" here. c.) The task's - "pid" is now in the structure. This is the pid you will need to use - to decode to the thread id to get gdb to look at that thread. - Remember that the "info thread" command prints a list of threads -- where in it numbers each thread with its reference number followed -- by the threads pid. 
Note that the per cpu idle threads actually -- have pids of 0 (yes there is more than one pid 0 in an SMP system). -+ wherein it numbers each thread with its reference number followed -+ by the thread's pid. Note that the per-CPU idle threads actually -+ have pids of 0 (yes, there is more than one pid 0 in an SMP system). - To avoid confusion, kgdb numbers these threads with numbers beyond -- the MAX_PID. That is why you see 32768 above. -+ the MAX_PID. That is why you see 32768 and above. - - 6.) A subtle change, we now provide the complete register set for tasks -- that are active on the other cpus. This allows better trace back on -+ that are active on the other CPUs. This allows better trace back on - those tasks. - -- And, lets mention what we could not fix. Back-trace from all but the -+ And, let's mention what we could not fix. Back-trace from all but the - thread that we trapped will, most likely, have a bogus entry in it. - The problem is that gdb does not recognize the entry code for - functions that use "current" near (at all?) the entry. The compiler -@@ -88,9 +88,9 @@ $2 = {used_malloc = 0, called_from = 0xc - - 20011220.0050.35 - Major enhancement with this version is the ability to hold one or more --cpus in an SMP system while allowing the others to continue. Also, by --default only the current cpu is enabled on single step commands (please --note that gdb issues single step commands at times other than when you -+CPUs in an SMP system while allowing the others to continue. Also, by -+default only the current CPU is enabled on single-step commands (please -+note that gdb issues single-step commands at times other than when you - use the si command). - - Another change is to collect some useful information in -@@ -111,14 +111,14 @@ $4 = {called_from = 0xc010732c, entry_ts - - "Called_from" is the return address from the current entry into kgdb. - Sometimes it is useful to know why you are in kgdb, for example, was --it an NMI or a real break point? 
The simple way to interrogate this -+it an NMI or a real breakpoint? The simple way to interrogate this - return address is: - - l *0xc010732c - - which will print the surrounding few lines of source code. - --"Entry_tsc" is the cpu TSC on entry to kgdb (useful to compare to the -+"Entry_tsc" is the CPU TSC on entry to kgdb (useful to compare to the - kgdb_ts entries). - - "errcode" and "vector" are other entry parameters which may be helpful on -@@ -139,34 +139,34 @@ $7 = {called_from = 0xc0112739, entry_ts - hold = 0, regs = 0x0}, {task = 0x0, hold = 0, regs = 0x0}, {task = 0x0, - hold = 0, regs = 0x0}}} - --"Cpus_waiting" has an entry for each cpu other than the current one that -+"Cpus_waiting" has an entry for each CPU other than the current one that - has been stopped. Each entry contains the task_struct address for that --cpu, the address of the regs for that task and a hold flag. All these -+CPU, the address of the regs for that task and a hold flag. All these - have the proper typing so that, for example: - - p *kgdb_info.cpus_waiting[1].regs - --will print the registers for cpu 1. -+will print the registers for CPU 1. - - "Hold_on_sstep" is a new feature with this version and comes up set or --true. What is means is that whenever kgdb is asked to single step all --other cpus are held (i.e. not allowed to execute). The flag applies to --all but the current cpu and, again, can be changed: -+true. What this means is that whenever kgdb is asked to single-step all -+other CPUs are held (i.e. not allowed to execute). The flag applies to -+all but the current CPU and, again, can be changed: - - p kgdb_info.hold_on_sstep=0 - --restores the old behavior of letting all cpus run during single stepping. -+restores the old behavior of letting all CPUs run during single-stepping. - --Likewise, each cpu has a "hold" flag, which if set, locks that cpu out --of execution. Note that this has some risk in cases where the cpus need --to communicate with each other. 
If kgdb finds no cpu available on exit, -+Likewise, each CPU has a "hold" flag, which if set, locks that CPU out -+of execution. Note that this has some risk in cases where the CPUs need -+to communicate with each other. If kgdb finds no CPU available on exit, - it will push a message thru gdb and stay in kgdb. Note that it is legal --to hold the current cpu as long as at least one cpu can execute. -+to hold the current CPU as long as at least one CPU can execute. - - 20010621.1117.09 - This version implements an event queue. Events are signaled by calling - a function in the kgdb stub and may be examined from gdb. See EVENTS --below for details. This version also tighten up the interrupt and SMP -+below for details. This version also tightens up the interrupt and SMP - handling to not allow interrupts on the way to kgdb from a breakpoint - trap. It is fine to allow these interrupts for user code, but not - system debugging. -@@ -190,18 +190,18 @@ machine. This is the machine that will - kernel. - - The two machines will be connected together via a serial line out --one or the other of the COM ports of the PC. You will need a modem --eliminator and the appropriate cables. -+one or the other of the COM ports of the PC. You will need the -+appropriate modem eliminator (null modem) cable(s) for this. - - Decide on which tty port you want the machines to communicate, then --cable them up back-to-back using the null modem. COM1 is /dev/ttyS0 and --COM2 is /dev/ttyS1. You should test this connection with the two --machines prior to trying to debug a kernel. Once you have it working, --on the TARGET machine, enter: -+connect them up back-to-back using the null modem cable. COM1 is -+/dev/ttyS0 and COM2 is /dev/ttyS1. You should test this connection -+with the two machines prior to trying to debug a kernel. Once you -+have it working, on the TARGET machine, enter: - - setserial /dev/ttyS0 (or what ever tty you are using) - --and record the port and the irq addresses. 
-+and record the port address and the IRQ number. - - On the DEVELOPMENT machine you need to apply the patch for the kgdb - hooks. You have probably already done that if you are reading this -@@ -212,7 +212,7 @@ On your DEVELOPMENT machine, go to your - configuring in the standard serial driver, it must not be a module. - Either yes or no is ok, but making the serial driver a module means it - will initialize after kgdb has set up the UART interrupt code and may --cause a failure of the control C option discussed below. The configure -+cause a failure of the control-C option discussed below. The configure - question for the serial driver is under the "Character devices" heading - and is: - -@@ -231,16 +231,16 @@ The baud rate. Default is 115200. What - the host machine is set to the same speed. I recommend the default. - - The port. This is the I/O address of the serial UART that you should --have gotten using setserial as described above. The standard com1 port --(3f8) using irq 4 is default . Com2 is 2f8 which by convention uses irq -+have gotten using setserial as described above. The standard COM1 port -+(3f8) using IRQ 4 is default. COM2 is 2f8 which by convention uses IRQ - 3. - --The port irq (see above). -+The port IRQ (see above). - - Stack overflow test. This option makes a minor change in the trap, - system call and interrupt code to detect stack overflow and transfer --control to kgdb if it happens. (Some platforms have this in the base --line code, but the i386 does not.) -+control to kgdb if it happens. (Some platforms have this in the -+baseline code, but the i386 does not.) - - You can also configure the system to recognize the boot option - "console=kgdb" which if given will cause all console output during -@@ -251,9 +251,9 @@ This will happen before any kernel outpu - and will stall the boot until the connection is made. - - You can also configure in a patch to SysRq to enable the kGdb SysRq. --This request generates a breakpoint. 
Since the serial port irq line is -+This request generates a breakpoint. Since the serial port IRQ line is - set up after any serial drivers, it is possible that this command will --work when the control C will not. -+work when the control-C will not. - - Save and exit the Xconfig program. Then do "make clean" , "make dep" - and "make bzImage" (or whatever target you want to make). This gets the -@@ -360,11 +360,11 @@ Triggering kgdbstub at other times - Often you don't need to enter the debugger until much later in the boot - or even after the machine has been running for some time. Once the - kernel is booted and interrupts are on, you can force the system to --enter the debugger by sending a control C to the debug port. This is -+enter the debugger by sending a control-C to the debug port. This is - what the first line of the recommended .gdbinit file does. This allows - you to start gdb any time after the system is up as well as when the --system is already at a break point. (In the case where the system is --already at a break point the control C is not needed, however, it will -+system is already at a breakpoint. (In the case where the system is -+already at a breakpoint the control-C is not needed, however, it will - be ignored by the target so no harm is done. Also note the the echo - command assumes that the port speed is already set. This will be true - once gdb has connected, but it is best to set the port speed before you -@@ -442,7 +442,7 @@ turn on a debug switch with the followin - This will print out the protocol messages that gdb is exchanging with - the target machine. - --Another place to look is /usr/src/arch/i386/lib/kgdb_serial.c This is -+Another place to look is /usr/src/arch/i386/lib/kgdb_serial.c. This is - the code that talks to the serial port on the target side. There might - be a problem there. 
In particular there is a section of this code that - tests the UART which will tell you what UART you have if you define -@@ -454,7 +454,7 @@ it finds. (You need to wait so that the - printed. Early in the boot they are cached, waiting for the console to - be enabled. Also, if kgdb is entered thru a breakpoint it is possible - to cause a dead lock by calling printk when the console is locked. The --stub, thus avoids doing printks from break points especially in the -+stub thus avoids doing printks from breakpoints, especially in the - serial code.) At this time, if the UART fails to do the expected thing, - kgdb will print out (using printk) information on what failed. (These - messages will be buried in all the other boot up messages. Look for -@@ -464,7 +464,7 @@ don't connect, review your answers for t - - setserial /dev/ttyS0 - --to get the current port and irq information. This command will also -+to get the current port and IRQ information. This command will also - tell you what the system found for the UART type. The stub recognizes - the following UART types: - -@@ -581,11 +581,11 @@ When a breakpoint occurs or user issues - client, all the processors are forced to enter the debugger. Current - thread corresponds to the thread running on the processor where - breakpoint occurred. Threads running on other processor(s) appear --similar to other non running threads in the 'info threads' output. With --in the kgdb stub there is a structure "waiting_cpus" in which kgdb --records the values of "current" and "regs" for each cpu other than the -+similar to other non-running threads in the 'info threads' output. -+Within the kgdb stub there is a structure "waiting_cpus" in which kgdb -+records the values of "current" and "regs" for each CPU other than the - one that hit the breakpoint. 
"current" is a pointer to the task --structure for the task that cpu is running, while "regs" points to the -+structure for the task that CPU is running, while "regs" points to the - saved registers for the task. This structure can be examined with the - gdb "p" command. - -@@ -601,22 +601,23 @@ Kill it. restart gdb. Connect to target - 2. gdb cannot connect to target machine (after killing a gdb and - restarting another) If the target machine was not inside debugger when - you killed gdb, gdb cannot connect because the target machine won't --respond. In this case echo "Ctrl+C"(ASCII 3) in the serial line. --e.g. echo -e "\003" > /dev/ttyS1 This forces that target machine into --debugger after which you can connect. -+respond. In this case echo "Ctrl+C"(ASCII 3) to the serial line. -+e.g. echo -e "\003" > /dev/ttyS1 -+This forces that target machine into the debugger, after which you -+can connect. - - 3. gdb cannot connect even after echoing Ctrl+C into serial line - Try changing serial line settings min to 1 and time to 0 - e.g. stty min 1 time 0 < /dev/ttyS1 - Try echoing again - --check serial line speed and set it to correct value if required -+Check serial line speed and set it to correct value if required - e.g. stty ispeed 115200 ospeed 115200 < /dev/ttyS1 - - EVENTS - ====== - --Ever want to know the order of things happening? Which cpu did what and -+Ever want to know the order of things happening? Which CPU did what and - when? How did the spinlock get the way it is? Then events are for - you. Events are defined by calls to an event collection interface and - saved for later examination. In this case, kgdb events are saved by a -@@ -631,7 +632,7 @@ Events are signaled to kgdb by calling: - kgdb_ts(data0,data1) - - For each call kgdb records each call in an array along with other info. 
--Here is the array def: -+Here is the array definition: - - struct kgdb_and_then_struct { - #ifdef CONFIG_SMP -@@ -646,7 +647,7 @@ struct kgdb_and_then_struct { - int data1; - }; - --For SMP machines the cpu is recorded, for all machines the TSC is -+For SMP machines the CPU is recorded, for all machines the TSC is - recorded (gets a time stamp) as well as the line number and source file - the call was made from. The address of the (from), the "if" (interrupt - flag) and the two data items are also recorded. The macro kgdb_ts casts -@@ -709,7 +710,7 @@ Amit S. Kale - Extended threads to include the idle threads. - Enhancements to allow breakpoint() at first C code. - Use of module_init() and __setup() to automate the configure. -- Enhanced the cpu "collection" code to work in early bring up. -+ Enhanced the cpu "collection" code to work in early bring-up. - Added ability to call functions from gdb - Print info thread stuff without going back to schedule() -- Now collect the "other" cpus with a IPI/ NMI. -\ No newline at end of file -+ Now collect the "other" cpus with an IPI/ NMI. 
- -_ diff --git a/lustre/kernel_patches/patches/kgdb-use-ggdb-2.5.73.patch b/lustre/kernel_patches/patches/kgdb-use-ggdb-2.5.73.patch deleted file mode 100644 index da07bd9..0000000 --- a/lustre/kernel_patches/patches/kgdb-use-ggdb-2.5.73.patch +++ /dev/null @@ -1,17 +0,0 @@ - arch/i386/Makefile | 2 +- - 1 files changed, 1 insertion(+), 1 deletion(-) - -diff -puN arch/i386/Makefile~kgdb-use-ggdb arch/i386/Makefile ---- 25/arch/i386/Makefile~kgdb-use-ggdb 2003-06-14 22:54:41.000000000 -0700 -+++ 25-akpm/arch/i386/Makefile 2003-06-14 22:54:41.000000000 -0700 -@@ -85,7 +85,7 @@ mcore-$(CONFIG_X86_ES7000) := mach-es700 - # default subarch .h files - mflags-y += -Iinclude/asm-i386/mach-default - --mflags-$(CONFIG_KGDB) += -g -+mflags-$(CONFIG_KGDB) += -ggdb - mflags-$(CONFIG_KGDB_MORE) += $(shell echo $(CONFIG_KGDB_OPTIONS) | sed -e 's/"//g') - - head-y := arch/i386/kernel/head.o arch/i386/kernel/init_task.o - -_ diff --git a/lustre/kernel_patches/patches/lkcd-cvs-2.5.69.patch b/lustre/kernel_patches/patches/lkcd-cvs-2.5.69.patch deleted file mode 100644 index ee94909..0000000 --- a/lustre/kernel_patches/patches/lkcd-cvs-2.5.69.patch +++ /dev/null @@ -1,6418 +0,0 @@ ---- linux-2.5.69/drivers/dump/Makefile.lkcdbase Mon Jun 2 17:29:39 2003 -+++ linux-2.5.69/drivers/dump/Makefile Fri Apr 25 00:24:15 2003 -@@ -0,0 +1,14 @@ -+# -+# Makefile for the dump device drivers. 
-+# -+ -+dump-y := dump_setup.o dump_fmt.o dump_filters.o dump_scheme.o dump_execute.o -+dump-$(CONFIG_X86) += dump_i386.o -+dump-$(CONFIG_CRASH_DUMP_MEMDEV) += dump_memdev.o dump_overlay.o -+dump-objs += $(dump-y) -+ -+obj-$(CONFIG_CRASH_DUMP) += dump.o -+obj-$(CONFIG_CRASH_DUMP_BLOCKDEV) += dump_blockdev.o -+obj-$(CONFIG_CRASH_DUMP_NETDEV) += dump_netdev.o -+obj-$(CONFIG_CRASH_DUMP_COMPRESS_RLE) += dump_rle.o -+obj-$(CONFIG_CRASH_DUMP_COMPRESS_GZIP) += dump_gzip.o ---- linux-2.5.69/drivers/dump/dump_blockdev.c.lkcdbase Mon Jun 2 17:29:49 2003 -+++ linux-2.5.69/drivers/dump/dump_blockdev.c Sun May 18 22:30:52 2003 -@@ -0,0 +1,461 @@ -+/* -+ * Implements the dump driver interface for saving a dump to -+ * a block device through the kernel's generic low level block i/o -+ * routines. -+ * -+ * Started: June 2002 - Mohamed Abbas -+ * Moved original lkcd kiobuf dump i/o code from dump_base.c -+ * to use generic dump device interfaces -+ * -+ * Sept 2002 - Bharata B. Rao -+ * Convert dump i/o to directly use bio instead of kiobuf for 2.5 -+ * -+ * Oct 2002 - Suparna Bhattacharya -+ * Rework to new dumpdev.h structures, implement open/close/ -+ * silence, misc fixes (blocknr removal, bio_add_page usage) -+ * -+ * Copyright (C) 1999 - 2002 Silicon Graphics, Inc. All rights reserved. -+ * Copyright (C) 2001 - 2002 Matt D. Robinson. All rights reserved. -+ * Copyright (C) 2002 International Business Machines Corp. -+ * -+ * This code is released under version 2 of the GNU GPL. 
-+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include "dump_methods.h" -+ -+extern void *dump_page_buf; -+ -+/* The end_io callback for dump i/o completion */ -+static int -+dump_bio_end_io(struct bio *bio, unsigned int bytes_done, int error) -+{ -+ struct dump_blockdev *dump_bdev; -+ -+ if (bio->bi_size) { -+ /* some bytes still left to transfer */ -+ return 1; /* not complete */ -+ } -+ -+ dump_bdev = (struct dump_blockdev *)bio->bi_private; -+ if (error) { -+ printk("IO error while writing the dump, aborting\n"); -+ } -+ -+ dump_bdev->err = error; -+ -+ /* no wakeup needed, since caller polls for completion */ -+ return 0; -+} -+ -+/* Check if the dump bio is already mapped to the specified buffer */ -+static int -+dump_block_map_valid(struct dump_blockdev *dev, struct page *page, -+ int len) -+{ -+ struct bio *bio = dev->bio; -+ unsigned long bsize = 0; -+ -+ if (!bio->bi_vcnt) -+ return 0; /* first time, not mapped */ -+ -+ -+ if ((bio_page(bio) != page) || (len > bio->bi_vcnt << PAGE_SHIFT)) -+ return 0; /* buffer not mapped */ -+ -+ bsize = bdev_hardsect_size(bio->bi_bdev); -+ if ((len & (PAGE_SIZE - 1)) || (len & bsize)) -+ return 0; /* alignment checks needed */ -+ -+ /* quick check to decide if we need to redo bio_add_page */ -+ if (bdev_get_queue(bio->bi_bdev)->merge_bvec_fn) -+ return 0; /* device may have other restrictions */ -+ -+ return 1; /* already mapped */ -+} -+ -+/* -+ * Set up the dump bio for i/o from the specified buffer -+ * Return value indicates whether the full buffer could be mapped or not -+ */ -+static int -+dump_block_map(struct dump_blockdev *dev, void *buf, int len) -+{ -+ struct page *page = virt_to_page(buf); -+ struct bio *bio = dev->bio; -+ unsigned long bsize = 0; -+ -+ bio->bi_bdev = dev->bdev; -+ bio->bi_sector = (dev->start_offset + dev->ddev.curr_offset) >> 9; -+ bio->bi_idx = 0; /* reset index to the beginning */ -+ -+ if (dump_block_map_valid(dev, page, len)) { -+ /* 
already mapped and usable rightaway */ -+ bio->bi_size = len; /* reset size to the whole bio */ -+ } else { -+ /* need to map the bio */ -+ bio->bi_size = 0; -+ bio->bi_vcnt = 0; -+ bsize = bdev_hardsect_size(bio->bi_bdev); -+ -+ /* first a few sanity checks */ -+ if (len < bsize) { -+ printk("map: len less than hardsect size \n"); -+ return -EINVAL; -+ } -+ -+ if ((unsigned long)buf & bsize) { -+ printk("map: not aligned \n"); -+ return -EINVAL; -+ } -+ -+ /* assume contig. page aligned low mem buffer( no vmalloc) */ -+ if ((page_address(page) != buf) || (len & (PAGE_SIZE - 1))) { -+ printk("map: invalid buffer alignment!\n"); -+ return -EINVAL; -+ } -+ /* finally we can go ahead and map it */ -+ while (bio->bi_size < len) -+ if (bio_add_page(bio, page++, PAGE_SIZE, 0) == 0) { -+ break; -+ } -+ -+ bio->bi_end_io = dump_bio_end_io; -+ bio->bi_private = dev; -+ } -+ -+ if (bio->bi_size != len) { -+ printk("map: bio size = %d not enough for len = %d!\n", -+ bio->bi_size, len); -+ return -E2BIG; -+ } -+ return 0; -+} -+ -+static void -+dump_free_bio(struct bio *bio) -+{ -+ if (bio) -+ kfree(bio->bi_io_vec); -+ kfree(bio); -+} -+ -+/* -+ * Prepares the dump device so we can take a dump later. -+ * The caller is expected to have filled up the kdev_id field in the -+ * block dump dev structure. -+ * -+ * At dump time when dump_block_write() is invoked it will be too -+ * late to recover, so as far as possible make sure obvious errors -+ * get caught right here and reported back to the caller. 
-+ */ -+static int -+dump_block_open(struct dump_dev *dev, unsigned long arg) -+{ -+ struct dump_blockdev *dump_bdev = DUMP_BDEV(dev); -+ struct block_device *bdev; -+ int retval = 0; -+ struct bio_vec *bvec; -+ -+ /* make sure this is a valid block device */ -+ if (!arg) { -+ retval = -EINVAL; -+ goto err; -+ } -+ -+ /* get a corresponding block_dev struct for this */ -+ bdev = bdget((dev_t)arg); -+ if (!bdev) { -+ retval = -ENODEV; -+ goto err; -+ } -+ -+ /* get the block device opened */ -+ if ((retval = blkdev_get(bdev, O_RDWR | O_LARGEFILE, 0, BDEV_RAW))) { -+ goto err1; -+ } -+ -+ if ((dump_bdev->bio = kmalloc(sizeof(struct bio), GFP_KERNEL)) -+ == NULL) { -+ printk("Cannot allocate bio\n"); -+ retval = -ENOMEM; -+ goto err2; -+ } -+ -+ bio_init(dump_bdev->bio); -+ -+ if ((bvec = kmalloc(sizeof(struct bio_vec) * -+ (DUMP_BUFFER_SIZE >> PAGE_SHIFT), GFP_KERNEL)) == NULL) { -+ retval = -ENOMEM; -+ goto err3; -+ } -+ -+ /* assign the new dump dev structure */ -+ dump_bdev->kdev_id = to_kdev_t((dev_t)arg); -+ dump_bdev->bdev = bdev; -+ -+ /* make a note of the limit */ -+ dump_bdev->limit = bdev->bd_inode->i_size; -+ -+ /* now make sure we can map the dump buffer */ -+ dump_bdev->bio->bi_io_vec = bvec; -+ dump_bdev->bio->bi_max_vecs = DUMP_BUFFER_SIZE >> PAGE_SHIFT; -+ -+ retval = dump_block_map(dump_bdev, dump_config.dumper->dump_buf, -+ DUMP_BUFFER_SIZE); -+ -+ if (retval) { -+ printk("open: dump_block_map failed, ret %d\n", retval); -+ goto err3; -+ } -+ -+ printk("Block device (%d,%d) successfully configured for dumping\n", -+ major(dump_bdev->kdev_id), -+ minor(dump_bdev->kdev_id)); -+ -+ -+ /* after opening the block device, return */ -+ return retval; -+ -+err3: dump_free_bio(dump_bdev->bio); -+ dump_bdev->bio = NULL; -+err2: if (bdev) blkdev_put(bdev, BDEV_RAW); -+ goto err; -+err1: if (bdev) bdput(bdev); -+ dump_bdev->bdev = NULL; -+err: return retval; -+} -+ -+/* -+ * Close the dump device and release associated resources -+ * Invoked when unconfiguring 
the dump device. -+ */ -+static int -+dump_block_release(struct dump_dev *dev) -+{ -+ struct dump_blockdev *dump_bdev = DUMP_BDEV(dev); -+ -+ /* release earlier bdev if present */ -+ if (dump_bdev->bdev) { -+ blkdev_put(dump_bdev->bdev, BDEV_RAW); -+ dump_bdev->bdev = NULL; -+ } -+ -+ dump_free_bio(dump_bdev->bio); -+ dump_bdev->bio = NULL; -+ -+ return 0; -+} -+ -+ -+/* -+ * Prepare the dump device for use (silence any ongoing activity -+ * and quiesce state) when the system crashes. -+ */ -+static int -+dump_block_silence(struct dump_dev *dev) -+{ -+ struct dump_blockdev *dump_bdev = DUMP_BDEV(dev); -+ struct request_queue *q = bdev_get_queue(dump_bdev->bdev); -+ int ret; -+ -+ /* If we can't get request queue lock, refuse to take the dump */ -+ if (!spin_trylock(q->queue_lock)) -+ return -EBUSY; -+ -+ ret = elv_queue_empty(q); -+ spin_unlock(q->queue_lock); -+ -+ /* For now we assume we have the device to ourselves */ -+ /* Just a quick sanity check */ -+ if (!ret) { -+ /* i/o in flight - safer to quit */ -+ return -EBUSY; -+ } -+ -+ /* -+ * Move to a softer level of silencing where no spin_lock_irqs -+ * are held on other cpus -+ */ -+ dump_silence_level = DUMP_SOFT_SPIN_CPUS; -+ -+ __dump_irq_enable(); -+ -+ printk("Dumping to block device (%d,%d) on CPU %d ...\n", -+ major(dump_bdev->kdev_id), minor(dump_bdev->kdev_id), -+ smp_processor_id()); -+ -+ return 0; -+} -+ -+/* -+ * Invoked when dumping is done. This is the time to put things back -+ * (i.e. undo the effects of dump_block_silence) so the device is -+ * available for normal use. -+ */ -+static int -+dump_block_resume(struct dump_dev *dev) -+{ -+ __dump_irq_restore(); -+ return 0; -+} -+ -+ -+/* -+ * Seek to the specified offset in the dump device. -+ * Makes sure this is a valid offset, otherwise returns an error. 
-+ */ -+static int -+dump_block_seek(struct dump_dev *dev, loff_t off) -+{ -+ struct dump_blockdev *dump_bdev = DUMP_BDEV(dev); -+ loff_t offset = off + dump_bdev->start_offset; -+ -+ if (offset & ( PAGE_SIZE - 1)) { -+ printk("seek: non-page aligned\n"); -+ return -EINVAL; -+ } -+ -+ if (offset & (bdev_hardsect_size(dump_bdev->bdev) - 1)) { -+ printk("seek: not sector aligned \n"); -+ return -EINVAL; -+ } -+ -+ if (offset > dump_bdev->limit) { -+ printk("seek: not enough space left on device!\n"); -+ return -ENOSPC; -+ } -+ dev->curr_offset = off; -+ return 0; -+} -+ -+/* -+ * Write out a buffer after checking the device limitations, -+ * sector sizes, etc. Assumes the buffer is in directly mapped -+ * kernel address space (not vmalloc'ed). -+ * -+ * Returns: number of bytes written or -ERRNO. -+ */ -+static int -+dump_block_write(struct dump_dev *dev, void *buf, -+ unsigned long len) -+{ -+ struct dump_blockdev *dump_bdev = DUMP_BDEV(dev); -+ loff_t offset = dev->curr_offset + dump_bdev->start_offset; -+ int retval = -ENOSPC; -+ -+ if (offset >= dump_bdev->limit) { -+ printk("write: not enough space left on device!\n"); -+ goto out; -+ } -+ -+ /* don't write more blocks than our max limit */ -+ if (offset + len > dump_bdev->limit) -+ len = dump_bdev->limit - offset; -+ -+ -+ retval = dump_block_map(dump_bdev, buf, len); -+ if (retval){ -+ printk("write: dump_block_map failed! err %d\n", retval); -+ goto out; -+ } -+ -+ /* -+ * Write out the data to disk. -+ * Assumes the entire buffer mapped to a single bio, which we can -+ * submit and wait for io completion. In the future, may consider -+ * increasing the dump buffer size and submitting multiple bio s -+ * for better throughput. 
-+ */ -+ dump_bdev->err = -EAGAIN; -+ submit_bio(WRITE, dump_bdev->bio); -+ -+ dump_bdev->ddev.curr_offset += len; -+ retval = len; -+ out: -+ return retval; -+} -+ -+/* -+ * Name: dump_block_ready() -+ * Func: check if the last dump i/o is over and ready for next request -+ */ -+static int -+dump_block_ready(struct dump_dev *dev, void *buf) -+{ -+ struct dump_blockdev *dump_bdev = DUMP_BDEV(dev); -+ request_queue_t *q = bdev_get_queue(dump_bdev->bio->bi_bdev); -+ -+ /* check for io completion */ -+ if (dump_bdev->err == -EAGAIN) { -+ q->unplug_fn(q); -+ return -EAGAIN; -+ } -+ -+ if (dump_bdev->err) { -+ printk("dump i/o err\n"); -+ return dump_bdev->err; -+ } -+ -+ return 0; -+} -+ -+ -+struct dump_dev_ops dump_blockdev_ops = { -+ .open = dump_block_open, -+ .release = dump_block_release, -+ .silence = dump_block_silence, -+ .resume = dump_block_resume, -+ .seek = dump_block_seek, -+ .write = dump_block_write, -+ /* .read not implemented */ -+ .ready = dump_block_ready -+}; -+ -+static struct dump_blockdev default_dump_blockdev = { -+ .ddev = {.type_name = "blockdev", .ops = &dump_blockdev_ops, -+ .curr_offset = 0}, -+ /* -+ * leave enough room for the longest swap header possibly written -+ * written by mkswap (likely the largest page size supported by -+ * the arch -+ */ -+ .start_offset = DUMP_HEADER_OFFSET, -+ .err = 0 -+ /* assume the rest of the fields are zeroed by default */ -+}; -+ -+struct dump_blockdev *dump_blockdev = &default_dump_blockdev; -+ -+static int __init -+dump_blockdev_init(void) -+{ -+ if (dump_register_device(&dump_blockdev->ddev) < 0) { -+ printk("block device driver registration failed\n"); -+ return -1; -+ } -+ -+ printk("block device driver for LKCD registered\n"); -+ return 0; -+} -+ -+static void __exit -+dump_blockdev_cleanup(void) -+{ -+ dump_unregister_device(&dump_blockdev->ddev); -+ printk("block device driver for LKCD unregistered\n"); -+} -+ -+MODULE_AUTHOR("LKCD Development Team "); -+MODULE_DESCRIPTION("Block Dump Driver 
for Linux Kernel Crash Dump (LKCD)"); -+MODULE_LICENSE("GPL"); -+ -+module_init(dump_blockdev_init); -+module_exit(dump_blockdev_cleanup); ---- linux-2.5.69/drivers/dump/dump_execute.c.lkcdbase Mon Jun 2 17:29:49 2003 -+++ linux-2.5.69/drivers/dump/dump_execute.c Fri Feb 7 06:47:58 2003 -@@ -0,0 +1,126 @@ -+/* -+ * The file has the common/generic dump execution code -+ * -+ * Started: Oct 2002 - Suparna Bhattacharya -+ * Split and rewrote high level dump execute code to make use -+ * of dump method interfaces. -+ * -+ * Derived from original code in dump_base.c created by -+ * Matt Robinson ) -+ * -+ * Copyright (C) 1999 - 2002 Silicon Graphics, Inc. All rights reserved. -+ * Copyright (C) 2001 - 2002 Matt D. Robinson. All rights reserved. -+ * Copyright (C) 2002 International Business Machines Corp. -+ * -+ * Assumes dumper and dump config settings are in place -+ * (invokes corresponding dumper specific routines as applicable) -+ * -+ * This code is released under version 2 of the GNU GPL. -+ */ -+#include -+#include -+#include -+#include "dump_methods.h" -+ -+struct notifier_block *dump_notifier_list; /* dump started/ended callback */ -+ -+/* Dump progress indicator */ -+void -+dump_speedo(int i) -+{ -+ static const char twiddle[4] = { '|', '\\', '-', '/' }; -+ printk("%c\b", twiddle[i&3]); -+} -+ -+/* Make the device ready and write out the header */ -+int dump_begin(void) -+{ -+ int err = 0; -+ -+ /* dump_dev = dump_config.dumper->dev; */ -+ dumper_reset(); -+ if ((err = dump_dev_silence())) { -+ /* quiesce failed, can't risk continuing */ -+ /* Todo/Future: switch to alternate dump scheme if possible */ -+ printk("dump silence dev failed ! error %d\n", err); -+ return err; -+ } -+ -+ pr_debug("Writing dump header\n"); -+ if ((err = dump_update_header())) { -+ printk("dump update header failed ! 
error %d\n", err); -+ dump_dev_resume(); -+ return err; -+ } -+ -+ dump_config.dumper->curr_offset = DUMP_BUFFER_SIZE; -+ -+ return 0; -+} -+ -+/* -+ * Write the dump terminator, a final header update and let go of -+ * exclusive use of the device for dump. -+ */ -+int dump_complete(void) -+{ -+ int ret = 0; -+ -+ if (dump_config.level != DUMP_LEVEL_HEADER) { -+ if ((ret = dump_update_end_marker())) { -+ printk("dump update end marker error %d\n", ret); -+ } -+ if ((ret = dump_update_header())) { -+ printk("dump update header error %d\n", ret); -+ } -+ } -+ ret = dump_dev_resume(); -+ -+ return ret; -+} -+ -+/* Saves all dump data */ -+int dump_execute_savedump(void) -+{ -+ int ret = 0, err = 0; -+ -+ if ((ret = dump_begin())) { -+ return ret; -+ } -+ -+ if (dump_config.level != DUMP_LEVEL_HEADER) { -+ ret = dump_sequencer(); -+ } -+ if ((err = dump_complete())) { -+ printk("Dump complete failed. Error %d\n", err); -+ } -+ -+ return ret; -+} -+ -+/* Does all the real work: Capture and save state */ -+int dump_generic_execute(const char *panic_str, const struct pt_regs *regs) -+{ -+ int ret = 0; -+ -+ if ((ret = dump_configure_header(panic_str, regs))) { -+ printk("dump config header failed ! error %d\n", ret); -+ return ret; -+ } -+ -+ /* tell interested parties that a dump is about to start */ -+ notifier_call_chain(&dump_notifier_list, DUMP_BEGIN, -+ &dump_config.dump_device); -+ -+ if (dump_config.level != DUMP_LEVEL_NONE) -+ ret = dump_execute_savedump(); -+ -+ pr_debug("dumped %ld blocks of %d bytes each\n", -+ dump_config.dumper->count, DUMP_BUFFER_SIZE); -+ -+ /* tell interested parties that a dump has completed */ -+ notifier_call_chain(&dump_notifier_list, DUMP_END, -+ &dump_config.dump_device); -+ -+ return ret; -+} ---- linux-2.5.69/drivers/dump/dump_filters.c.lkcdbase Mon Jun 2 17:29:49 2003 -+++ linux-2.5.69/drivers/dump/dump_filters.c Mon Feb 3 05:06:28 2003 -@@ -0,0 +1,143 @@ -+/* -+ * Default filters to select data to dump for various passes. 
-+ * -+ * Started: Oct 2002 - Suparna Bhattacharya -+ * Split and rewrote default dump selection logic to generic dump -+ * method interfaces -+ * Derived from a portion of dump_base.c created by -+ * Matt Robinson ) -+ * -+ * Copyright (C) 1999 - 2002 Silicon Graphics, Inc. All rights reserved. -+ * Copyright (C) 2001 - 2002 Matt D. Robinson. All rights reserved. -+ * Copyright (C) 2002 International Business Machines Corp. -+ * -+ * Used during single-stage dumping and during stage 1 of the 2-stage scheme -+ * (Stage 2 of the 2-stage scheme uses the fully transparent filters -+ * i.e. passthru filters in dump_overlay.c) -+ * -+ * Future: Custom selective dump may involve a different set of filters. -+ * -+ * This code is released under version 2 of the GNU GPL. -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include "dump_methods.h" -+ -+ -+/* Copied from mm/bootmem.c - FIXME */ -+/* return the number of _pages_ that will be allocated for the boot bitmap */ -+unsigned long dump_calc_bootmap_pages (void) -+{ -+ unsigned long mapsize; -+ unsigned long pages = num_physpages; -+ -+ mapsize = (pages+7)/8; -+ mapsize = (mapsize + ~PAGE_MASK) & PAGE_MASK; -+ mapsize >>= PAGE_SHIFT; -+ -+ return mapsize; -+} -+ -+ -+#define DUMP_PFN_SAFETY_MARGIN 1024 /* 4 MB */ -+/* temporary */ -+extern unsigned long min_low_pfn; -+ -+ -+int dump_low_page(struct page *p) -+{ -+ return page_to_pfn(p) < min_low_pfn + dump_calc_bootmap_pages() -+ + 1 + DUMP_PFN_SAFETY_MARGIN; -+} -+ -+static inline int kernel_page(struct page *p) -+{ -+ /* FIXME: Need to exclude hugetlb pages. 
Clue: reserved but inuse */ -+ return PageReserved(p) || (!PageLRU(p) && PageInuse(p)); -+} -+ -+static inline int user_page(struct page *p) -+{ -+ return PageInuse(p) && (!PageReserved(p) && PageLRU(p)); -+} -+ -+static inline int unreferenced_page(struct page *p) -+{ -+ return !PageInuse(p) && !PageReserved(p); -+} -+ -+ -+/* loc marks the beginning of a range of pages */ -+int dump_filter_kernpages(int pass, unsigned long loc, unsigned long sz) -+{ -+ struct page *page = (struct page *)loc; -+ /* if any of the pages is a kernel page, select this set */ -+ while (sz) { -+ if (dump_low_page(page) || kernel_page(page)) -+ return 1; -+ sz -= PAGE_SIZE; -+ page++; -+ } -+ return 0; -+} -+ -+ -+/* loc marks the beginning of a range of pages */ -+int dump_filter_userpages(int pass, unsigned long loc, unsigned long sz) -+{ -+ struct page *page = (struct page *)loc; -+ int ret = 0; -+ /* select if the set has any user page, and no kernel pages */ -+ while (sz) { -+ if (user_page(page) && !dump_low_page(page)) { -+ ret = 1; -+ } else if (kernel_page(page) || dump_low_page(page)) { -+ return 0; -+ } -+ page++; -+ sz -= PAGE_SIZE; -+ } -+ return ret; -+} -+ -+ -+ -+/* loc marks the beginning of a range of pages */ -+int dump_filter_unusedpages(int pass, unsigned long loc, unsigned long sz) -+{ -+ struct page *page = (struct page *)loc; -+ -+ /* select if the set does not have any used pages */ -+ while (sz) { -+ if (!unreferenced_page(page) || dump_low_page(page)) { -+ return 0; -+ } -+ page++; -+ sz -= PAGE_SIZE; -+ } -+ return 1; -+} -+ -+/* dummy: last (non-existent) pass */ -+int dump_filter_none(int pass, unsigned long loc, unsigned long sz) -+{ -+ return 0; -+} -+ -+/* TBD: resolve level bitmask ? 
*/ -+struct dump_data_filter dump_filter_table[] = { -+ { .name = "kern", .selector = dump_filter_kernpages, -+ .level_mask = DUMP_MASK_KERN}, -+ { .name = "user", .selector = dump_filter_userpages, -+ .level_mask = DUMP_MASK_USED}, -+ { .name = "unused", .selector = dump_filter_unusedpages, -+ .level_mask = DUMP_MASK_UNUSED}, -+ { .name = "none", .selector = dump_filter_none, -+ .level_mask = DUMP_MASK_REST}, -+ { .name = "", .selector = NULL, .level_mask = 0} -+}; -+ ---- linux-2.5.69/drivers/dump/dump_fmt.c.lkcdbase Mon Jun 2 17:29:49 2003 -+++ linux-2.5.69/drivers/dump/dump_fmt.c Fri Feb 7 06:47:58 2003 -@@ -0,0 +1,395 @@ -+/* -+ * Implements the routines which handle the format specific -+ * aspects of dump for the default dump format. -+ * -+ * Used in single stage dumping and stage 1 of soft-boot based dumping -+ * Saves data in LKCD (lcrash) format -+ * -+ * Previously a part of dump_base.c -+ * -+ * Started: Oct 2002 - Suparna Bhattacharya -+ * Split off and reshuffled LKCD dump format code around generic -+ * dump method interfaces. -+ * -+ * Derived from original code created by -+ * Matt Robinson ) -+ * -+ * Contributions from SGI, IBM, HP, MCL, and others. -+ * -+ * Copyright (C) 1999 - 2002 Silicon Graphics, Inc. All rights reserved. -+ * Copyright (C) 2000 - 2002 TurboLinux, Inc. All rights reserved. -+ * Copyright (C) 2001 - 2002 Matt D. Robinson. All rights reserved. -+ * Copyright (C) 2002 International Business Machines Corp. -+ * -+ * This code is released under version 2 of the GNU GPL. -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include "dump_methods.h" -+ -+/* -+ * SYSTEM DUMP LAYOUT -+ * -+ * System dumps are currently the combination of a dump header and a set -+ * of data pages which contain the system memory. 
The layout of the dump -+ * (for full dumps) is as follows: -+ * -+ * +-----------------------------+ -+ * | generic dump header | -+ * +-----------------------------+ -+ * | architecture dump header | -+ * +-----------------------------+ -+ * | page header | -+ * +-----------------------------+ -+ * | page data | -+ * +-----------------------------+ -+ * | page header | -+ * +-----------------------------+ -+ * | page data | -+ * +-----------------------------+ -+ * | | | -+ * | | | -+ * | | | -+ * | | | -+ * | V | -+ * +-----------------------------+ -+ * | PAGE_END header | -+ * +-----------------------------+ -+ * -+ * There are two dump headers, the first which is architecture -+ * independent, and the other which is architecture dependent. This -+ * allows different architectures to dump different data structures -+ * which are specific to their chipset, CPU, etc. -+ * -+ * After the dump headers come a succession of dump page headers along -+ * with dump pages. The page header contains information about the page -+ * size, any flags associated with the page (whether it's compressed or -+ * not), and the address of the page. After the page header is the page -+ * data, which is either compressed (or not). Each page of data is -+ * dumped in succession, until the final dump header (PAGE_END) is -+ * placed at the end of the dump, assuming the dump device isn't out -+ * of space. -+ * -+ * This mechanism allows for multiple compression types, different -+ * types of data structures, different page ordering, etc., etc., etc. -+ * It's a very straightforward mechanism for dumping system memory. -+ */ -+ -+struct __dump_header dump_header; /* the primary dump header */ -+struct __dump_header_asm dump_header_asm; /* the arch-specific dump header */ -+ -+/* -+ * Set up common header fields (mainly the arch indep section) -+ * Per-cpu state is handled by lcrash_save_context -+ * Returns the size of the header in bytes. 
-+ */ -+static int lcrash_init_dump_header(const char *panic_str) -+{ -+ struct timeval dh_time; -+ /* make sure the dump header isn't TOO big */ -+ if ((sizeof(struct __dump_header) + -+ sizeof(struct __dump_header_asm)) > DUMP_BUFFER_SIZE) { -+ printk("lcrash_init_header(): combined " -+ "headers larger than DUMP_BUFFER_SIZE!\n"); -+ return -E2BIG; -+ } -+ -+ /* initialize the dump headers to zero */ -+ memset(&dump_header, 0, sizeof(dump_header)); -+ memset(&dump_header_asm, 0, sizeof(dump_header_asm)); -+ -+ /* configure dump header values */ -+ dump_header.dh_magic_number = DUMP_MAGIC_NUMBER; -+ dump_header.dh_version = DUMP_VERSION_NUMBER; -+ dump_header.dh_memory_start = PAGE_OFFSET; -+ dump_header.dh_memory_end = DUMP_MAGIC_NUMBER; -+ dump_header.dh_header_size = sizeof(struct __dump_header); -+ dump_header.dh_page_size = PAGE_SIZE; -+ dump_header.dh_dump_level = dump_config.level; -+ dump_header.dh_current_task = (unsigned long) current; -+ dump_header.dh_dump_compress = dump_config.dumper->compress-> -+ compress_type; -+ dump_header.dh_dump_flags = dump_config.flags; -+ dump_header.dh_dump_device = dump_config.dumper->dev->device_id; -+ -+#if DUMP_DEBUG >= 6 -+ dump_header.dh_num_bytes = 0; -+#endif -+ dump_header.dh_num_dump_pages = 0; -+ do_gettimeofday(&dh_time); -+ dump_header.dh_time.tv_sec = dh_time.tv_sec; -+ dump_header.dh_time.tv_usec = dh_time.tv_usec; -+ -+ memcpy((void *)&(dump_header.dh_utsname_sysname), -+ (const void *)&(system_utsname.sysname), __NEW_UTS_LEN + 1); -+ memcpy((void *)&(dump_header.dh_utsname_nodename), -+ (const void *)&(system_utsname.nodename), __NEW_UTS_LEN + 1); -+ memcpy((void *)&(dump_header.dh_utsname_release), -+ (const void *)&(system_utsname.release), __NEW_UTS_LEN + 1); -+ memcpy((void *)&(dump_header.dh_utsname_version), -+ (const void *)&(system_utsname.version), __NEW_UTS_LEN + 1); -+ memcpy((void *)&(dump_header.dh_utsname_machine), -+ (const void *)&(system_utsname.machine), __NEW_UTS_LEN + 1); -+ 
memcpy((void *)&(dump_header.dh_utsname_domainname), -+ (const void *)&(system_utsname.domainname), __NEW_UTS_LEN + 1); -+ -+ if (panic_str) { -+ memcpy((void *)&(dump_header.dh_panic_string), -+ (const void *)panic_str, DUMP_PANIC_LEN); -+ } -+ -+ dump_header_asm.dha_magic_number = DUMP_ASM_MAGIC_NUMBER; -+ dump_header_asm.dha_version = DUMP_ASM_VERSION_NUMBER; -+ dump_header_asm.dha_header_size = sizeof(dump_header_asm); -+ -+ dump_header_asm.dha_smp_num_cpus = num_online_cpus(); -+ pr_debug("smp_num_cpus in header %d\n", -+ dump_header_asm.dha_smp_num_cpus); -+ -+ dump_header_asm.dha_dumping_cpu = smp_processor_id(); -+ -+ return sizeof(dump_header) + sizeof(dump_header_asm); -+} -+ -+ -+int dump_lcrash_configure_header(const char *panic_str, -+ const struct pt_regs *regs) -+{ -+ int retval = 0; -+ -+ dump_config.dumper->header_len = lcrash_init_dump_header(panic_str); -+ -+ /* capture register states for all processors */ -+ dump_save_this_cpu(regs); -+ __dump_save_other_cpus(); /* side effect:silence cpus */ -+ -+ /* configure architecture-specific dump header values */ -+ if ((retval = __dump_configure_header(regs))) -+ return retval; -+ -+ dump_config.dumper->header_dirty++; -+ return 0; -+} -+ -+/* save register and task context */ -+void dump_lcrash_save_context(int cpu, const struct pt_regs *regs, -+ struct task_struct *tsk) -+{ -+ dump_header_asm.dha_smp_current_task[cpu] = (uint32_t) tsk; -+ -+ __dump_save_regs(&dump_header_asm.dha_smp_regs[cpu], regs); -+ -+ /* take a snapshot of the stack */ -+ /* doing this enables us to tolerate slight drifts on this cpu */ -+ if (dump_header_asm.dha_stack[cpu]) { -+ memcpy((void *)dump_header_asm.dha_stack[cpu], -+ tsk->thread_info, THREAD_SIZE); -+ } -+ dump_header_asm.dha_stack_ptr[cpu] = (uint32_t)(tsk->thread_info); -+} -+ -+/* write out the header */ -+int dump_write_header(void) -+{ -+ int retval = 0, size; -+ void *buf = dump_config.dumper->dump_buf; -+ -+ /* accounts for DUMP_HEADER_OFFSET if applicable */ 
-+ if ((retval = dump_dev_seek(0))) { -+ printk("Unable to seek to dump header offset: %d\n", -+ retval); -+ return retval; -+ } -+ -+ memcpy(buf, (void *)&dump_header, sizeof(dump_header)); -+ size = sizeof(dump_header); -+ memcpy(buf + size, (void *)&dump_header_asm, sizeof(dump_header_asm)); -+ size += sizeof(dump_header_asm); -+ size = PAGE_ALIGN(size); -+ retval = dump_ll_write(buf , size); -+ -+ if (retval < size) -+ return (retval >= 0) ? ENOSPC : retval; -+ -+ return 0; -+} -+ -+int dump_generic_update_header(void) -+{ -+ int err = 0; -+ -+ if (dump_config.dumper->header_dirty) { -+ if ((err = dump_write_header())) { -+ printk("dump write header failed !err %d\n", err); -+ } else { -+ dump_config.dumper->header_dirty = 0; -+ } -+ } -+ -+ return err; -+} -+ -+static inline int is_curr_stack_page(struct page *page, unsigned long size) -+{ -+ unsigned long thread_addr = (unsigned long)current_thread_info(); -+ unsigned long addr = (unsigned long)page_address(page); -+ -+ return !PageHighMem(page) && (addr < thread_addr + THREAD_SIZE) -+ && (addr + size > thread_addr); -+} -+ -+static inline int is_dump_page(struct page *page, unsigned long size) -+{ -+ unsigned long addr = (unsigned long)page_address(page); -+ unsigned long dump_buf = (unsigned long)dump_config.dumper->dump_buf; -+ -+ return !PageHighMem(page) && (addr < dump_buf + DUMP_BUFFER_SIZE) -+ && (addr + size > dump_buf); -+} -+ -+int dump_allow_compress(struct page *page, unsigned long size) -+{ -+ /* -+ * Don't compress the page if any part of it overlaps -+ * with the current stack or dump buffer (since the contents -+ * in these could be changing while compression is going on) -+ */ -+ return !is_curr_stack_page(page, size) && !is_dump_page(page, size); -+} -+ -+void lcrash_init_pageheader(struct __dump_page *dp, struct page *page, -+ unsigned long sz) -+{ -+ memset(dp, sizeof(struct __dump_page), 0); -+ dp->dp_flags = 0; -+ dp->dp_size = 0; -+ if (sz > 0) -+ dp->dp_address = page_to_pfn(page) << 
PAGE_SHIFT; -+ -+#if DUMP_DEBUG > 6 -+ dp->dp_page_index = dump_header.dh_num_dump_pages; -+ dp->dp_byte_offset = dump_header.dh_num_bytes + DUMP_BUFFER_SIZE -+ + DUMP_HEADER_OFFSET; /* ?? */ -+#endif /* DUMP_DEBUG */ -+} -+ -+int dump_lcrash_add_data(unsigned long loc, unsigned long len) -+{ -+ struct page *page = (struct page *)loc; -+ void *addr, *buf = dump_config.dumper->curr_buf; -+ struct __dump_page *dp = (struct __dump_page *)buf; -+ int bytes, size; -+ -+ if (buf > dump_config.dumper->dump_buf + DUMP_BUFFER_SIZE) -+ return -ENOMEM; -+ -+ lcrash_init_pageheader(dp, page, len); -+ buf += sizeof(struct __dump_page); -+ -+ while (len) { -+ addr = kmap_atomic(page, KM_DUMP); -+ size = bytes = (len > PAGE_SIZE) ? PAGE_SIZE : len; -+ /* check for compression */ -+ if (dump_allow_compress(page, bytes)) { -+ size = dump_compress_data((char *)addr, bytes, (char *)buf); -+ } -+ /* set the compressed flag if the page did compress */ -+ if (size && (size < bytes)) { -+ dp->dp_flags |= DUMP_DH_COMPRESSED; -+ } else { -+ /* compression failed -- default to raw mode */ -+ dp->dp_flags |= DUMP_DH_RAW; -+ memcpy(buf, addr, bytes); -+ size = bytes; -+ } -+ /* memset(buf, 'A', size); temporary: testing only !! */ -+ kunmap_atomic(addr, KM_DUMP); -+ dp->dp_size += size; -+ buf += size; -+ len -= bytes; -+ page++; -+ } -+ -+ /* now update the header */ -+#if DUMP_DEBUG > 6 -+ dump_header.dh_num_bytes += dp->dp_size + sizeof(*dp); -+#endif -+ dump_header.dh_num_dump_pages++; -+ dump_config.dumper->header_dirty++; -+ -+ dump_config.dumper->curr_buf = buf; -+ -+ return len; -+} -+ -+int dump_lcrash_update_end_marker(void) -+{ -+ struct __dump_page *dp = -+ (struct __dump_page *)dump_config.dumper->curr_buf; -+ unsigned long left; -+ int ret = 0; -+ -+ lcrash_init_pageheader(dp, NULL, 0); -+ dp->dp_flags |= DUMP_DH_END; /* tbd: truncation test ? 
*/ -+ -+ /* now update the header */ -+#if DUMP_DEBUG > 6 -+ dump_header.dh_num_bytes += sizeof(*dp); -+#endif -+ dump_config.dumper->curr_buf += sizeof(*dp); -+ left = dump_config.dumper->curr_buf - dump_config.dumper->dump_buf; -+ -+ printk("\n"); -+ -+ while (left) { -+ if ((ret = dump_dev_seek(dump_config.dumper->curr_offset))) { -+ printk("Seek failed at offset 0x%llx\n", -+ dump_config.dumper->curr_offset); -+ return ret; -+ } -+ -+ if (DUMP_BUFFER_SIZE > left) -+ memset(dump_config.dumper->curr_buf, 'm', -+ DUMP_BUFFER_SIZE - left); -+ -+ if ((ret = dump_ll_write(dump_config.dumper->dump_buf, -+ DUMP_BUFFER_SIZE)) < DUMP_BUFFER_SIZE) { -+ return (ret < 0) ? ret : -ENOSPC; -+ } -+ -+ dump_config.dumper->curr_offset += DUMP_BUFFER_SIZE; -+ -+ if (left > DUMP_BUFFER_SIZE) { -+ left -= DUMP_BUFFER_SIZE; -+ memcpy(dump_config.dumper->dump_buf, -+ dump_config.dumper->dump_buf + DUMP_BUFFER_SIZE, left); -+ dump_config.dumper->curr_buf -= DUMP_BUFFER_SIZE; -+ } else { -+ left = 0; -+ } -+ } -+ return 0; -+} -+ -+ -+/* Default Formatter (lcrash) */ -+struct dump_fmt_ops dump_fmt_lcrash_ops = { -+ .configure_header = dump_lcrash_configure_header, -+ .update_header = dump_generic_update_header, -+ .save_context = dump_lcrash_save_context, -+ .add_data = dump_lcrash_add_data, -+ .update_end_marker = dump_lcrash_update_end_marker -+}; -+ -+struct dump_fmt dump_fmt_lcrash = { -+ .name = "lcrash", -+ .ops = &dump_fmt_lcrash_ops -+}; -+ ---- linux-2.5.69/drivers/dump/dump_gzip.c.lkcdbase Mon Jun 2 17:29:49 2003 -+++ linux-2.5.69/drivers/dump/dump_gzip.c Fri Dec 13 00:51:31 2002 -@@ -0,0 +1,118 @@ -+/* -+ * GZIP Compression functions for kernel crash dumps. -+ * -+ * Created by: Matt Robinson (yakker@sourceforge.net) -+ * Copyright 2001 Matt D. Robinson. All rights reserved. -+ * -+ * This code is released under version 2 of the GNU GPL. 
-+ */ -+ -+/* header files */ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+static void *deflate_workspace; -+ -+/* -+ * Name: dump_compress_gzip() -+ * Func: Compress a DUMP_PAGE_SIZE page using gzip-style algorithms (the. -+ * deflate functions similar to what's used in PPP). -+ */ -+static u16 -+dump_compress_gzip(const u8 *old, u16 oldsize, u8 *new, u16 newsize) -+{ -+ /* error code and dump stream */ -+ int err; -+ z_stream dump_stream; -+ -+ dump_stream.workspace = deflate_workspace; -+ -+ if ((err = zlib_deflateInit(&dump_stream, Z_BEST_COMPRESSION)) != Z_OK) { -+ /* fall back to RLE compression */ -+ printk("dump_compress_gzip(): zlib_deflateInit() " -+ "failed (%d)!\n", err); -+ return 0; -+ } -+ -+ /* use old (page of memory) and size (DUMP_PAGE_SIZE) as in-streams */ -+ dump_stream.next_in = (u8 *) old; -+ dump_stream.avail_in = oldsize; -+ -+ /* out streams are new (dpcpage) and new size (DUMP_DPC_PAGE_SIZE) */ -+ dump_stream.next_out = new; -+ dump_stream.avail_out = newsize; -+ -+ /* deflate the page -- check for error */ -+ err = zlib_deflate(&dump_stream, Z_FINISH); -+ if (err != Z_STREAM_END) { -+ /* zero is return code here */ -+ (void)zlib_deflateEnd(&dump_stream); -+ printk("dump_compress_gzip(): zlib_deflate() failed (%d)!\n", -+ err); -+ return 0; -+ } -+ -+ /* let's end the deflated compression stream */ -+ if ((err = zlib_deflateEnd(&dump_stream)) != Z_OK) { -+ printk("dump_compress_gzip(): zlib_deflateEnd() " -+ "failed (%d)!\n", err); -+ } -+ -+ /* return the compressed byte total (if it's smaller) */ -+ if (dump_stream.total_out >= oldsize) { -+ return oldsize; -+ } -+ return dump_stream.total_out; -+} -+ -+/* setup the gzip compression functionality */ -+static struct __dump_compress dump_gzip_compression = { -+ .compress_type = DUMP_COMPRESS_GZIP, -+ .compress_func = dump_compress_gzip, -+ .compress_name = "GZIP", -+}; -+ -+/* -+ * Name: dump_compress_gzip_init() -+ * 
Func: Initialize gzip as a compression mechanism. -+ */ -+static int __init -+dump_compress_gzip_init(void) -+{ -+ deflate_workspace = vmalloc(zlib_deflate_workspacesize()); -+ if (!deflate_workspace) { -+ printk("dump_compress_gzip_init(): Failed to " -+ "alloc %d bytes for deflate workspace\n", -+ zlib_deflate_workspacesize()); -+ return -ENOMEM; -+ } -+ dump_register_compression(&dump_gzip_compression); -+ return 0; -+} -+ -+/* -+ * Name: dump_compress_gzip_cleanup() -+ * Func: Remove gzip as a compression mechanism. -+ */ -+static void __exit -+dump_compress_gzip_cleanup(void) -+{ -+ vfree(deflate_workspace); -+ dump_unregister_compression(DUMP_COMPRESS_GZIP); -+} -+ -+/* module initialization */ -+module_init(dump_compress_gzip_init); -+module_exit(dump_compress_gzip_cleanup); -+ -+MODULE_LICENSE("GPL"); -+MODULE_AUTHOR("LKCD Development Team "); -+MODULE_DESCRIPTION("Gzip compression module for crash dump driver"); ---- linux-2.5.69/drivers/dump/dump_i386.c.lkcdbase Mon Jun 2 17:29:49 2003 -+++ linux-2.5.69/drivers/dump/dump_i386.c Wed Mar 5 02:49:22 2003 -@@ -0,0 +1,329 @@ -+/* -+ * Architecture specific (i386) functions for Linux crash dumps. -+ * -+ * Created by: Matt Robinson (yakker@sgi.com) -+ * -+ * Copyright 1999 Silicon Graphics, Inc. All rights reserved. -+ * -+ * 2.3 kernel modifications by: Matt D. Robinson (yakker@turbolinux.com) -+ * Copyright 2000 TurboLinux, Inc. All rights reserved. -+ * -+ * This code is released under version 2 of the GNU GPL. -+ */ -+ -+/* -+ * The hooks for dumping the kernel virtual memory to disk are in this -+ * file. Any time a modification is made to the virtual memory mechanism, -+ * these routines must be changed to use the new mechanisms. 
-+ */ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include "dump_methods.h" -+#include -+ -+#include -+#include -+#include -+#include -+ -+static __s32 saved_irq_count; /* saved preempt_count() flags */ -+ -+static int -+alloc_dha_stack(void) -+{ -+ int i; -+ void *ptr; -+ -+ if (dump_header_asm.dha_stack[0]) -+ return 0; -+ -+ ptr = vmalloc(THREAD_SIZE * num_online_cpus()); -+ if (!ptr) { -+ printk("vmalloc for dha_stacks failed\n"); -+ return -ENOMEM; -+ } -+ -+ for (i = 0; i < num_online_cpus(); i++) { -+ dump_header_asm.dha_stack[i] = (u32)((unsigned long)ptr + -+ (i * THREAD_SIZE)); -+ } -+ return 0; -+} -+ -+static int -+free_dha_stack(void) -+{ -+ if (dump_header_asm.dha_stack[0]) { -+ vfree((void *)dump_header_asm.dha_stack[0]); -+ dump_header_asm.dha_stack[0] = 0; -+ } -+ return 0; -+} -+ -+ -+void -+__dump_save_regs(struct pt_regs *dest_regs, const struct pt_regs *regs) -+{ -+ *dest_regs = *regs; -+ -+ /* In case of panic dumps, we collects regs on entry to panic. -+ * so, we shouldn't 'fix' ssesp here again. But it is hard to -+ * tell just looking at regs whether ssesp need fixing. We make -+ * this decision by looking at xss in regs. If we have better -+ * means to determine that ssesp are valid (by some flag which -+ * tells that we are here due to panic dump), then we can use -+ * that instead of this kludge. 
-+ */ -+ if (!user_mode(regs)) { -+ if ((0xffff & regs->xss) == __KERNEL_DS) -+ /* already fixed up */ -+ return; -+ dest_regs->esp = (unsigned long)&(regs->esp); -+ __asm__ __volatile__ ("movw %%ss, %%ax;" -+ :"=a"(dest_regs->xss)); -+ } -+} -+ -+ -+#ifdef CONFIG_SMP -+extern unsigned long irq_affinity[]; -+extern irq_desc_t irq_desc[]; -+extern void dump_send_ipi(void); -+ -+static int dump_expect_ipi[NR_CPUS]; -+static atomic_t waiting_for_dump_ipi; -+static unsigned long saved_affinity[NR_IRQS]; -+ -+extern void stop_this_cpu(void *); /* exported by i386 kernel */ -+ -+static int -+dump_nmi_callback(struct pt_regs *regs, int cpu) -+{ -+ if (!dump_expect_ipi[cpu]) -+ return 0; -+ -+ dump_expect_ipi[cpu] = 0; -+ -+ dump_save_this_cpu(regs); -+ atomic_dec(&waiting_for_dump_ipi); -+ -+ level_changed: -+ switch (dump_silence_level) { -+ case DUMP_HARD_SPIN_CPUS: /* Spin until dump is complete */ -+ while (dump_oncpu) { -+ barrier(); /* paranoia */ -+ if (dump_silence_level != DUMP_HARD_SPIN_CPUS) -+ goto level_changed; -+ -+ cpu_relax(); /* kill time nicely */ -+ } -+ break; -+ -+ case DUMP_HALT_CPUS: /* Execute halt */ -+ stop_this_cpu(NULL); -+ break; -+ -+ case DUMP_SOFT_SPIN_CPUS: -+ /* Mark the task so it spins in schedule */ -+ set_tsk_thread_flag(current, TIF_NEED_RESCHED); -+ break; -+ } -+ -+ return 1; -+} -+ -+/* save registers on other processors */ -+void -+__dump_save_other_cpus(void) -+{ -+ int i, cpu = smp_processor_id(); -+ int other_cpus = num_online_cpus()-1; -+ -+ if (other_cpus > 0) { -+ atomic_set(&waiting_for_dump_ipi, other_cpus); -+ -+ for (i = 0; i < NR_CPUS; i++) { -+ dump_expect_ipi[i] = (i != cpu && cpu_online(i)); -+ } -+ -+ /* short circuit normal NMI handling temporarily */ -+ set_nmi_callback(dump_nmi_callback); -+ wmb(); -+ -+ dump_send_ipi(); -+ /* may be we dont need to wait for NMI to be processed. 
-+ just write out the header at the end of dumping, if -+ this IPI is not processed until then, there probably -+ is a problem and we just fail to capture state of -+ other cpus. */ -+ while(atomic_read(&waiting_for_dump_ipi) > 0) { -+ cpu_relax(); -+ } -+ -+ unset_nmi_callback(); -+ } -+} -+ -+/* -+ * Routine to save the old irq affinities and change affinities of all irqs to -+ * the dumping cpu. -+ */ -+static void -+set_irq_affinity(void) -+{ -+ int i; -+ int cpu = smp_processor_id(); -+ -+ memcpy(saved_affinity, irq_affinity, NR_IRQS * sizeof(unsigned long)); -+ for (i = 0; i < NR_IRQS; i++) { -+ if (irq_desc[i].handler == NULL) -+ continue; -+ irq_affinity[i] = 1UL << cpu; -+ if (irq_desc[i].handler->set_affinity != NULL) -+ irq_desc[i].handler->set_affinity(i, irq_affinity[i]); -+ } -+} -+ -+/* -+ * Restore old irq affinities. -+ */ -+static void -+reset_irq_affinity(void) -+{ -+ int i; -+ -+ memcpy(irq_affinity, saved_affinity, NR_IRQS * sizeof(unsigned long)); -+ for (i = 0; i < NR_IRQS; i++) { -+ if (irq_desc[i].handler == NULL) -+ continue; -+ if (irq_desc[i].handler->set_affinity != NULL) -+ irq_desc[i].handler->set_affinity(i, saved_affinity[i]); -+ } -+} -+ -+#else /* !CONFIG_SMP */ -+#define set_irq_affinity() do { } while (0) -+#define reset_irq_affinity() do { } while (0) -+#define save_other_cpu_states() do { } while (0) -+#endif /* !CONFIG_SMP */ -+ -+/* -+ * Kludge - dump from interrupt context is unreliable (Fixme) -+ * -+ * We do this so that softirqs initiated for dump i/o -+ * get processed and we don't hang while waiting for i/o -+ * to complete or in any irq synchronization attempt. -+ * -+ * This is not quite legal of course, as it has the side -+ * effect of making all interrupts & softirqs triggered -+ * while dump is in progress complete before currently -+ * pending softirqs and the currently executing interrupt -+ * code. 
-+ */ -+static inline void -+irq_bh_save(void) -+{ -+ saved_irq_count = irq_count(); -+ preempt_count() &= ~(HARDIRQ_MASK|SOFTIRQ_MASK); -+} -+ -+static inline void -+irq_bh_restore(void) -+{ -+ preempt_count() |= saved_irq_count; -+} -+ -+/* -+ * Name: __dump_irq_enable -+ * Func: Reset system so interrupts are enabled. -+ * This is used for dump methods that require interrupts -+ * Eventually, all methods will have interrupts disabled -+ * and this code can be removed. -+ * -+ * Change irq affinities -+ * Re-enable interrupts -+ */ -+void -+__dump_irq_enable(void) -+{ -+ set_irq_affinity(); -+ irq_bh_save(); -+ local_irq_enable(); -+} -+ -+/* -+ * Name: __dump_irq_restore -+ * Func: Resume the system state in an architecture-specific way. -+ -+ */ -+void -+__dump_irq_restore(void) -+{ -+ local_irq_disable(); -+ reset_irq_affinity(); -+ irq_bh_restore(); -+} -+ -+/* -+ * Name: __dump_configure_header() -+ * Func: Meant to fill in arch specific header fields except per-cpu state -+ * already captured via __dump_save_context for all CPUs. -+ */ -+int -+__dump_configure_header(const struct pt_regs *regs) -+{ -+ return (0); -+} -+ -+/* -+ * Name: __dump_init() -+ * Func: Initialize the dumping routine process. -+ */ -+void -+__dump_init(uint64_t local_memory_start) -+{ -+ return; -+} -+ -+/* -+ * Name: __dump_open() -+ * Func: Open the dump device (architecture specific). -+ */ -+void -+__dump_open(void) -+{ -+ alloc_dha_stack(); -+} -+ -+/* -+ * Name: __dump_cleanup() -+ * Func: Free any architecture specific data structures. This is called -+ * when the dump module is being removed. -+ */ -+void -+__dump_cleanup(void) -+{ -+ free_dha_stack(); -+} -+ -+extern int pfn_is_ram(unsigned long); -+ -+/* -+ * Name: __dump_page_valid() -+ * Func: Check if page is valid to dump. 
-+ */ -+int -+__dump_page_valid(unsigned long index) -+{ -+ if (!pfn_valid(index)) -+ return 0; -+ -+ return pfn_is_ram(index); -+} -+ ---- linux-2.5.69/drivers/dump/dump_memdev.c.lkcdbase Mon Jun 2 17:29:49 2003 -+++ linux-2.5.69/drivers/dump/dump_memdev.c Tue Mar 25 21:34:35 2003 -@@ -0,0 +1,640 @@ -+/* -+ * Implements the dump driver interface for saving a dump in available -+ * memory areas. The saved pages may be written out to persistent storage -+ * after a soft reboot. -+ * -+ * Started: Oct 2002 - Suparna Bhattacharya -+ * -+ * Copyright (C) 2002 International Business Machines Corp. -+ * -+ * This code is released under version 2 of the GNU GPL. -+ * -+ * The approach of tracking pages containing saved dump using map pages -+ * allocated as needed has been derived from the Mission Critical Linux -+ * mcore dump implementation. -+ * -+ * Credits and a big thanks for letting the lkcd project make use of -+ * the excellent piece of work and also helping with clarifications -+ * and tips along the way are due to: -+ * Dave Winchell (primary author of mcore) -+ * Jeff Moyer -+ * Josh Huber -+ * -+ * For those familiar with the mcore code, the main differences worth -+ * noting here (besides the dump device abstraction) result from enabling -+ * "high" memory pages (pages not permanently mapped in the kernel -+ * address space) to be used for saving dump data (because of which a -+ * simple virtual address based linked list cannot be used anymore for -+ * managing free pages), an added level of indirection for faster -+ * lookups during the post-boot stage, and the idea of pages being -+ * made available as they get freed up while dump to memory progresses -+ * rather than one time before starting the dump. The last point enables -+ * a full memory snapshot to be saved starting with an initial set of -+ * bootstrap pages given a good compression ratio. 
(See dump_overlay.c) -+ * -+ */ -+ -+/* -+ * -----------------MEMORY LAYOUT ------------------ -+ * The memory space consists of a set of discontiguous pages, and -+ * discontiguous map pages as well, rooted in a chain of indirect -+ * map pages (also discontiguous). Except for the indirect maps -+ * (which must be preallocated in advance), the rest of the pages -+ * could be in high memory. -+ * -+ * root -+ * | --------- -------- -------- -+ * --> | . . +|--->| . +|------->| . . | indirect -+ * --|--|--- ---|---- --|-|--- maps -+ * | | | | | -+ * ------ ------ ------- ------ ------- -+ * | . | | . | | . . | | . | | . . | maps -+ * --|--- --|--- --|--|-- --|--- ---|-|-- -+ * page page page page page page page data -+ * pages -+ * -+ * Writes to the dump device happen sequentially in append mode. -+ * The main reason for the existence of the indirect map is -+ * to enable a quick way to lookup a specific logical offset in -+ * the saved data post-soft-boot, e.g. to writeout pages -+ * with more critical data first, even though such pages -+ * would have been compressed and copied last, being the lowest -+ * ranked candidates for reuse due to their criticality. 
-+ * (See dump_overlay.c) -+ */ -+#include -+#include -+#include -+#include -+#include "dump_methods.h" -+ -+#define DUMP_MAP_SZ (PAGE_SIZE / sizeof(unsigned long)) /* direct map size */ -+#define DUMP_IND_MAP_SZ DUMP_MAP_SZ - 1 /* indirect map size */ -+#define DUMP_NR_BOOTSTRAP 64 /* no of bootstrap pages */ -+ -+extern int dump_low_page(struct page *); -+ -+/* check if the next entry crosses a page boundary */ -+static inline int is_last_map_entry(unsigned long *map) -+{ -+ unsigned long addr = (unsigned long)(map + 1); -+ -+ return (!(addr & (PAGE_SIZE - 1))); -+} -+ -+/* Todo: should have some validation checks */ -+/* The last entry in the indirect map points to the next indirect map */ -+/* Indirect maps are referred to directly by virtual address */ -+static inline unsigned long *next_indirect_map(unsigned long *map) -+{ -+ return (unsigned long *)map[DUMP_IND_MAP_SZ]; -+} -+ -+#ifdef CONFIG_CRASH_DUMP_SOFTBOOT -+/* Called during early bootup - fixme: make this __init */ -+void dump_early_reserve_map(struct dump_memdev *dev) -+{ -+ unsigned long *map1, *map2; -+ loff_t off = 0, last = dev->last_used_offset >> PAGE_SHIFT; -+ int i, j; -+ -+ printk("Reserve bootmap space holding previous dump of %lld pages\n", -+ last); -+ map1= (unsigned long *)dev->indirect_map_root; -+ -+ while (map1 && (off < last)) { -+ reserve_bootmem(virt_to_phys((void *)map1), PAGE_SIZE); -+ for (i=0; (i < DUMP_MAP_SZ - 1) && map1[i] && (off < last); -+ i++, off += DUMP_MAP_SZ) { -+ pr_debug("indirect map[%d] = 0x%lx\n", i, map1[i]); -+ if (map1[i] >= max_low_pfn) -+ continue; -+ reserve_bootmem(map1[i] << PAGE_SHIFT, PAGE_SIZE); -+ map2 = pfn_to_kaddr(map1[i]); -+ for (j = 0 ; (j < DUMP_MAP_SZ) && map2[j] && -+ (off + j < last); j++) { -+ pr_debug("\t map[%d][%d] = 0x%lx\n", i, j, -+ map2[j]); -+ if (map2[j] < max_low_pfn) { -+ reserve_bootmem(map2[j] << PAGE_SHIFT, -+ PAGE_SIZE); -+ } -+ } -+ } -+ map1 = next_indirect_map(map1); -+ } -+ dev->nr_free = 0; /* these pages don't belong 
to this boot */ -+} -+#endif -+ -+/* mark dump pages so that they aren't used by this kernel */ -+void dump_mark_map(struct dump_memdev *dev) -+{ -+ unsigned long *map1, *map2; -+ loff_t off = 0, last = dev->last_used_offset >> PAGE_SHIFT; -+ struct page *page; -+ int i, j; -+ -+ printk("Dump: marking pages in use by previous dump\n"); -+ map1= (unsigned long *)dev->indirect_map_root; -+ -+ while (map1 && (off < last)) { -+ page = virt_to_page(map1); -+ set_page_count(page, 1); -+ for (i=0; (i < DUMP_MAP_SZ - 1) && map1[i] && (off < last); -+ i++, off += DUMP_MAP_SZ) { -+ pr_debug("indirect map[%d] = 0x%lx\n", i, map1[i]); -+ page = pfn_to_page(map1[i]); -+ set_page_count(page, 1); -+ map2 = kmap_atomic(page, KM_DUMP); -+ for (j = 0 ; (j < DUMP_MAP_SZ) && map2[j] && -+ (off + j < last); j++) { -+ pr_debug("\t map[%d][%d] = 0x%lx\n", i, j, -+ map2[j]); -+ page = pfn_to_page(map2[j]); -+ set_page_count(page, 1); -+ } -+ } -+ map1 = next_indirect_map(map1); -+ } -+} -+ -+ -+/* -+ * Given a logical offset into the mem device lookup the -+ * corresponding page -+ * loc is specified in units of pages -+ * Note: affects curr_map (even in the case where lookup fails) -+ */ -+struct page *dump_mem_lookup(struct dump_memdev *dump_mdev, unsigned long loc) -+{ -+ unsigned long *map; -+ unsigned long i, index = loc / DUMP_MAP_SZ; -+ struct page *page = NULL; -+ unsigned long curr_pfn, curr_map, *curr_map_ptr = NULL; -+ -+ map = (unsigned long *)dump_mdev->indirect_map_root; -+ if (!map) -+ return NULL; -+ -+ if (loc > dump_mdev->last_offset >> PAGE_SHIFT) -+ return NULL; -+ -+ /* -+ * first locate the right indirect map -+ * in the chain of indirect maps -+ */ -+ for (i = 0; i + DUMP_IND_MAP_SZ < index ; i += DUMP_IND_MAP_SZ) { -+ if (!(map = next_indirect_map(map))) -+ return NULL; -+ } -+ /* then the right direct map */ -+ /* map entries are referred to by page index */ -+ if ((curr_map = map[index - i])) { -+ page = pfn_to_page(curr_map); -+ /* update the current traversal 
index */ -+ /* dump_mdev->curr_map = &map[index - i];*/ -+ curr_map_ptr = &map[index - i]; -+ } -+ -+ if (page) -+ map = kmap_atomic(page, KM_DUMP); -+ else -+ return NULL; -+ -+ /* and finally the right entry therein */ -+ /* data pages are referred to by page index */ -+ i = index * DUMP_MAP_SZ; -+ if ((curr_pfn = map[loc - i])) { -+ page = pfn_to_page(curr_pfn); -+ dump_mdev->curr_map = curr_map_ptr; -+ dump_mdev->curr_map_offset = loc - i; -+ dump_mdev->ddev.curr_offset = loc << PAGE_SHIFT; -+ } else { -+ page = NULL; -+ } -+ kunmap_atomic(map, KM_DUMP); -+ -+ return page; -+} -+ -+/* -+ * Retrieves a pointer to the next page in the dump device -+ * Used during the lookup pass post-soft-reboot -+ */ -+struct page *dump_mem_next_page(struct dump_memdev *dev) -+{ -+ unsigned long i; -+ unsigned long *map; -+ struct page *page = NULL; -+ -+ if (dev->ddev.curr_offset + PAGE_SIZE >= dev->last_offset) { -+ return NULL; -+ } -+ -+ if ((i = (unsigned long)(++dev->curr_map_offset)) >= DUMP_MAP_SZ) { -+ /* move to next map */ -+ if (is_last_map_entry(++dev->curr_map)) { -+ /* move to the next indirect map page */ -+ printk("dump_mem_next_page: go to next indirect map\n"); -+ dev->curr_map = (unsigned long *)*dev->curr_map; -+ if (!dev->curr_map) -+ return NULL; -+ } -+ i = dev->curr_map_offset = 0; -+ pr_debug("dump_mem_next_page: next map 0x%lx, entry 0x%lx\n", -+ dev->curr_map, *dev->curr_map); -+ -+ }; -+ -+ if (*dev->curr_map) { -+ map = kmap_atomic(pfn_to_page(*dev->curr_map), KM_DUMP); -+ if (map[i]) -+ page = pfn_to_page(map[i]); -+ kunmap_atomic(map, KM_DUMP); -+ dev->ddev.curr_offset += PAGE_SIZE; -+ }; -+ -+ return page; -+} -+ -+/* Copied from dump_filters.c */ -+static inline int kernel_page(struct page *p) -+{ -+ /* FIXME: Need to exclude hugetlb pages. 
Clue: reserved but inuse */ -+ return PageReserved(p) || (!PageLRU(p) && PageInuse(p)); -+} -+ -+static inline int user_page(struct page *p) -+{ -+ return PageInuse(p) && (!PageReserved(p) && PageLRU(p)); -+} -+ -+int dump_reused_by_boot(struct page *page) -+{ -+ /* Todo -+ * Checks: -+ * if PageReserved -+ * if < __end + bootmem_bootmap_pages for this boot + allowance -+ * if overwritten by initrd (how to check ?) -+ * Also, add more checks in early boot code -+ * e.g. bootmem bootmap alloc verify not overwriting dump, and if -+ * so then realloc or move the dump pages out accordingly. -+ */ -+ -+ /* Temporary proof of concept hack, avoid overwriting kern pages */ -+ -+ return (kernel_page(page) || dump_low_page(page) || user_page(page)); -+} -+ -+ -+/* Uses the free page passed in to expand available space */ -+int dump_mem_add_space(struct dump_memdev *dev, struct page *page) -+{ -+ struct page *map_page; -+ unsigned long *map; -+ unsigned long i; -+ -+ if (!dev->curr_map) -+ return -ENOMEM; /* must've exhausted indirect map */ -+ -+ if (!*dev->curr_map || dev->curr_map_offset >= DUMP_MAP_SZ) { -+ /* add map space */ -+ *dev->curr_map = page_to_pfn(page); -+ dev->curr_map_offset = 0; -+ return 0; -+ } -+ -+ /* add data space */ -+ i = dev->curr_map_offset; -+ map_page = pfn_to_page(*dev->curr_map); -+ map = (unsigned long *)kmap_atomic(map_page, KM_DUMP); -+ map[i] = page_to_pfn(page); -+ kunmap_atomic(map, KM_DUMP); -+ dev->curr_map_offset = ++i; -+ dev->last_offset += PAGE_SIZE; -+ if (i >= DUMP_MAP_SZ) { -+ /* move to next map */ -+ if (is_last_map_entry(++dev->curr_map)) { -+ /* move to the next indirect map page */ -+ pr_debug("dump_mem_add_space: using next" -+ "indirect map\n"); -+ dev->curr_map = (unsigned long *)*dev->curr_map; -+ } -+ } -+ return 0; -+} -+ -+ -+/* Caution: making a dest page invalidates existing contents of the page */ -+int dump_check_and_free_page(struct dump_memdev *dev, struct page *page) -+{ -+ int err = 0; -+ -+ /* -+ * the page 
can be used as a destination only if we are sure -+ * it won't get overwritten by the soft-boot, and is not -+ * critical for us right now. -+ */ -+ if (dump_reused_by_boot(page)) -+ return 0; -+ -+ if ((err = dump_mem_add_space(dev, page))) { -+ printk("Warning: Unable to extend memdev space. Err %d\n", -+ err); -+ return 0; -+ } -+ -+ dev->nr_free++; -+ return 1; -+} -+ -+ -+/* Set up the initial maps and bootstrap space */ -+/* Must be called only after any previous dump is written out */ -+int dump_mem_open(struct dump_dev *dev, unsigned long devid) -+{ -+ struct dump_memdev *dump_mdev = DUMP_MDEV(dev); -+ unsigned long nr_maps, *map, *prev_map = &dump_mdev->indirect_map_root; -+ void *addr; -+ struct page *page; -+ unsigned long i = 0; -+ int err = 0; -+ -+ /* Todo: sanity check for unwritten previous dump */ -+ -+ /* allocate pages for indirect map (non highmem area) */ -+ nr_maps = num_physpages / DUMP_MAP_SZ; /* maps to cover entire mem */ -+ for (i = 0; i < nr_maps; i += DUMP_IND_MAP_SZ) { -+ if (!(map = (unsigned long *)dump_alloc_mem(PAGE_SIZE))) { -+ printk("Unable to alloc indirect map %ld\n", -+ i / DUMP_IND_MAP_SZ); -+ return -ENOMEM; -+ } -+ clear_page(map); -+ *prev_map = (unsigned long)map; -+ prev_map = &map[DUMP_IND_MAP_SZ]; -+ }; -+ -+ dump_mdev->curr_map = (unsigned long *)dump_mdev->indirect_map_root; -+ dump_mdev->curr_map_offset = 0; -+ -+ /* -+ * allocate a few bootstrap pages: at least 1 map and 1 data page -+ * plus enough to save the dump header -+ */ -+ i = 0; -+ do { -+ if (!(addr = dump_alloc_mem(PAGE_SIZE))) { -+ printk("Unable to alloc bootstrap page %ld\n", i); -+ return -ENOMEM; -+ } -+ -+ page = virt_to_page(addr); -+ if (dump_low_page(page)) { -+ dump_free_mem(addr); -+ continue; -+ } -+ -+ if (dump_mem_add_space(dump_mdev, page)) { -+ printk("Warning: Unable to extend memdev " -+ "space. 
Err %d\n", err); -+ dump_free_mem(addr); -+ continue; -+ } -+ i++; -+ } while (i < DUMP_NR_BOOTSTRAP); -+ -+ printk("dump memdev init: %ld maps, %ld bootstrap pgs, %ld free pgs\n", -+ nr_maps, i, dump_mdev->last_offset >> PAGE_SHIFT); -+ -+ dump_mdev->last_bs_offset = dump_mdev->last_offset; -+ -+ return 0; -+} -+ -+/* Releases all pre-alloc'd pages */ -+int dump_mem_release(struct dump_dev *dev) -+{ -+ struct dump_memdev *dump_mdev = DUMP_MDEV(dev); -+ struct page *page, *map_page; -+ unsigned long *map, *prev_map; -+ void *addr; -+ int i; -+ -+ if (!dump_mdev->nr_free) -+ return 0; -+ -+ pr_debug("dump_mem_release\n"); -+ page = dump_mem_lookup(dump_mdev, 0); -+ for (i = 0; page && (i < DUMP_NR_BOOTSTRAP - 1); i++) { -+ if (PageHighMem(page)) -+ break; -+ addr = page_address(page); -+ if (!addr) { -+ printk("page_address(%p) = NULL\n", page); -+ break; -+ } -+ pr_debug("Freeing page at 0x%lx\n", addr); -+ dump_free_mem(addr); -+ if (dump_mdev->curr_map_offset >= DUMP_MAP_SZ - 1) { -+ map_page = pfn_to_page(*dump_mdev->curr_map); -+ if (PageHighMem(map_page)) -+ break; -+ page = dump_mem_next_page(dump_mdev); -+ addr = page_address(map_page); -+ if (!addr) { -+ printk("page_address(%p) = NULL\n", -+ map_page); -+ break; -+ } -+ pr_debug("Freeing map page at 0x%lx\n", addr); -+ dump_free_mem(addr); -+ i++; -+ } else { -+ page = dump_mem_next_page(dump_mdev); -+ } -+ } -+ -+ /* now for the last used bootstrap page used as a map page */ -+ if ((i < DUMP_NR_BOOTSTRAP) && (*dump_mdev->curr_map)) { -+ map_page = pfn_to_page(*dump_mdev->curr_map); -+ if ((map_page) && !PageHighMem(map_page)) { -+ addr = page_address(map_page); -+ if (!addr) { -+ printk("page_address(%p) = NULL\n", map_page); -+ } else { -+ pr_debug("Freeing map page at 0x%lx\n", addr); -+ dump_free_mem(addr); -+ i++; -+ } -+ } -+ } -+ -+ printk("Freed %d bootstrap pages\n", i); -+ -+ /* free the indirect maps */ -+ map = (unsigned long *)dump_mdev->indirect_map_root; -+ -+ i = 0; -+ while (map) { -+ 
prev_map = map; -+ map = next_indirect_map(map); -+ dump_free_mem(prev_map); -+ i++; -+ } -+ -+ printk("Freed %d indirect map(s)\n", i); -+ -+ /* Reset the indirect map */ -+ dump_mdev->indirect_map_root = 0; -+ dump_mdev->curr_map = 0; -+ -+ /* Reset the free list */ -+ dump_mdev->nr_free = 0; -+ -+ dump_mdev->last_offset = dump_mdev->ddev.curr_offset = 0; -+ dump_mdev->last_used_offset = 0; -+ dump_mdev->curr_map = NULL; -+ dump_mdev->curr_map_offset = 0; -+ return 0; -+} -+ -+/* -+ * Long term: -+ * It is critical for this to be very strict. Cannot afford -+ * to have anything running and accessing memory while we overwrite -+ * memory (potential risk of data corruption). -+ * If in doubt (e.g if a cpu is hung and not responding) just give -+ * up and refuse to proceed with this scheme. -+ * -+ * Note: I/O will only happen after soft-boot/switchover, so we can -+ * safely disable interrupts and force stop other CPUs if this is -+ * going to be a disruptive dump, no matter what they -+ * are in the middle of. -+ */ -+/* -+ * ATM Most of this is already taken care of in the nmi handler -+ * We may halt the cpus rightaway if we know this is going to be disruptive -+ * For now, since we've limited ourselves to overwriting free pages we -+ * aren't doing much here. Eventually, we'd have to wait to make sure other -+ * cpus aren't using memory we could be overwriting -+ */ -+int dump_mem_silence(struct dump_dev *dev) -+{ -+ struct dump_memdev *dump_mdev = DUMP_MDEV(dev); -+ -+ if (dump_mdev->last_offset > dump_mdev->last_bs_offset) { -+ /* prefer to run lkcd config & start with a clean slate */ -+ return -EEXIST; -+ } -+ return 0; -+} -+ -+extern int dump_overlay_resume(void); -+ -+/* Trigger the next stage of dumping */ -+int dump_mem_resume(struct dump_dev *dev) -+{ -+ dump_overlay_resume(); -+ return 0; -+} -+ -+/* -+ * Allocate mem dev pages as required and copy buffer contents into it. 
-+ * Fails if the no free pages are available -+ * Keeping it simple and limited for starters (can modify this over time) -+ * Does not handle holes or a sparse layout -+ * Data must be in multiples of PAGE_SIZE -+ */ -+int dump_mem_write(struct dump_dev *dev, void *buf, unsigned long len) -+{ -+ struct dump_memdev *dump_mdev = DUMP_MDEV(dev); -+ struct page *page; -+ unsigned long n = 0; -+ void *addr; -+ unsigned long *saved_curr_map, saved_map_offset; -+ int ret = 0; -+ -+ pr_debug("dump_mem_write: offset 0x%llx, size %ld\n", -+ dev->curr_offset, len); -+ -+ if (dev->curr_offset + len > dump_mdev->last_offset) { -+ printk("Out of space to write\n"); -+ return -ENOSPC; -+ } -+ -+ if ((len & (PAGE_SIZE - 1)) || (dev->curr_offset & (PAGE_SIZE - 1))) -+ return -EINVAL; /* not aligned in units of page size */ -+ -+ saved_curr_map = dump_mdev->curr_map; -+ saved_map_offset = dump_mdev->curr_map_offset; -+ page = dump_mem_lookup(dump_mdev, dev->curr_offset >> PAGE_SHIFT); -+ -+ for (n = len; (n > 0) && page; n -= PAGE_SIZE, buf += PAGE_SIZE ) { -+ addr = kmap_atomic(page, KM_DUMP); -+ /* memset(addr, 'x', PAGE_SIZE); */ -+ memcpy(addr, buf, PAGE_SIZE); -+ kunmap_atomic(addr, KM_DUMP); -+ /* dev->curr_offset += PAGE_SIZE; */ -+ page = dump_mem_next_page(dump_mdev); -+ } -+ -+ dump_mdev->curr_map = saved_curr_map; -+ dump_mdev->curr_map_offset = saved_map_offset; -+ -+ if (dump_mdev->last_used_offset < dev->curr_offset) -+ dump_mdev->last_used_offset = dev->curr_offset; -+ -+ return (len - n) ? 
(len - n) : ret ; -+} -+ -+/* dummy - always ready */ -+int dump_mem_ready(struct dump_dev *dev, void *buf) -+{ -+ return 0; -+} -+ -+/* -+ * Should check for availability of space to write upto the offset -+ * affects only the curr_offset; last_offset untouched -+ * Keep it simple: Only allow multiples of PAGE_SIZE for now -+ */ -+int dump_mem_seek(struct dump_dev *dev, loff_t offset) -+{ -+ struct dump_memdev *dump_mdev = DUMP_MDEV(dev); -+ -+ if (offset & (PAGE_SIZE - 1)) -+ return -EINVAL; /* allow page size units only for now */ -+ -+ /* Are we exceeding available space ? */ -+ if (offset > dump_mdev->last_offset) { -+ printk("dump_mem_seek failed for offset 0x%llx\n", -+ offset); -+ return -ENOSPC; -+ } -+ -+ dump_mdev->ddev.curr_offset = offset; -+ return 0; -+} -+ -+struct dump_dev_ops dump_memdev_ops = { -+ .open = dump_mem_open, -+ .release = dump_mem_release, -+ .silence = dump_mem_silence, -+ .resume = dump_mem_resume, -+ .seek = dump_mem_seek, -+ .write = dump_mem_write, -+ .read = NULL, /* not implemented at the moment */ -+ .ready = dump_mem_ready -+}; -+ -+static struct dump_memdev default_dump_memdev = { -+ .ddev = {.type_name = "memdev", .ops = &dump_memdev_ops, -+ .device_id = 0x14} -+ /* assume the rest of the fields are zeroed by default */ -+}; -+ -+/* may be overwritten if a previous dump exists */ -+struct dump_memdev *dump_memdev = &default_dump_memdev; -+ ---- linux-2.5.69/drivers/dump/dump_netdev.c.lkcdbase Mon Jun 2 17:29:49 2003 -+++ linux-2.5.69/drivers/dump/dump_netdev.c Tue May 20 03:04:07 2003 -@@ -0,0 +1,863 @@ -+/* -+ * Implements the dump driver interface for saving a dump via network -+ * interface. -+ * -+ * Some of this code has been taken/adapted from Ingo Molnar's netconsole -+ * code. LKCD team expresses its thanks to Ingo. -+ * -+ * Started: June 2002 - Mohamed Abbas -+ * Adapted netconsole code to implement LKCD dump over the network. -+ * -+ * Nov 2002 - Bharata B. 
Rao -+ * Innumerable code cleanups, simplification and some fixes. -+ * Netdump configuration done by ioctl instead of using module parameters. -+ * -+ * Copyright (C) 2001 Ingo Molnar -+ * Copyright (C) 2002 International Business Machines Corp. -+ * -+ * This code is released under version 2 of the GNU GPL. -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+ -+static int startup_handshake; -+static int page_counter; -+static struct net_device *dump_ndev; -+static struct in_device *dump_in_dev; -+static u16 source_port, target_port; -+static u32 source_ip, target_ip; -+static unsigned char daddr[6] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff} ; -+static spinlock_t dump_skb_lock = SPIN_LOCK_UNLOCKED; -+static int dump_nr_skbs; -+static struct sk_buff *dump_skb; -+static unsigned long flags_global; -+static int netdump_in_progress; -+static char device_name[IFNAMSIZ]; -+ -+/* -+ * security depends on the trusted path between the netconsole -+ * server and netconsole client, since none of the packets are -+ * encrypted. The random magic number protects the protocol -+ * against spoofing. -+ */ -+static u64 dump_magic; -+ -+#define MAX_UDP_CHUNK 1460 -+#define MAX_PRINT_CHUNK (MAX_UDP_CHUNK-HEADER_LEN) -+ -+/* -+ * We maintain a small pool of fully-sized skbs, -+ * to make sure the message gets out even in -+ * extreme OOM situations. 
-+ */ -+#define DUMP_MAX_SKBS 32 -+ -+#define MAX_SKB_SIZE \ -+ (MAX_UDP_CHUNK + sizeof(struct udphdr) + \ -+ sizeof(struct iphdr) + sizeof(struct ethhdr)) -+ -+static void -+dump_refill_skbs(void) -+{ -+ struct sk_buff *skb; -+ unsigned long flags; -+ -+ spin_lock_irqsave(&dump_skb_lock, flags); -+ while (dump_nr_skbs < DUMP_MAX_SKBS) { -+ skb = alloc_skb(MAX_SKB_SIZE, GFP_ATOMIC); -+ if (!skb) -+ break; -+ if (dump_skb) -+ skb->next = dump_skb; -+ else -+ skb->next = NULL; -+ dump_skb = skb; -+ dump_nr_skbs++; -+ } -+ spin_unlock_irqrestore(&dump_skb_lock, flags); -+} -+ -+static struct -+sk_buff * dump_get_skb(void) -+{ -+ struct sk_buff *skb; -+ unsigned long flags; -+ -+ spin_lock_irqsave(&dump_skb_lock, flags); -+ skb = dump_skb; -+ if (skb) { -+ dump_skb = skb->next; -+ skb->next = NULL; -+ dump_nr_skbs--; -+ } -+ spin_unlock_irqrestore(&dump_skb_lock, flags); -+ -+ return skb; -+} -+ -+/* -+ * Zap completed output skbs. -+ */ -+static void -+zap_completion_queue(void) -+{ -+ int count; -+ unsigned long flags; -+ int cpu = smp_processor_id(); -+ -+ count=0; -+ if (softnet_data[cpu].completion_queue) { -+ struct sk_buff *clist; -+ -+ local_irq_save(flags); -+ clist = softnet_data[cpu].completion_queue; -+ softnet_data[cpu].completion_queue = NULL; -+ local_irq_restore(flags); -+ -+ while (clist != NULL) { -+ struct sk_buff *skb = clist; -+ clist = clist->next; -+ __kfree_skb(skb); -+ count++; -+ if (count > 10000) -+ printk("Error in sk list\n"); -+ } -+ } -+} -+ -+static void -+dump_send_skb(struct net_device *dev, const char *msg, unsigned int msg_len, -+ reply_t *reply) -+{ -+ int once = 1; -+ int total_len, eth_len, ip_len, udp_len, count = 0; -+ struct sk_buff *skb; -+ struct udphdr *udph; -+ struct iphdr *iph; -+ struct ethhdr *eth; -+ -+ udp_len = msg_len + HEADER_LEN + sizeof(*udph); -+ ip_len = eth_len = udp_len + sizeof(*iph); -+ total_len = eth_len + ETH_HLEN; -+ -+repeat_loop: -+ zap_completion_queue(); -+ if (dump_nr_skbs < DUMP_MAX_SKBS) -+ 
dump_refill_skbs(); -+ -+ skb = alloc_skb(total_len, GFP_ATOMIC); -+ if (!skb) { -+ skb = dump_get_skb(); -+ if (!skb) { -+ count++; -+ if (once && (count == 1000000)) { -+ printk("possibly FATAL: out of netconsole " -+ "skbs!!! will keep retrying.\n"); -+ once = 0; -+ } -+ dev->poll_controller(dev); -+ goto repeat_loop; -+ } -+ } -+ -+ atomic_set(&skb->users, 1); -+ skb_reserve(skb, total_len - msg_len - HEADER_LEN); -+ skb->data[0] = NETCONSOLE_VERSION; -+ -+ put_unaligned(htonl(reply->nr), (u32 *) (skb->data + 1)); -+ put_unaligned(htonl(reply->code), (u32 *) (skb->data + 5)); -+ put_unaligned(htonl(reply->info), (u32 *) (skb->data + 9)); -+ -+ memcpy(skb->data + HEADER_LEN, msg, msg_len); -+ skb->len += msg_len + HEADER_LEN; -+ -+ udph = (struct udphdr *) skb_push(skb, sizeof(*udph)); -+ udph->source = source_port; -+ udph->dest = target_port; -+ udph->len = htons(udp_len); -+ udph->check = 0; -+ -+ iph = (struct iphdr *)skb_push(skb, sizeof(*iph)); -+ -+ iph->version = 4; -+ iph->ihl = 5; -+ iph->tos = 0; -+ iph->tot_len = htons(ip_len); -+ iph->id = 0; -+ iph->frag_off = 0; -+ iph->ttl = 64; -+ iph->protocol = IPPROTO_UDP; -+ iph->check = 0; -+ iph->saddr = source_ip; -+ iph->daddr = target_ip; -+ iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); -+ -+ eth = (struct ethhdr *) skb_push(skb, ETH_HLEN); -+ -+ eth->h_proto = htons(ETH_P_IP); -+ memcpy(eth->h_source, dev->dev_addr, dev->addr_len); -+ memcpy(eth->h_dest, daddr, dev->addr_len); -+ -+ count=0; -+repeat_poll: -+ spin_lock(&dev->xmit_lock); -+ dev->xmit_lock_owner = smp_processor_id(); -+ -+ count++; -+ -+ -+ if (netif_queue_stopped(dev)) { -+ dev->xmit_lock_owner = -1; -+ spin_unlock(&dev->xmit_lock); -+ -+ dev->poll_controller(dev); -+ zap_completion_queue(); -+ -+ -+ goto repeat_poll; -+ } -+ -+ dev->hard_start_xmit(skb, dev); -+ -+ dev->xmit_lock_owner = -1; -+ spin_unlock(&dev->xmit_lock); -+} -+ -+static unsigned short -+udp_check(struct udphdr *uh, int len, unsigned long saddr, unsigned 
long daddr, -+ unsigned long base) -+{ -+ return csum_tcpudp_magic(saddr, daddr, len, IPPROTO_UDP, base); -+} -+ -+static int -+udp_checksum_init(struct sk_buff *skb, struct udphdr *uh, -+ unsigned short ulen, u32 saddr, u32 daddr) -+{ -+ if (uh->check == 0) { -+ skb->ip_summed = CHECKSUM_UNNECESSARY; -+ } else if (skb->ip_summed == CHECKSUM_HW) { -+ skb->ip_summed = CHECKSUM_UNNECESSARY; -+ if (!udp_check(uh, ulen, saddr, daddr, skb->csum)) -+ return 0; -+ skb->ip_summed = CHECKSUM_NONE; -+ } -+ if (skb->ip_summed != CHECKSUM_UNNECESSARY) -+ skb->csum = csum_tcpudp_nofold(saddr, daddr, ulen, -+ IPPROTO_UDP, 0); -+ /* Probably, we should checksum udp header (it should be in cache -+ * in any case) and data in tiny packets (< rx copybreak). -+ */ -+ return 0; -+} -+ -+static __inline__ int -+__udp_checksum_complete(struct sk_buff *skb) -+{ -+ return (unsigned short)csum_fold(skb_checksum(skb, 0, skb->len, -+ skb->csum)); -+} -+ -+static __inline__ -+int udp_checksum_complete(struct sk_buff *skb) -+{ -+ return skb->ip_summed != CHECKSUM_UNNECESSARY && -+ __udp_checksum_complete(skb); -+} -+ -+int new_req = 0; -+static req_t req; -+ -+static int -+dump_rx_hook(struct sk_buff *skb) -+{ -+ int proto; -+ struct iphdr *iph; -+ struct udphdr *uh; -+ __u32 len, saddr, daddr, ulen; -+ req_t *__req; -+ -+ /* -+ * First check if were are dumping or doing startup handshake, if -+ * not quickly return. 
-+ */ -+ if (!netdump_in_progress) -+ return NET_RX_SUCCESS; -+ -+ if (skb->dev->type != ARPHRD_ETHER) -+ goto out; -+ -+ proto = ntohs(skb->mac.ethernet->h_proto); -+ if (proto != ETH_P_IP) -+ goto out; -+ -+ if (skb->pkt_type == PACKET_OTHERHOST) -+ goto out; -+ -+ if (skb_shared(skb)) -+ goto out; -+ -+ /* IP header correctness testing: */ -+ iph = (struct iphdr *)skb->data; -+ if (!pskb_may_pull(skb, sizeof(struct iphdr))) -+ goto out; -+ -+ if (iph->ihl < 5 || iph->version != 4) -+ goto out; -+ -+ if (!pskb_may_pull(skb, iph->ihl*4)) -+ goto out; -+ -+ if (ip_fast_csum((u8 *)iph, iph->ihl) != 0) -+ goto out; -+ -+ len = ntohs(iph->tot_len); -+ if (skb->len < len || len < iph->ihl*4) -+ goto out; -+ -+ saddr = iph->saddr; -+ daddr = iph->daddr; -+ if (iph->protocol != IPPROTO_UDP) -+ goto out; -+ -+ if (source_ip != daddr) -+ goto out; -+ -+ if (target_ip != saddr) -+ goto out; -+ -+ len -= iph->ihl*4; -+ uh = (struct udphdr *)(((char *)iph) + iph->ihl*4); -+ ulen = ntohs(uh->len); -+ -+ if (ulen != len || ulen < (sizeof(*uh) + sizeof(*__req))) -+ goto out; -+ -+ if (udp_checksum_init(skb, uh, ulen, saddr, daddr) < 0) -+ goto out; -+ -+ if (udp_checksum_complete(skb)) -+ goto out; -+ -+ if (source_port != uh->dest) -+ goto out; -+ -+ if (target_port != uh->source) -+ goto out; -+ -+ __req = (req_t *)(uh + 1); -+ if ((ntohl(__req->command) != COMM_GET_MAGIC) && -+ (ntohl(__req->command) != COMM_HELLO) && -+ (ntohl(__req->command) != COMM_START_WRITE_NETDUMP_ACK) && -+ (ntohl(__req->command) != COMM_START_NETDUMP_ACK) && -+ (memcmp(&__req->magic, &dump_magic, sizeof(dump_magic)) != 0)) -+ goto out; -+ -+ req.magic = ntohl(__req->magic); -+ req.command = ntohl(__req->command); -+ req.from = ntohl(__req->from); -+ req.to = ntohl(__req->to); -+ req.nr = ntohl(__req->nr); -+ new_req = 1; -+out: -+ return NET_RX_DROP; -+} -+ -+static void -+dump_send_mem(struct net_device *dev, req_t *req, const char* buff, size_t len) -+{ -+ int i; -+ -+ int nr_chunks = len/1024; -+ 
reply_t reply; -+ -+ reply.nr = req->nr; -+ reply.info = 0; -+ -+ if ( nr_chunks <= 0) -+ nr_chunks = 1; -+ for (i = 0; i < nr_chunks; i++) { -+ unsigned int offset = i*1024; -+ reply.code = REPLY_MEM; -+ reply.info = offset; -+ dump_send_skb(dev, buff + offset, 1024, &reply); -+ } -+} -+ -+/* -+ * This function waits for the client to acknowledge the receipt -+ * of the netdump startup reply, with the possibility of packets -+ * getting lost. We resend the startup packet if no ACK is received, -+ * after a 1 second delay. -+ * -+ * (The client can test the success of the handshake via the HELLO -+ * command, and send ACKs until we enter netdump mode.) -+ */ -+static int -+dump_handshake(struct dump_dev *net_dev) -+{ -+ char tmp[200]; -+ reply_t reply; -+ int i, j; -+ -+ if (startup_handshake) { -+ sprintf(tmp, "NETDUMP start, waiting for start-ACK.\n"); -+ reply.code = REPLY_START_NETDUMP; -+ reply.nr = 0; -+ reply.info = 0; -+ } else { -+ sprintf(tmp, "NETDUMP start, waiting for start-ACK.\n"); -+ reply.code = REPLY_START_WRITE_NETDUMP; -+ reply.nr = net_dev->curr_offset; -+ reply.info = net_dev->curr_offset; -+ } -+ -+ /* send 300 handshake packets before declaring failure */ -+ for (i = 0; i < 300; i++) { -+ dump_send_skb(dump_ndev, tmp, strlen(tmp), &reply); -+ -+ /* wait 1 sec */ -+ for (j = 0; j < 10000; j++) { -+ udelay(100); -+ dump_ndev->poll_controller(dump_ndev); -+ zap_completion_queue(); -+ if (new_req) -+ break; -+ } -+ -+ /* -+ * if there is no new request, try sending the handshaking -+ * packet again -+ */ -+ if (!new_req) -+ continue; -+ -+ /* -+ * check if the new request is of the expected type, -+ * if so, return, else try sending the handshaking -+ * packet again -+ */ -+ if (startup_handshake) { -+ if (req.command == COMM_HELLO || req.command == -+ COMM_START_NETDUMP_ACK) { -+ return 0; -+ } else { -+ new_req = 0; -+ continue; -+ } -+ } else { -+ if (req.command == COMM_SEND_MEM) { -+ return 0; -+ } else { -+ new_req = 0; -+ continue; -+ } 
-+ } -+ } -+ return -1; -+} -+ -+static ssize_t -+do_netdump(struct dump_dev *net_dev, const char* buff, size_t len) -+{ -+ reply_t reply; -+ char tmp[200]; -+ ssize_t ret = 0; -+ int repeatCounter, counter, total_loop; -+ -+ netdump_in_progress = 1; -+ -+ if (dump_handshake(net_dev) < 0) { -+ printk("network dump failed due to handshake failure\n"); -+ goto out; -+ } -+ -+ /* -+ * Ideally startup handshake should be done during dump configuration, -+ * i.e., in dump_net_open(). This will be done when I figure out -+ * the dependency between startup handshake, subsequent write and -+ * various commands wrt to net-server. -+ */ -+ if (startup_handshake) -+ startup_handshake = 0; -+ -+ counter = 0; -+ repeatCounter = 0; -+ total_loop = 0; -+ while (1) { -+ if (!new_req) { -+ dump_ndev->poll_controller(dump_ndev); -+ zap_completion_queue(); -+ } -+ if (!new_req) { -+ repeatCounter++; -+ -+ if (repeatCounter > 5) { -+ counter++; -+ if (counter > 10000) { -+ if (total_loop >= 100000) { -+ printk("Time OUT LEAVE NOW\n"); -+ goto out; -+ } else { -+ total_loop++; -+ printk("Try number %d out of " -+ "10 before Time Out\n", -+ total_loop); -+ } -+ } -+ mdelay(1); -+ repeatCounter = 0; -+ } -+ continue; -+ } -+ repeatCounter = 0; -+ counter = 0; -+ total_loop = 0; -+ new_req = 0; -+ switch (req.command) { -+ case COMM_NONE: -+ break; -+ -+ case COMM_SEND_MEM: -+ dump_send_mem(dump_ndev, &req, buff, len); -+ break; -+ -+ case COMM_EXIT: -+ case COMM_START_WRITE_NETDUMP_ACK: -+ ret = len; -+ goto out; -+ -+ case COMM_HELLO: -+ sprintf(tmp, "Hello, this is netdump version " -+ "0.%02d\n", NETCONSOLE_VERSION); -+ reply.code = REPLY_HELLO; -+ reply.nr = req.nr; -+ reply.info = net_dev->curr_offset; -+ dump_send_skb(dump_ndev, tmp, strlen(tmp), &reply); -+ break; -+ -+ case COMM_GET_PAGE_SIZE: -+ sprintf(tmp, "PAGE_SIZE: %ld\n", PAGE_SIZE); -+ reply.code = REPLY_PAGE_SIZE; -+ reply.nr = req.nr; -+ reply.info = PAGE_SIZE; -+ dump_send_skb(dump_ndev, tmp, strlen(tmp), &reply); -+ 
break; -+ -+ case COMM_GET_NR_PAGES: -+ reply.code = REPLY_NR_PAGES; -+ reply.nr = req.nr; -+ reply.info = num_physpages; -+ reply.info = page_counter; -+ sprintf(tmp, "Number of pages: %ld\n", num_physpages); -+ dump_send_skb(dump_ndev, tmp, strlen(tmp), &reply); -+ break; -+ -+ case COMM_GET_MAGIC: -+ reply.code = REPLY_MAGIC; -+ reply.nr = req.nr; -+ reply.info = NETCONSOLE_VERSION; -+ dump_send_skb(dump_ndev, (char *)&dump_magic, -+ sizeof(dump_magic), &reply); -+ break; -+ -+ default: -+ reply.code = REPLY_ERROR; -+ reply.nr = req.nr; -+ reply.info = req.command; -+ sprintf(tmp, "Got unknown command code %d!\n", -+ req.command); -+ dump_send_skb(dump_ndev, tmp, strlen(tmp), &reply); -+ break; -+ } -+ } -+out: -+ netdump_in_progress = 0; -+ return ret; -+} -+ -+static int -+dump_validate_config(void) -+{ -+ source_ip = dump_in_dev->ifa_list->ifa_local; -+ if (!source_ip) { -+ printk("network device %s has no local address, " -+ "aborting.\n", device_name); -+ return -1; -+ } -+ -+#define IP(x) ((unsigned char *)&source_ip)[x] -+ printk("Source %d.%d.%d.%d", IP(0), IP(1), IP(2), IP(3)); -+#undef IP -+ -+ if (!source_port) { -+ printk("source_port parameter not specified, aborting.\n"); -+ return -1; -+ } -+ printk(":%i\n", source_port); -+ source_port = htons(source_port); -+ -+ if (!target_ip) { -+ printk("target_ip parameter not specified, aborting.\n"); -+ return -1; -+ } -+ -+#define IP(x) ((unsigned char *)&target_ip)[x] -+ printk("Target %d.%d.%d.%d", IP(0), IP(1), IP(2), IP(3)); -+#undef IP -+ -+ if (!target_port) { -+ printk("target_port parameter not specified, aborting.\n"); -+ return -1; -+ } -+ printk(":%i\n", target_port); -+ target_port = htons(target_port); -+ -+ printk("Target Ethernet Address %02x:%02x:%02x:%02x:%02x:%02x", -+ daddr[0], daddr[1], daddr[2], daddr[3], daddr[4], daddr[5]); -+ -+ if ((daddr[0] & daddr[1] & daddr[2] & daddr[3] & daddr[4] & -+ daddr[5]) == 255) -+ printk("(Broadcast)"); -+ printk("\n"); -+ return 0; -+} -+ -+/* -+ * 
Prepares the dump device so we can take a dump later. -+ * Validates the netdump configuration parameters. -+ * -+ * TODO: Network connectivity check should be done here. -+ */ -+static int -+dump_net_open(struct dump_dev *net_dev, unsigned long arg) -+{ -+ int retval = 0; -+ -+ /* get the interface name */ -+ if (copy_from_user(device_name, (void *)arg, IFNAMSIZ)) -+ return -EFAULT; -+ -+ if (!(dump_ndev = dev_get_by_name(device_name))) { -+ printk("network device %s does not exist, aborting.\n", -+ device_name); -+ return -ENODEV; -+ } -+ -+ if (!dump_ndev->poll_controller) { -+ printk("network device %s does not implement polling yet, " -+ "aborting.\n", device_name); -+ retval = -1; /* return proper error */ -+ goto err1; -+ } -+ -+ if (!(dump_in_dev = in_dev_get(dump_ndev))) { -+ printk("network device %s is not an IP protocol device, " -+ "aborting.\n", device_name); -+ retval = -EINVAL; -+ goto err1; -+ } -+ -+ if ((retval = dump_validate_config()) < 0) -+ goto err2; -+ -+ net_dev->curr_offset = 0; -+ printk("Network device %s successfully configured for dumping\n", -+ device_name); -+ return retval; -+err2: -+ in_dev_put(dump_in_dev); -+err1: -+ dev_put(dump_ndev); -+ return retval; -+} -+ -+/* -+ * Close the dump device and release associated resources -+ * Invoked when unconfiguring the dump device. -+ */ -+static int -+dump_net_release(struct dump_dev *net_dev) -+{ -+ if (dump_in_dev) -+ in_dev_put(dump_in_dev); -+ if (dump_ndev) -+ dev_put(dump_ndev); -+ return 0; -+} -+ -+/* -+ * Prepare the dump device for use (silence any ongoing activity -+ * and quiesce state) when the system crashes. -+ */ -+static int -+dump_net_silence(struct dump_dev *net_dev) -+{ -+ local_irq_save(flags_global); -+ dump_ndev->rx_hook = dump_rx_hook; -+ startup_handshake = 1; -+ net_dev->curr_offset = 0; -+ printk("Dumping to network device %s on CPU %d ...\n", device_name, -+ smp_processor_id()); -+ return 0; -+} -+ -+/* -+ * Invoked when dumping is done. 
This is the time to put things back -+ * (i.e. undo the effects of dump_block_silence) so the device is -+ * available for normal use. -+ */ -+static int -+dump_net_resume(struct dump_dev *net_dev) -+{ -+ int indx; -+ reply_t reply; -+ char tmp[200]; -+ -+ if (!dump_ndev) -+ return (0); -+ -+ sprintf(tmp, "NETDUMP end.\n"); -+ for( indx = 0; indx < 6; indx++) { -+ reply.code = REPLY_END_NETDUMP; -+ reply.nr = 0; -+ reply.info = 0; -+ dump_send_skb(dump_ndev, tmp, strlen(tmp), &reply); -+ } -+ printk("NETDUMP END!\n"); -+ local_irq_restore(flags_global); -+ dump_ndev->rx_hook = NULL; -+ startup_handshake = 0; -+ return 0; -+} -+ -+/* -+ * Seek to the specified offset in the dump device. -+ * Makes sure this is a valid offset, otherwise returns an error. -+ */ -+static int -+dump_net_seek(struct dump_dev *net_dev, loff_t off) -+{ -+ /* -+ * For now using DUMP_HEADER_OFFSET as hard coded value, -+ * See dump_block_seekin dump_blockdev.c to know how to -+ * do this properly. -+ */ -+ net_dev->curr_offset = off + DUMP_HEADER_OFFSET; -+ return 0; -+} -+ -+/* -+ * -+ */ -+static int -+dump_net_write(struct dump_dev *net_dev, void *buf, unsigned long len) -+{ -+ int cnt, i, off; -+ ssize_t ret; -+ -+ cnt = len/ PAGE_SIZE; -+ -+ for (i = 0; i < cnt; i++) { -+ off = i* PAGE_SIZE; -+ ret = do_netdump(net_dev, buf+off, PAGE_SIZE); -+ if (ret <= 0) -+ return -1; -+ net_dev->curr_offset = net_dev->curr_offset + PAGE_SIZE; -+ } -+ return len; -+} -+ -+/* -+ * check if the last dump i/o is over and ready for next request -+ */ -+static int -+dump_net_ready(struct dump_dev *net_dev, void *buf) -+{ -+ return 0; -+} -+ -+/* -+ * ioctl function used for configuring network dump -+ */ -+static int -+dump_net_ioctl(struct dump_dev *net_dev, unsigned int cmd, unsigned long arg) -+{ -+ switch (cmd) { -+ case DIOSTARGETIP: -+ target_ip = arg; -+ break; -+ case DIOSTARGETPORT: -+ target_port = (u16)arg; -+ break; -+ case DIOSSOURCEPORT: -+ source_port = (u16)arg; -+ break; -+ case 
DIOSETHADDR: -+ return copy_from_user(daddr, (void *)arg, 6); -+ break; -+ case DIOGTARGETIP: -+ case DIOGTARGETPORT: -+ case DIOGSOURCEPORT: -+ case DIOGETHADDR: -+ break; -+ default: -+ return -EINVAL; -+ } -+ return 0; -+} -+ -+struct dump_dev_ops dump_netdev_ops = { -+ .open = dump_net_open, -+ .release = dump_net_release, -+ .silence = dump_net_silence, -+ .resume = dump_net_resume, -+ .seek = dump_net_seek, -+ .write = dump_net_write, -+ /* .read not implemented */ -+ .ready = dump_net_ready, -+ .ioctl = dump_net_ioctl -+}; -+ -+static struct dump_dev default_dump_netdev = { -+ .type_name = "networkdev", -+ .ops = &dump_netdev_ops, -+ .curr_offset = 0 -+}; -+ -+static int __init -+dump_netdev_init(void) -+{ -+ default_dump_netdev.curr_offset = 0; -+ -+ if (dump_register_device(&default_dump_netdev) < 0) { -+ printk("network dump device driver registration failed\n"); -+ return -1; -+ } -+ printk("network device driver for LKCD registered\n"); -+ -+ get_random_bytes(&dump_magic, sizeof(dump_magic)); -+ return 0; -+} -+ -+static void __exit -+dump_netdev_cleanup(void) -+{ -+ dump_unregister_device(&default_dump_netdev); -+} -+ -+MODULE_AUTHOR("LKCD Development Team "); -+MODULE_DESCRIPTION("Network Dump Driver for Linux Kernel Crash Dump (LKCD)"); -+MODULE_LICENSE("GPL"); -+ -+module_init(dump_netdev_init); -+module_exit(dump_netdev_cleanup); ---- linux-2.5.69/drivers/dump/dump_overlay.c.lkcdbase Mon Jun 2 17:29:49 2003 -+++ linux-2.5.69/drivers/dump/dump_overlay.c Fri Feb 7 06:47:58 2003 -@@ -0,0 +1,848 @@ -+/* -+ * Two-stage soft-boot based dump scheme methods (memory overlay -+ * with post soft-boot writeout) -+ * -+ * Started: Oct 2002 - Suparna Bhattacharya -+ * -+ * This approach of saving the dump in memory and writing it -+ * out after a softboot without clearing memory is derived from the -+ * Mission Critical Linux dump implementation. 
Credits and a big -+ * thanks for letting the lkcd project make use of the excellent -+ * piece of work and also for helping with clarifications and -+ * tips along the way are due to: -+ * Dave Winchell (primary author of mcore) -+ * and also to -+ * Jeff Moyer -+ * Josh Huber -+ * -+ * For those familiar with the mcore implementation, the key -+ * differences/extensions here are in allowing entire memory to be -+ * saved (in compressed form) through a careful ordering scheme -+ * on both the way down as well on the way up after boot, the latter -+ * for supporting the LKCD notion of passes in which most critical -+ * data is the first to be saved to the dump device. Also the post -+ * boot writeout happens from within the kernel rather than driven -+ * from userspace. -+ * -+ * The sequence is orchestrated through the abstraction of "dumpers", -+ * one for the first stage which then sets up the dumper for the next -+ * stage, providing for a smooth and flexible reuse of the singlestage -+ * dump scheme methods and a handle to pass dump device configuration -+ * information across the soft boot. -+ * -+ * Copyright (C) 2002 International Business Machines Corp. -+ * -+ * This code is released under version 2 of the GNU GPL. -+ */ -+ -+/* -+ * Disruptive dumping using the second kernel soft-boot option -+ * for issuing dump i/o operates in 2 stages: -+ * -+ * (1) - Saves the (compressed & formatted) dump in memory using a -+ * carefully ordered overlay scheme designed to capture the -+ * entire physical memory or selective portions depending on -+ * dump config settings, -+ * - Registers the stage 2 dumper and -+ * - Issues a soft reboot w/o clearing memory. 
-+ * -+ * The overlay scheme starts with a small bootstrap free area -+ * and follows a reverse ordering of passes wherein it -+ * compresses and saves data starting with the least critical -+ * areas first, thus freeing up the corresponding pages to -+ * serve as destination for subsequent data to be saved, and -+ * so on. With a good compression ratio, this makes it feasible -+ * to capture an entire physical memory dump without significantly -+ * reducing memory available during regular operation. -+ * -+ * (2) Post soft-reboot, runs through the saved memory dump and -+ * writes it out to disk, this time around, taking care to -+ * save the more critical data first (i.e. pages which figure -+ * in early passes for a regular dump). Finally issues a -+ * clean reboot. -+ * -+ * Since the data was saved in memory after selection/filtering -+ * and formatted as per the chosen output dump format, at this -+ * stage the filter and format actions are just dummy (or -+ * passthrough) actions, except for influence on ordering of -+ * passes. 
-+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include "dump_methods.h" -+ -+extern struct list_head dumper_list_head; -+extern struct dump_memdev *dump_memdev; -+extern struct dumper dumper_stage2; -+struct dump_config_block *dump_saved_config = NULL; -+extern struct dump_blockdev *dump_blockdev; -+static struct dump_memdev *saved_dump_memdev = NULL; -+static struct dumper *saved_dumper = NULL; -+ -+/* For testing -+extern void dump_display_map(struct dump_memdev *); -+*/ -+ -+struct dumper *dumper_by_name(char *name) -+{ -+#ifdef LATER -+ struct dumper *dumper; -+ list_for_each_entry(dumper, &dumper_list_head, dumper_list) -+ if (!strncmp(dumper->name, name, 32)) -+ return dumper; -+ -+ /* not found */ -+ return NULL; -+#endif -+ /* Temporary proof of concept */ -+ if (!strncmp(dumper_stage2.name, name, 32)) -+ return &dumper_stage2; -+ else -+ return NULL; -+} -+ -+#ifdef CONFIG_CRASH_DUMP_SOFTBOOT -+extern void dump_early_reserve_map(struct dump_memdev *); -+ -+void crashdump_reserve(void) -+{ -+ extern unsigned long crashdump_addr; -+ -+ if (crashdump_addr == 0xdeadbeef) -+ return; -+ -+ /* reserve dump config and saved dump pages */ -+ dump_saved_config = (struct dump_config_block *)crashdump_addr; -+ /* magic verification */ -+ if (dump_saved_config->magic != DUMP_MAGIC_LIVE) { -+ printk("Invalid dump magic. Ignoring dump\n"); -+ dump_saved_config = NULL; -+ return; -+ } -+ -+ printk("Dump may be available from previous boot\n"); -+ -+ reserve_bootmem(virt_to_phys((void *)crashdump_addr), -+ PAGE_ALIGN(sizeof(struct dump_config_block))); -+ dump_early_reserve_map(&dump_saved_config->memdev); -+ -+} -+#endif -+ -+/* -+ * Loads the dump configuration from a memory block saved across soft-boot -+ * The ops vectors need fixing up as the corresp. routines may have -+ * relocated in the new soft-booted kernel. 
-+ */ -+int dump_load_config(struct dump_config_block *config) -+{ -+ struct dumper *dumper; -+ struct dump_data_filter *filter_table, *filter; -+ struct dump_dev *dev; -+ int i; -+ -+ if (config->magic != DUMP_MAGIC_LIVE) -+ return -ENOENT; /* not a valid config */ -+ -+ /* initialize generic config data */ -+ memcpy(&dump_config, &config->config, sizeof(dump_config)); -+ -+ /* initialize dumper state */ -+ if (!(dumper = dumper_by_name(config->dumper.name))) { -+ printk("dumper name mismatch\n"); -+ return -ENOENT; /* dumper mismatch */ -+ } -+ -+ /* verify and fixup schema */ -+ if (strncmp(dumper->scheme->name, config->scheme.name, 32)) { -+ printk("dumper scheme mismatch\n"); -+ return -ENOENT; /* mismatch */ -+ } -+ config->scheme.ops = dumper->scheme->ops; -+ config->dumper.scheme = &config->scheme; -+ -+ /* verify and fixup filter operations */ -+ filter_table = dumper->filter; -+ for (i = 0, filter = config->filter_table; -+ ((i < MAX_PASSES) && filter_table[i].selector); -+ i++, filter++) { -+ if (strncmp(filter_table[i].name, filter->name, 32)) { -+ printk("dump filter mismatch\n"); -+ return -ENOENT; /* filter name mismatch */ -+ } -+ filter->selector = filter_table[i].selector; -+ } -+ config->dumper.filter = config->filter_table; -+ -+ /* fixup format */ -+ if (strncmp(dumper->fmt->name, config->fmt.name, 32)) { -+ printk("dump format mismatch\n"); -+ return -ENOENT; /* mismatch */ -+ } -+ config->fmt.ops = dumper->fmt->ops; -+ config->dumper.fmt = &config->fmt; -+ -+ /* fixup target device */ -+ dev = (struct dump_dev *)(&config->dev[0]); -+ if (dumper->dev == NULL) { -+ pr_debug("Vanilla dumper - assume default\n"); -+ if (dump_dev == NULL) -+ return -ENODEV; -+ dumper->dev = dump_dev; -+ } -+ -+ if (strncmp(dumper->dev->type_name, dev->type_name, 32)) { -+ printk("dump dev type mismatch %s instead of %s\n", -+ dev->type_name, dumper->dev->type_name); -+ return -ENOENT; /* mismatch */ -+ } -+ dev->ops = dumper->dev->ops; -+ config->dumper.dev = dev; 
-+ -+ /* fixup memory device containing saved dump pages */ -+ /* assume statically init'ed dump_memdev */ -+ config->memdev.ddev.ops = dump_memdev->ddev.ops; -+ /* switch to memdev from prev boot */ -+ saved_dump_memdev = dump_memdev; /* remember current */ -+ dump_memdev = &config->memdev; -+ -+ /* Make this the current primary dumper */ -+ dump_config.dumper = &config->dumper; -+ -+ return 0; -+} -+ -+/* Saves the dump configuration in a memory block for use across a soft-boot */ -+int dump_save_config(struct dump_config_block *config) -+{ -+ printk("saving dump config settings\n"); -+ -+ /* dump config settings */ -+ memcpy(&config->config, &dump_config, sizeof(dump_config)); -+ -+ /* dumper state */ -+ memcpy(&config->dumper, dump_config.dumper, sizeof(struct dumper)); -+ memcpy(&config->scheme, dump_config.dumper->scheme, -+ sizeof(struct dump_scheme)); -+ memcpy(&config->fmt, dump_config.dumper->fmt, sizeof(struct dump_fmt)); -+ memcpy(&config->dev[0], dump_config.dumper->dev, -+ sizeof(struct dump_anydev)); -+ memcpy(&config->filter_table, dump_config.dumper->filter, -+ sizeof(struct dump_data_filter)*MAX_PASSES); -+ -+ /* handle to saved mem pages */ -+ memcpy(&config->memdev, dump_memdev, sizeof(struct dump_memdev)); -+ -+ config->magic = DUMP_MAGIC_LIVE; -+ -+ return 0; -+} -+ -+int dump_init_stage2(struct dump_config_block *saved_config) -+{ -+ int err = 0; -+ -+ pr_debug("dump_init_stage2\n"); -+ /* Check if dump from previous boot exists */ -+ if (saved_config) { -+ printk("loading dumper from previous boot \n"); -+ /* load and configure dumper from previous boot */ -+ if ((err = dump_load_config(saved_config))) -+ return err; -+ -+ if (!dump_oncpu) { -+ if ((err = dump_configure(dump_config.dump_device))) { -+ printk("Stage 2 dump configure failed\n"); -+ return err; -+ } -+ } -+ -+ dumper_reset(); -+ dump_dev = dump_config.dumper->dev; -+ /* write out the dump */ -+ err = dump_generic_execute(NULL, NULL); -+ -+ dump_saved_config = NULL; -+ -+ if 
(!dump_oncpu) { -+ dump_unconfigure(); -+ } -+ -+ return err; -+ -+ } else { -+ /* no dump to write out */ -+ printk("no dumper from previous boot \n"); -+ return 0; -+ } -+} -+ -+extern void dump_mem_markpages(struct dump_memdev *); -+ -+int dump_switchover_stage(void) -+{ -+ int ret = 0; -+ -+ /* trigger stage 2 rightaway - in real life would be after soft-boot */ -+ /* dump_saved_config would be a boot param */ -+ saved_dump_memdev = dump_memdev; -+ saved_dumper = dump_config.dumper; -+ ret = dump_init_stage2(dump_saved_config); -+ dump_memdev = saved_dump_memdev; -+ dump_config.dumper = saved_dumper; -+ return ret; -+} -+ -+int dump_activate_softboot(void) -+{ -+ int err = 0; -+ -+ /* temporary - switchover to writeout previously saved dump */ -+ err = dump_switchover_stage(); /* non-disruptive case */ -+ if (dump_oncpu) -+ dump_config.dumper = &dumper_stage1; /* set things back */ -+ -+ return err; -+ -+ dump_silence_level = DUMP_HALT_CPUS; -+ /* wait till we become the only cpu */ -+ /* maybe by checking for online cpus ? */ -+ -+ /* now call into kexec */ -+ -+ /* TBD/Fixme: -+ * should we call reboot notifiers ? inappropriate for panic ? -+ * what about device_shutdown() ? -+ * is explicit bus master disabling needed or can we do that -+ * through driverfs ? 
-+ */ -+ return 0; -+} -+ -+/* --- DUMP SCHEME ROUTINES --- */ -+ -+static inline int dump_buf_pending(struct dumper *dumper) -+{ -+ return (dumper->curr_buf - dumper->dump_buf); -+} -+ -+/* Invoked during stage 1 of soft-reboot based dumping */ -+int dump_overlay_sequencer(void) -+{ -+ struct dump_data_filter *filter = dump_config.dumper->filter; -+ struct dump_data_filter *filter2 = dumper_stage2.filter; -+ int pass = 0, err = 0, save = 0; -+ int (*action)(unsigned long, unsigned long); -+ -+ /* Make sure gzip compression is being used */ -+ if (dump_config.dumper->compress->compress_type != DUMP_COMPRESS_GZIP) { -+ printk(" Please set GZIP compression \n"); -+ return -EINVAL; -+ } -+ -+ /* start filling in dump data right after the header */ -+ dump_config.dumper->curr_offset = -+ PAGE_ALIGN(dump_config.dumper->header_len); -+ -+ /* Locate the last pass */ -+ for (;filter->selector; filter++, pass++); -+ -+ /* -+ * Start from the end backwards: overlay involves a reverse -+ * ordering of passes, since less critical pages are more -+ * likely to be reusable as scratch space once we are through -+ * with them. 
-+ */ -+ for (--pass, --filter; pass >= 0; pass--, filter--) -+ { -+ /* Assumes passes are exclusive (even across dumpers) */ -+ /* Requires care when coding the selection functions */ -+ if ((save = filter->level_mask & dump_config.level)) -+ action = dump_save_data; -+ else -+ action = dump_skip_data; -+ -+ /* Remember the offset where this pass started */ -+ /* The second stage dumper would use this */ -+ if (dump_buf_pending(dump_config.dumper) & (PAGE_SIZE - 1)) { -+ pr_debug("Starting pass %d with pending data\n", pass); -+ pr_debug("filling dummy data to page-align it\n"); -+ dump_config.dumper->curr_buf = (void *)PAGE_ALIGN( -+ (unsigned long)dump_config.dumper->curr_buf); -+ } -+ -+ filter2[pass].start = dump_config.dumper->curr_offset -+ + dump_buf_pending(dump_config.dumper); -+ -+ err = dump_iterator(pass, action, filter); -+ -+ filter2[pass].end = dump_config.dumper->curr_offset -+ + dump_buf_pending(dump_config.dumper); -+ -+ if (err < 0) { -+ printk("dump_overlay_seq: failure %d in pass %d\n", -+ err, pass); -+ break; -+ } -+ printk("\n %d overlay pages %s of %d each in pass %d\n", -+ err, save ? "saved" : "skipped", DUMP_PAGE_SIZE, pass); -+ } -+ -+ return err; -+} -+ -+/* from dump_memdev.c */ -+extern struct page *dump_mem_lookup(struct dump_memdev *dev, unsigned long loc); -+extern struct page *dump_mem_next_page(struct dump_memdev *dev); -+ -+static inline struct page *dump_get_saved_page(loff_t loc) -+{ -+ return (dump_mem_lookup(dump_memdev, loc >> PAGE_SHIFT)); -+} -+ -+static inline struct page *dump_next_saved_page(void) -+{ -+ return (dump_mem_next_page(dump_memdev)); -+} -+ -+/* -+ * Iterates over list of saved dump pages. Invoked during second stage of -+ * soft boot dumping -+ * -+ * Observation: If additional selection is desired at this stage then -+ * a different iterator could be written which would advance -+ * to the next page header everytime instead of blindly picking up -+ * the data. 
In such a case loc would be interpreted differently. -+ * At this moment however a blind pass seems sufficient, cleaner and -+ * faster. -+ */ -+int dump_saved_data_iterator(int pass, int (*action)(unsigned long, -+ unsigned long), struct dump_data_filter *filter) -+{ -+ loff_t loc = filter->start; -+ struct page *page; -+ unsigned long count = 0; -+ int err = 0; -+ unsigned long sz; -+ -+ printk("pass %d, start off 0x%llx end offset 0x%llx\n", pass, -+ filter->start, filter->end); -+ -+ /* loc will get treated as logical offset into stage 1 */ -+ page = dump_get_saved_page(loc); -+ -+ for (; loc < filter->end; loc += PAGE_SIZE) { -+ dump_config.dumper->curr_loc = loc; -+ if (!page) { -+ printk("no more saved data for pass %d\n", pass); -+ break; -+ } -+ sz = (loc + PAGE_SIZE > filter->end) ? filter->end - loc : -+ PAGE_SIZE; -+ -+ if (page && filter->selector(pass, (unsigned long)page, -+ PAGE_SIZE)) { -+ pr_debug("mem offset 0x%llx\n", loc); -+ if ((err = action((unsigned long)page, sz))) -+ break; -+ else -+ count++; -+ /* clear the contents of page */ -+ /* fixme: consider using KM_DUMP instead */ -+ clear_highpage(page); -+ -+ } -+ page = dump_next_saved_page(); -+ } -+ -+ return err ? 
err : count; -+} -+ -+static inline int dump_overlay_pages_done(struct page *page, int nr) -+{ -+ int ret=0; -+ -+ for (; nr ; page++, nr--) { -+ if (dump_check_and_free_page(dump_memdev, page)) -+ ret++; -+ } -+ return ret; -+} -+ -+int dump_overlay_save_data(unsigned long loc, unsigned long len) -+{ -+ int err = 0; -+ struct page *page = (struct page *)loc; -+ static unsigned long cnt = 0; -+ -+ if ((err = dump_generic_save_data(loc, len))) -+ return err; -+ -+ if (dump_overlay_pages_done(page, len >> PAGE_SHIFT)) { -+ cnt++; -+ if (!(cnt & 0x7f)) -+ pr_debug("released page 0x%lx\n", page_to_pfn(page)); -+ } -+ -+ return err; -+} -+ -+ -+int dump_overlay_skip_data(unsigned long loc, unsigned long len) -+{ -+ struct page *page = (struct page *)loc; -+ -+ dump_overlay_pages_done(page, len >> PAGE_SHIFT); -+ return 0; -+} -+ -+int dump_overlay_resume(void) -+{ -+ int err = 0; -+ -+ /* -+ * switch to stage 2 dumper, save dump_config_block -+ * and then trigger a soft-boot -+ */ -+ dumper_stage2.header_len = dump_config.dumper->header_len; -+ dump_config.dumper = &dumper_stage2; -+ if ((err = dump_save_config(dump_saved_config))) -+ return err; -+ -+ dump_dev = dump_config.dumper->dev; -+ -+ return err; -+ err = dump_switchover_stage(); /* plugs into soft boot mechanism */ -+ dump_config.dumper = &dumper_stage1; /* set things back */ -+ return err; -+} -+ -+int dump_overlay_configure(unsigned long devid) -+{ -+ struct dump_dev *dev; -+ struct dump_config_block *saved_config = dump_saved_config; -+ int err = 0; -+ -+ /* If there is a previously saved dump, write it out first */ -+ if (saved_config) { -+ printk("Processing old dump pending writeout\n"); -+ err = dump_switchover_stage(); -+ if (err) { -+ printk("failed to writeout saved dump\n"); -+ return err; -+ } -+ dump_free_mem(saved_config); /* testing only: not after boot */ -+ } -+ -+ dev = dumper_stage2.dev = dump_config.dumper->dev; -+ /* From here on the intermediate dump target is memory-only */ -+ dump_dev = 
dump_config.dumper->dev = &dump_memdev->ddev; -+ if ((err = dump_generic_configure(0))) { -+ printk("dump generic configure failed: err %d\n", err); -+ return err; -+ } -+ /* temporary */ -+ dumper_stage2.dump_buf = dump_config.dumper->dump_buf; -+ -+ /* Sanity check on the actual target dump device */ -+ if (!dev || (err = dev->ops->open(dev, devid))) { -+ return err; -+ } -+ /* TBD: should we release the target if this is soft-boot only ? */ -+ -+ /* alloc a dump config block area to save across reboot */ -+ if (!(dump_saved_config = dump_alloc_mem(sizeof(struct -+ dump_config_block)))) { -+ printk("dump config block alloc failed\n"); -+ /* undo configure */ -+ dump_generic_unconfigure(); -+ return -ENOMEM; -+ } -+ dump_config.dump_addr = (unsigned long)dump_saved_config; -+ printk("Dump config block of size %d set up at 0x%lx\n", -+ sizeof(*dump_saved_config), (unsigned long)dump_saved_config); -+ return 0; -+} -+ -+int dump_overlay_unconfigure(void) -+{ -+ struct dump_dev *dev = dumper_stage2.dev; -+ int err = 0; -+ -+ pr_debug("dump_overlay_unconfigure\n"); -+ /* Close the secondary device */ -+ dev->ops->release(dev); -+ pr_debug("released secondary device\n"); -+ -+ err = dump_generic_unconfigure(); -+ pr_debug("Unconfigured generic portions\n"); -+ dump_free_mem(dump_saved_config); -+ dump_saved_config = NULL; -+ pr_debug("Freed saved config block\n"); -+ dump_dev = dump_config.dumper->dev = dumper_stage2.dev; -+ -+ printk("Unconfigured overlay dumper\n"); -+ return err; -+} -+ -+int dump_staged_unconfigure(void) -+{ -+ int err = 0; -+ struct dump_config_block *saved_config = dump_saved_config; -+ struct dump_dev *dev; -+ -+ pr_debug("dump_staged_unconfigure\n"); -+ err = dump_generic_unconfigure(); -+ -+ /* now check if there is a saved dump waiting to be written out */ -+ if (saved_config) { -+ printk("Processing saved dump pending writeout\n"); -+ if ((err = dump_switchover_stage())) { -+ printk("Error in commiting saved dump at 0x%lx\n", -+ (unsigned 
long)saved_config); -+ printk("Old dump may hog memory\n"); -+ } else { -+ dump_free_mem(saved_config); -+ pr_debug("Freed saved config block\n"); -+ } -+ dump_saved_config = NULL; -+ } else { -+ dev = &dump_memdev->ddev; -+ dev->ops->release(dev); -+ } -+ printk("Unconfigured second stage dumper\n"); -+ -+ return 0; -+} -+ -+/* ----- PASSTHRU FILTER ROUTINE --------- */ -+ -+/* transparent - passes everything through */ -+int dump_passthru_filter(int pass, unsigned long loc, unsigned long sz) -+{ -+ return 1; -+} -+ -+/* ----- PASSTRU FORMAT ROUTINES ---- */ -+ -+ -+int dump_passthru_configure_header(const char *panic_str, const struct pt_regs *regs) -+{ -+ dump_config.dumper->header_dirty++; -+ return 0; -+} -+ -+/* Copies bytes of data from page(s) to the specified buffer */ -+int dump_copy_pages(void *buf, struct page *page, unsigned long sz) -+{ -+ unsigned long len = 0, bytes; -+ void *addr; -+ -+ while (len < sz) { -+ addr = kmap_atomic(page, KM_DUMP); -+ bytes = (sz > len + PAGE_SIZE) ? 
PAGE_SIZE : sz - len; -+ memcpy(buf, addr, bytes); -+ kunmap_atomic(addr, KM_DUMP); -+ buf += bytes; -+ len += bytes; -+ page++; -+ } -+ /* memset(dump_config.dumper->curr_buf, 0x57, len); temporary */ -+ -+ return sz - len; -+} -+ -+int dump_passthru_update_header(void) -+{ -+ long len = dump_config.dumper->header_len; -+ struct page *page; -+ void *buf = dump_config.dumper->dump_buf; -+ int err = 0; -+ -+ if (!dump_config.dumper->header_dirty) -+ return 0; -+ -+ pr_debug("Copying header of size %ld bytes from memory\n", len); -+ if (len > DUMP_BUFFER_SIZE) -+ return -E2BIG; -+ -+ page = dump_mem_lookup(dump_memdev, 0); -+ for (; (len > 0) && page; buf += PAGE_SIZE, len -= PAGE_SIZE) { -+ if ((err = dump_copy_pages(buf, page, PAGE_SIZE))) -+ return err; -+ page = dump_mem_next_page(dump_memdev); -+ } -+ if (len > 0) { -+ printk("Incomplete header saved in mem\n"); -+ return -ENOENT; -+ } -+ -+ if ((err = dump_dev_seek(0))) { -+ printk("Unable to seek to dump header offset\n"); -+ return err; -+ } -+ err = dump_ll_write(dump_config.dumper->dump_buf, -+ buf - dump_config.dumper->dump_buf); -+ if (err < dump_config.dumper->header_len) -+ return (err < 0) ? 
err : -ENOSPC; -+ -+ dump_config.dumper->header_dirty = 0; -+ return 0; -+} -+ -+static loff_t next_dph_offset = 0; -+ -+static int dph_valid(struct __dump_page *dph) -+{ -+ if ((dph->dp_address & (PAGE_SIZE - 1)) || (dph->dp_flags -+ > DUMP_DH_COMPRESSED) || (!dph->dp_flags) || -+ (dph->dp_size > PAGE_SIZE)) { -+ printk("dp->address = 0x%llx, dp->size = 0x%x, dp->flag = 0x%x\n", -+ dph->dp_address, dph->dp_size, dph->dp_flags); -+ return 0; -+ } -+ return 1; -+} -+ -+int dump_verify_lcrash_data(void *buf, unsigned long sz) -+{ -+ struct __dump_page *dph; -+ -+ /* sanity check for page headers */ -+ while (next_dph_offset + sizeof(*dph) < sz) { -+ dph = (struct __dump_page *)(buf + next_dph_offset); -+ if (!dph_valid(dph)) { -+ printk("Invalid page hdr at offset 0x%llx\n", -+ next_dph_offset); -+ return -EINVAL; -+ } -+ next_dph_offset += dph->dp_size + sizeof(*dph); -+ } -+ -+ next_dph_offset -= sz; -+ return 0; -+} -+ -+/* -+ * TBD/Later: Consider avoiding the copy by using a scatter/gather -+ * vector representation for the dump buffer -+ */ -+int dump_passthru_add_data(unsigned long loc, unsigned long sz) -+{ -+ struct page *page = (struct page *)loc; -+ void *buf = dump_config.dumper->curr_buf; -+ int err = 0; -+ -+ if ((err = dump_copy_pages(buf, page, sz))) { -+ printk("dump_copy_pages failed"); -+ return err; -+ } -+ -+ if ((err = dump_verify_lcrash_data(buf, sz))) { -+ printk("dump_verify_lcrash_data failed\n"); -+ printk("Invalid data for pfn 0x%lx\n", page_to_pfn(page)); -+ printk("Page flags 0x%lx\n", page->flags); -+ printk("Page count 0x%x\n", atomic_read(&page->count)); -+ return err; -+ } -+ -+ dump_config.dumper->curr_buf = buf + sz; -+ -+ return 0; -+} -+ -+ -+/* Stage 1 dumper: Saves compressed dump in memory and soft-boots system */ -+ -+/* Scheme to overlay saved data in memory for writeout after a soft-boot */ -+struct dump_scheme_ops dump_scheme_overlay_ops = { -+ .configure = dump_overlay_configure, -+ .unconfigure = 
dump_overlay_unconfigure, -+ .sequencer = dump_overlay_sequencer, -+ .iterator = dump_page_iterator, -+ .save_data = dump_overlay_save_data, -+ .skip_data = dump_overlay_skip_data, -+ .write_buffer = dump_generic_write_buffer -+}; -+ -+struct dump_scheme dump_scheme_overlay = { -+ .name = "overlay", -+ .ops = &dump_scheme_overlay_ops -+}; -+ -+ -+/* Stage 1 must use a good compression scheme - default to gzip */ -+extern struct __dump_compress dump_gzip_compression; -+ -+struct dumper dumper_stage1 = { -+ .name = "stage1", -+ .scheme = &dump_scheme_overlay, -+ .fmt = &dump_fmt_lcrash, -+ .compress = &dump_none_compression, /* needs to be gzip */ -+ .filter = dump_filter_table, -+ .dev = NULL, -+}; -+ -+/* Stage 2 dumper: Activated after softboot to write out saved dump to device */ -+ -+/* Formatter that transfers data as is (transparent) w/o further conversion */ -+struct dump_fmt_ops dump_fmt_passthru_ops = { -+ .configure_header = dump_passthru_configure_header, -+ .update_header = dump_passthru_update_header, -+ .save_context = NULL, /* unused */ -+ .add_data = dump_passthru_add_data, -+ .update_end_marker = dump_lcrash_update_end_marker -+}; -+ -+struct dump_fmt dump_fmt_passthru = { -+ .name = "passthru", -+ .ops = &dump_fmt_passthru_ops -+}; -+ -+/* Filter that simply passes along any data within the range (transparent)*/ -+/* Note: The start and end ranges in the table are filled in at run-time */ -+ -+extern int dump_filter_none(int pass, unsigned long loc, unsigned long sz); -+ -+struct dump_data_filter dump_passthru_filtertable[MAX_PASSES] = { -+{.name = "passkern", .selector = dump_passthru_filter, -+ .level_mask = DUMP_MASK_KERN }, -+{.name = "passuser", .selector = dump_passthru_filter, -+ .level_mask = DUMP_MASK_USED }, -+{.name = "passunused", .selector = dump_passthru_filter, -+ .level_mask = DUMP_MASK_UNUSED }, -+{.name = "none", .selector = dump_filter_none, -+ .level_mask = DUMP_MASK_REST } -+}; -+ -+ -+/* Scheme to handle data staged / 
preserved across a soft-boot */ -+struct dump_scheme_ops dump_scheme_staged_ops = { -+ .configure = dump_generic_configure, -+ .unconfigure = dump_staged_unconfigure, -+ .sequencer = dump_generic_sequencer, -+ .iterator = dump_saved_data_iterator, -+ .save_data = dump_generic_save_data, -+ .skip_data = dump_generic_skip_data, -+ .write_buffer = dump_generic_write_buffer -+}; -+ -+struct dump_scheme dump_scheme_staged = { -+ .name = "staged", -+ .ops = &dump_scheme_staged_ops -+}; -+ -+/* The stage 2 dumper comprising all these */ -+struct dumper dumper_stage2 = { -+ .name = "stage2", -+ .scheme = &dump_scheme_staged, -+ .fmt = &dump_fmt_passthru, -+ .compress = &dump_none_compression, -+ .filter = dump_passthru_filtertable, -+ .dev = NULL, -+}; -+ ---- linux-2.5.69/drivers/dump/dump_rle.c.lkcdbase Mon Jun 2 17:29:49 2003 -+++ linux-2.5.69/drivers/dump/dump_rle.c Fri Dec 13 00:51:31 2002 -@@ -0,0 +1,175 @@ -+/* -+ * RLE Compression functions for kernel crash dumps. -+ * -+ * Created by: Matt Robinson (yakker@sourceforge.net) -+ * Copyright 2001 Matt D. Robinson. All rights reserved. -+ * -+ * This code is released under version 2 of the GNU GPL. -+ */ -+ -+/* header files */ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+/* -+ * Name: dump_compress_rle() -+ * Func: Compress a DUMP_PAGE_SIZE (hardware) page down to something more -+ * reasonable, if possible. This is the same routine we use in IRIX. -+ */ -+static u16 -+dump_compress_rle(const u8 *old, u16 oldsize, u8 *new, u16 newsize) -+{ -+ u16 ri, wi, count = 0; -+ u_char value = 0, cur_byte; -+ -+ /* -+ * If the block should happen to "compress" to larger than the -+ * buffer size, allocate a larger one and change cur_buf_size. 
-+ */ -+ -+ wi = ri = 0; -+ -+ while (ri < oldsize) { -+ if (!ri) { -+ cur_byte = value = old[ri]; -+ count = 0; -+ } else { -+ if (count == 255) { -+ if (wi + 3 > oldsize) { -+ return oldsize; -+ } -+ new[wi++] = 0; -+ new[wi++] = count; -+ new[wi++] = value; -+ value = cur_byte = old[ri]; -+ count = 0; -+ } else { -+ if ((cur_byte = old[ri]) == value) { -+ count++; -+ } else { -+ if (count > 1) { -+ if (wi + 3 > oldsize) { -+ return oldsize; -+ } -+ new[wi++] = 0; -+ new[wi++] = count; -+ new[wi++] = value; -+ } else if (count == 1) { -+ if (value == 0) { -+ if (wi + 3 > oldsize) { -+ return oldsize; -+ } -+ new[wi++] = 0; -+ new[wi++] = 1; -+ new[wi++] = 0; -+ } else { -+ if (wi + 2 > oldsize) { -+ return oldsize; -+ } -+ new[wi++] = value; -+ new[wi++] = value; -+ } -+ } else { /* count == 0 */ -+ if (value == 0) { -+ if (wi + 2 > oldsize) { -+ return oldsize; -+ } -+ new[wi++] = value; -+ new[wi++] = value; -+ } else { -+ if (wi + 1 > oldsize) { -+ return oldsize; -+ } -+ new[wi++] = value; -+ } -+ } /* if count > 1 */ -+ -+ value = cur_byte; -+ count = 0; -+ -+ } /* if byte == value */ -+ -+ } /* if count == 255 */ -+ -+ } /* if ri == 0 */ -+ ri++; -+ -+ } -+ if (count > 1) { -+ if (wi + 3 > oldsize) { -+ return oldsize; -+ } -+ new[wi++] = 0; -+ new[wi++] = count; -+ new[wi++] = value; -+ } else if (count == 1) { -+ if (value == 0) { -+ if (wi + 3 > oldsize) -+ return oldsize; -+ new[wi++] = 0; -+ new[wi++] = 1; -+ new[wi++] = 0; -+ } else { -+ if (wi + 2 > oldsize) -+ return oldsize; -+ new[wi++] = value; -+ new[wi++] = value; -+ } -+ } else { /* count == 0 */ -+ if (value == 0) { -+ if (wi + 2 > oldsize) -+ return oldsize; -+ new[wi++] = value; -+ new[wi++] = value; -+ } else { -+ if (wi + 1 > oldsize) -+ return oldsize; -+ new[wi++] = value; -+ } -+ } /* if count > 1 */ -+ -+ value = cur_byte; -+ count = 0; -+ return wi; -+} -+ -+/* setup the rle compression functionality */ -+static struct __dump_compress dump_rle_compression = { -+ .compress_type = 
DUMP_COMPRESS_RLE, -+ .compress_func = dump_compress_rle, -+ .compress_name = "RLE", -+}; -+ -+/* -+ * Name: dump_compress_rle_init() -+ * Func: Initialize rle compression for dumping. -+ */ -+static int __init -+dump_compress_rle_init(void) -+{ -+ dump_register_compression(&dump_rle_compression); -+ return 0; -+} -+ -+/* -+ * Name: dump_compress_rle_cleanup() -+ * Func: Remove rle compression for dumping. -+ */ -+static void __exit -+dump_compress_rle_cleanup(void) -+{ -+ dump_unregister_compression(DUMP_COMPRESS_RLE); -+} -+ -+/* module initialization */ -+module_init(dump_compress_rle_init); -+module_exit(dump_compress_rle_cleanup); -+ -+MODULE_LICENSE("GPL"); -+MODULE_AUTHOR("LKCD Development Team "); -+MODULE_DESCRIPTION("RLE compression module for crash dump driver"); ---- linux-2.5.69/drivers/dump/dump_scheme.c.lkcdbase Mon Jun 2 17:29:49 2003 -+++ linux-2.5.69/drivers/dump/dump_scheme.c Fri Apr 25 00:24:15 2003 -@@ -0,0 +1,357 @@ -+/* -+ * Default single stage dump scheme methods -+ * -+ * Previously a part of dump_base.c -+ * -+ * Started: Oct 2002 - Suparna Bhattacharya -+ * Split and rewrote LKCD dump scheme to generic dump method -+ * interfaces -+ * Derived from original code created by -+ * Matt Robinson ) -+ * -+ * Contributions from SGI, IBM, HP, MCL, and others. -+ * -+ * Copyright (C) 1999 - 2002 Silicon Graphics, Inc. All rights reserved. -+ * Copyright (C) 2001 - 2002 Matt D. Robinson. All rights reserved. -+ * Copyright (C) 2002 International Business Machines Corp. -+ * -+ * This code is released under version 2 of the GNU GPL. -+ */ -+ -+/* -+ * Implements the default dump scheme, i.e. single-stage gathering and -+ * saving of dump data directly to the target device, which operates in -+ * a push mode, where the dumping system decides what data it saves -+ * taking into account pre-specified dump config options. 
-+ * -+ * Aside: The 2-stage dump scheme, where there is a soft-reset between -+ * the gathering and saving phases, also reuses some of these -+ * default routines (see dump_overlay.c) -+ */ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include "dump_methods.h" -+ -+extern int panic_timeout; /* time before reboot */ -+ -+extern void dump_speedo(int); -+ -+/* Default sequencer used during single stage dumping */ -+/* Also invoked during stage 2 of soft-boot based dumping */ -+int dump_generic_sequencer(void) -+{ -+ struct dump_data_filter *filter = dump_config.dumper->filter; -+ int pass = 0, err = 0, save = 0; -+ int (*action)(unsigned long, unsigned long); -+ -+ /* -+ * We want to save the more critical data areas first in -+ * case we run out of space, encounter i/o failures, or get -+ * interrupted otherwise and have to give up midway -+ * So, run through the passes in increasing order -+ */ -+ for (;filter->selector; filter++, pass++) -+ { -+ /* Assumes passes are exclusive (even across dumpers) */ -+ /* Requires care when coding the selection functions */ -+ if ((save = filter->level_mask & dump_config.level)) -+ action = dump_save_data; -+ else -+ action = dump_skip_data; -+ -+ if ((err = dump_iterator(pass, action, filter)) < 0) -+ break; -+ -+ printk("\n %d dump pages %s of %d each in pass %d\n", -+ err, save ? "saved" : "skipped", DUMP_PAGE_SIZE, pass); -+ -+ } -+ -+ return (err < 0) ? 
err : 0; -+} -+ -+static inline struct page *dump_get_page(loff_t loc) -+{ -+ unsigned long page_index = loc >> PAGE_SHIFT; -+ -+ /* todo: complete this to account for ia64/discontig mem */ -+ /* todo: and to check for validity, ram page, no i/o mem etc */ -+ /* need to use pfn/physaddr equiv of kern_addr_valid */ -+ if (__dump_page_valid(page_index)) -+ return pfn_to_page(page_index); -+ else -+ return NULL; -+ -+} -+ -+/* Default iterator: for singlestage and stage 1 of soft-boot dumping */ -+/* Iterates over range of physical memory pages in DUMP_PAGE_SIZE increments */ -+int dump_page_iterator(int pass, int (*action)(unsigned long, unsigned long), -+ struct dump_data_filter *filter) -+{ -+ /* Todo : fix unit, type */ -+ loff_t loc; -+ int count = 0, err = 0; -+ struct page *page; -+ -+ /* Todo: Add membanks code */ -+ /* TBD: Check if we need to address DUMP_PAGE_SIZE < PAGE_SIZE */ -+ -+ for (loc = filter->start; loc < filter->end; loc += DUMP_PAGE_SIZE) { -+ dump_config.dumper->curr_loc = loc; -+ page = dump_get_page(loc); -+ if (page && filter->selector(pass, (unsigned long) page, -+ DUMP_PAGE_SIZE)) { -+ if ((err = action((unsigned long)page, DUMP_PAGE_SIZE))) -+ { -+ printk("dump_page_iterator: err %d for loc " -+ "0x%llx, in pass %d\n", err, loc, pass); -+ break; -+ } else -+ count++; -+ } -+ } -+ -+ return err ? 
err : count; -+} -+ -+/* -+ * Base function that saves the selected block of data in the dump -+ * Action taken when iterator decides that data needs to be saved -+ */ -+int dump_generic_save_data(unsigned long loc, unsigned long sz) -+{ -+ void *buf; -+ void *dump_buf = dump_config.dumper->dump_buf; -+ int left, bytes, ret; -+ -+ if ((ret = dump_add_data(loc, sz))) { -+ return ret; -+ } -+ buf = dump_config.dumper->curr_buf; -+ -+ /* If we've filled up the buffer write it out */ -+ if ((left = buf - dump_buf) >= DUMP_BUFFER_SIZE) { -+ bytes = dump_write_buffer(dump_buf, DUMP_BUFFER_SIZE); -+ if (bytes < DUMP_BUFFER_SIZE) { -+ printk("dump_write_buffer failed %d\n", bytes); -+ return bytes ? -ENOSPC : bytes; -+ } -+ -+ left -= bytes; -+ -+ /* -- A few chores to do from time to time -- */ -+ dump_config.dumper->count++; -+ -+ if (!(dump_config.dumper->count & 0x3f)) { -+ /* Update the header every one in a while */ -+ memset((void *)dump_buf, 'b', DUMP_BUFFER_SIZE); -+ if ((ret = dump_update_header()) < 0) { -+ /* issue warning */ -+ return ret; -+ } -+ printk("."); -+ -+ touch_nmi_watchdog(); -+ } else if (!(dump_config.dumper->count & 0x7)) { -+ /* Show progress so the user knows we aren't hung */ -+ dump_speedo(dump_config.dumper->count >> 3); -+ } -+ /* Todo: Touch/Refresh watchdog */ -+ -+ /* --- Done with periodic chores -- */ -+ -+ /* -+ * extra bit of copying to simplify verification -+ * in the second kernel boot based scheme -+ */ -+ memcpy(dump_buf - DUMP_PAGE_SIZE, dump_buf + -+ DUMP_BUFFER_SIZE - DUMP_PAGE_SIZE, DUMP_PAGE_SIZE); -+ -+ /* now adjust the leftover bits back to the top of the page */ -+ /* this case would not arise during stage 2 (passthru) */ -+ memset(dump_buf, 'z', DUMP_BUFFER_SIZE); -+ if (left) { -+ memcpy(dump_buf, dump_buf + DUMP_BUFFER_SIZE, left); -+ } -+ buf -= DUMP_BUFFER_SIZE; -+ dump_config.dumper->curr_buf = buf; -+ } -+ -+ return 0; -+} -+ -+int dump_generic_skip_data(unsigned long loc, unsigned long sz) -+{ -+ /* dummy by 
default */ -+ return 0; -+} -+ -+/* -+ * Common low level routine to write a buffer to current dump device -+ * Expects checks for space etc to have been taken care of by the caller -+ * Operates serially at the moment for simplicity. -+ * TBD/Todo: Consider batching for improved throughput -+ */ -+int dump_ll_write(void *buf, unsigned long len) -+{ -+ long transferred = 0, last_transfer = 0; -+ int ret = 0; -+ -+ /* make sure device is ready */ -+ while ((ret = dump_dev_ready(NULL)) == -EAGAIN); -+ if (ret < 0) { -+ printk("dump_dev_ready failed !err %d\n", ret); -+ return ret; -+ } -+ -+ while (len) { -+ if ((last_transfer = dump_dev_write(buf, len)) <= 0) { -+ ret = last_transfer; -+ printk("dump_dev_write failed !err %d\n", -+ ret); -+ break; -+ } -+ /* wait till complete */ -+ while ((ret = dump_dev_ready(buf)) == -EAGAIN) -+ cpu_relax(); -+ -+ if (ret < 0) { -+ printk("i/o failed !err %d\n", ret); -+ break; -+ } -+ -+ len -= last_transfer; -+ buf += last_transfer; -+ transferred += last_transfer; -+ } -+ return (ret < 0) ? ret : transferred; -+} -+ -+/* default writeout routine for single dump device */ -+/* writes out the dump data ensuring enough space is left for the end marker */ -+int dump_generic_write_buffer(void *buf, unsigned long len) -+{ -+ long written = 0; -+ int err = 0; -+ -+ /* check for space */ -+ if ((err = dump_dev_seek(dump_config.dumper->curr_offset + len + -+ 2*DUMP_BUFFER_SIZE)) < 0) { -+ printk("dump_write_buffer: insuff space after offset 0x%llx\n", -+ dump_config.dumper->curr_offset); -+ return err; -+ } -+ /* alignment check would happen as a side effect of this */ -+ if ((err = dump_dev_seek(dump_config.dumper->curr_offset)) < 0) -+ return err; -+ -+ written = dump_ll_write(buf, len); -+ -+ /* all or none */ -+ -+ if (written < len) -+ written = written ? 
-ENOSPC : written; -+ else -+ dump_config.dumper->curr_offset += len; -+ -+ return written; -+} -+ -+int dump_generic_configure(unsigned long devid) -+{ -+ struct dump_dev *dev = dump_config.dumper->dev; -+ struct dump_data_filter *filter; -+ void *buf; -+ int ret = 0; -+ -+ /* Allocate the dump buffer and initialize dumper state */ -+ /* Assume that we get aligned addresses */ -+ if (!(buf = dump_alloc_mem(DUMP_BUFFER_SIZE + 3 * DUMP_PAGE_SIZE))) -+ return -ENOMEM; -+ -+ if ((unsigned long)buf & (PAGE_SIZE - 1)) { -+ /* sanity check for page aligned address */ -+ dump_free_mem(buf); -+ return -ENOMEM; /* fixme: better error code */ -+ } -+ -+ /* Initialize the rest of the fields */ -+ dump_config.dumper->dump_buf = buf + DUMP_PAGE_SIZE; -+ dumper_reset(); -+ -+ /* Open the dump device */ -+ if (!dev) -+ return -ENODEV; -+ -+ if ((ret = dev->ops->open(dev, devid))) { -+ return ret; -+ } -+ -+ /* Initialise the memory ranges in the dump filter */ -+ for (filter = dump_config.dumper->filter ;filter->selector; filter++) { -+ if (!filter->start && !filter->end) { -+ filter->start = 0; -+ filter->end = num_physpages << PAGE_SHIFT; -+ } -+ } -+ -+ return 0; -+} -+ -+int dump_generic_unconfigure(void) -+{ -+ struct dump_dev *dev = dump_config.dumper->dev; -+ void *buf = dump_config.dumper->dump_buf; -+ int ret = 0; -+ -+ pr_debug("Generic unconfigure\n"); -+ /* Close the dump device */ -+ if (dev && (ret = dev->ops->release(dev))) -+ return ret; -+ -+ printk("Closed dump device\n"); -+ -+ if (buf) -+ dump_free_mem((buf - DUMP_PAGE_SIZE)); -+ -+ dump_config.dumper->curr_buf = dump_config.dumper->dump_buf = NULL; -+ pr_debug("Released dump buffer\n"); -+ -+ return 0; -+} -+ -+ -+/* Set up the default dump scheme */ -+ -+struct dump_scheme_ops dump_scheme_singlestage_ops = { -+ .configure = dump_generic_configure, -+ .unconfigure = dump_generic_unconfigure, -+ .sequencer = dump_generic_sequencer, -+ .iterator = dump_page_iterator, -+ .save_data = dump_generic_save_data, -+ 
.skip_data = dump_generic_skip_data, -+ .write_buffer = dump_generic_write_buffer, -+}; -+ -+struct dump_scheme dump_scheme_singlestage = { -+ .name = "single-stage", -+ .ops = &dump_scheme_singlestage_ops -+}; -+ -+/* The single stage dumper comprising all these */ -+struct dumper dumper_singlestage = { -+ .name = "single-stage", -+ .scheme = &dump_scheme_singlestage, -+ .fmt = &dump_fmt_lcrash, -+ .compress = &dump_none_compression, -+ .filter = dump_filter_table, -+ .dev = NULL, -+}; -+ ---- linux-2.5.69/drivers/dump/dump_setup.c.lkcdbase Mon Jun 2 17:29:49 2003 -+++ linux-2.5.69/drivers/dump/dump_setup.c Tue Apr 29 03:37:19 2003 -@@ -0,0 +1,803 @@ -+/* -+ * Standard kernel function entry points for Linux crash dumps. -+ * -+ * Created by: Matt Robinson (yakker@sourceforge.net) -+ * Contributions from SGI, IBM, HP, MCL, and others. -+ * -+ * Copyright (C) 1999 - 2002 Silicon Graphics, Inc. All rights reserved. -+ * Copyright (C) 2000 - 2002 TurboLinux, Inc. All rights reserved. -+ * Copyright (C) 2001 - 2002 Matt D. Robinson. All rights reserved. -+ * Copyright (C) 2002 Free Software Foundation, Inc. All rights reserved. -+ * -+ * This code is released under version 2 of the GNU GPL. -+ */ -+ -+/* -+ * ----------------------------------------------------------------------- -+ * -+ * DUMP HISTORY -+ * -+ * This dump code goes back to SGI's first attempts at dumping system -+ * memory on SGI systems running IRIX. A few developers at SGI needed -+ * a way to take this system dump and analyze it, and created 'icrash', -+ * or IRIX Crash. The mechanism (the dumps and 'icrash') were used -+ * by support people to generate crash reports when a system failure -+ * occurred. This was vital for large system configurations that -+ * couldn't apply patch after patch after fix just to hope that the -+ * problems would go away. 
So the system memory, along with the crash -+ * dump analyzer, allowed support people to quickly figure out what the -+ * problem was on the system with the crash dump. -+ * -+ * In comes Linux. SGI started moving towards the open source community, -+ * and upon doing so, SGI wanted to take its support utilities into Linux -+ * with the hopes that they would end up the in kernel and user space to -+ * be used by SGI's customers buying SGI Linux systems. One of the first -+ * few products to be open sourced by SGI was LKCD, or Linux Kernel Crash -+ * Dumps. LKCD comprises of a patch to the kernel to enable system -+ * dumping, along with 'lcrash', or Linux Crash, to analyze the system -+ * memory dump. A few additional system scripts and kernel modifications -+ * are also included to make the dump mechanism and dump data easier to -+ * process and use. -+ * -+ * As soon as LKCD was released into the open source community, a number -+ * of larger companies started to take advantage of it. Today, there are -+ * many community members that contribute to LKCD, and it continues to -+ * flourish and grow as an open source project. -+ */ -+ -+/* -+ * DUMP TUNABLES -+ * -+ * This is the list of system tunables (via /proc) that are available -+ * for Linux systems. All the read, write, etc., functions are listed -+ * here. Currently, there are a few different tunables for dumps: -+ * -+ * dump_device (used to be dumpdev): -+ * The device for dumping the memory pages out to. This -+ * may be set to the primary swap partition for disruptive dumps, -+ * and must be an unused partition for non-disruptive dumps. -+ * Todo: In the case of network dumps, this may be interpreted -+ * as the IP address of the netdump server to connect to. -+ * -+ * dump_compress (used to be dump_compress_pages): -+ * This is the flag which indicates which compression mechanism -+ * to use. This is a BITMASK, not an index (0,1,2,4,8,16,etc.). 
-+ * This is the current set of values: -+ * -+ * 0: DUMP_COMPRESS_NONE -- Don't compress any pages. -+ * 1: DUMP_COMPRESS_RLE -- This uses RLE compression. -+ * 2: DUMP_COMPRESS_GZIP -- This uses GZIP compression. -+ * -+ * dump_level: -+ * The amount of effort the dump module should make to save -+ * information for post crash analysis. This value is now -+ * a BITMASK value, not an index: -+ * -+ * 0: Do nothing, no dumping. (DUMP_LEVEL_NONE) -+ * -+ * 1: Print out the dump information to the dump header, and -+ * write it out to the dump_device. (DUMP_LEVEL_HEADER) -+ * -+ * 2: Write out the dump header and all kernel memory pages. -+ * (DUMP_LEVEL_KERN) -+ * -+ * 4: Write out the dump header and all kernel and user -+ * memory pages. (DUMP_LEVEL_USED) -+ * -+ * 8: Write out the dump header and all conventional/cached -+ * memory (RAM) pages in the system (kernel, user, free). -+ * (DUMP_LEVEL_ALL_RAM) -+ * -+ * 16: Write out everything, including non-conventional memory -+ * like firmware, proms, I/O registers, uncached memory. -+ * (DUMP_LEVEL_ALL) -+ * -+ * The dump_level will default to 1. -+ * -+ * dump_flags: -+ * These are the flags to use when talking about dumps. There -+ * are lots of possibilities. This is a BITMASK value, not an index. 
-+ * -+ * ----------------------------------------------------------------------- -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include "dump_methods.h" -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+#include -+ -+/* -+ * ----------------------------------------------------------------------- -+ * V A R I A B L E S -+ * ----------------------------------------------------------------------- -+ */ -+ -+/* Dump tunables */ -+struct dump_config dump_config = { -+ .level = 0, -+ .flags = 0, -+ .dump_device = 0, -+ .dump_addr = 0, -+ .dumper = NULL -+}; -+ -+ -+/* Global variables used in dump.h */ -+/* degree of system freeze when dumping */ -+enum dump_silence_levels dump_silence_level = DUMP_HARD_SPIN_CPUS; -+ -+/* Other global fields */ -+extern struct __dump_header dump_header; -+struct dump_dev *dump_dev = NULL; /* Active dump device */ -+static int dump_compress = 0; -+ -+static u16 dump_compress_none(const u8 *old, u16 oldsize, u8 *new, u16 newsize); -+struct __dump_compress dump_none_compression = { -+ .compress_type = DUMP_COMPRESS_NONE, -+ .compress_func = dump_compress_none, -+ .compress_name = "none", -+}; -+ -+/* our device operations and functions */ -+static int dump_ioctl(struct inode *i, struct file *f, -+ unsigned int cmd, unsigned long arg); -+ -+static struct file_operations dump_fops = { -+ .ioctl = dump_ioctl, -+}; -+ -+/* static variables */ -+static int dump_okay = 0; /* can we dump out to disk? 
*/ -+static spinlock_t dump_lock = SPIN_LOCK_UNLOCKED; -+ -+/* used for dump compressors */ -+static struct list_head dump_compress_list = LIST_HEAD_INIT(dump_compress_list); -+ -+/* list of registered dump targets */ -+static struct list_head dump_target_list = LIST_HEAD_INIT(dump_target_list); -+ -+/* lkcd info structure -- this is used by lcrash for basic system data */ -+struct __lkcdinfo lkcdinfo = { -+ .ptrsz = (sizeof(void *) * 8), -+#if defined(__LITTLE_ENDIAN) -+ .byte_order = __LITTLE_ENDIAN, -+#else -+ .byte_order = __BIG_ENDIAN, -+#endif -+ .page_shift = PAGE_SHIFT, -+ .page_size = PAGE_SIZE, -+ .page_mask = PAGE_MASK, -+ .page_offset = PAGE_OFFSET, -+}; -+ -+/* -+ * ----------------------------------------------------------------------- -+ * / P R O C T U N A B L E F U N C T I O N S -+ * ----------------------------------------------------------------------- -+ */ -+ -+static int proc_dump_device(ctl_table *ctl, int write, struct file *f, -+ void *buffer, size_t *lenp); -+ -+static int proc_doulonghex(ctl_table *ctl, int write, struct file *f, -+ void *buffer, size_t *lenp); -+/* -+ * sysctl-tuning infrastructure. 
-+ */ -+static ctl_table dump_table[] = { -+ { .ctl_name = CTL_DUMP_LEVEL, -+ .procname = DUMP_LEVEL_NAME, -+ .data = &dump_config.level, -+ .maxlen = sizeof(int), -+ .mode = 0644, -+ .proc_handler = proc_doulonghex, }, -+ -+ { .ctl_name = CTL_DUMP_FLAGS, -+ .procname = DUMP_FLAGS_NAME, -+ .data = &dump_config.flags, -+ .maxlen = sizeof(int), -+ .mode = 0644, -+ .proc_handler = proc_doulonghex, }, -+ -+ { .ctl_name = CTL_DUMP_COMPRESS, -+ .procname = DUMP_COMPRESS_NAME, -+ .data = &dump_compress, /* FIXME */ -+ .maxlen = sizeof(int), -+ .mode = 0644, -+ .proc_handler = proc_dointvec, }, -+ -+ { .ctl_name = CTL_DUMP_DEVICE, -+ .procname = DUMP_DEVICE_NAME, -+ .mode = 0644, -+ .data = &dump_config.dump_device, /* FIXME */ -+ .maxlen = sizeof(int), -+ .proc_handler = proc_dump_device }, -+ -+#ifdef CONFIG_CRASH_DUMP_MEMDEV -+ { .ctl_name = CTL_DUMP_ADDR, -+ .procname = DUMP_ADDR_NAME, -+ .mode = 0444, -+ .data = &dump_config.dump_addr, -+ .maxlen = sizeof(unsigned long), -+ .proc_handler = proc_doulonghex }, -+#endif -+ -+ { 0, } -+}; -+ -+static ctl_table dump_root[] = { -+ { .ctl_name = KERN_DUMP, -+ .procname = "dump", -+ .mode = 0555, -+ .child = dump_table }, -+ { 0, } -+}; -+ -+static ctl_table kernel_root[] = { -+ { .ctl_name = CTL_KERN, -+ .procname = "kernel", -+ .mode = 0555, -+ .child = dump_root, }, -+ { 0, } -+}; -+ -+static struct ctl_table_header *sysctl_header; -+ -+/* -+ * ----------------------------------------------------------------------- -+ * C O M P R E S S I O N F U N C T I O N S -+ * ----------------------------------------------------------------------- -+ */ -+ -+/* -+ * Name: dump_compress_none() -+ * Func: Don't do any compression, period. -+ */ -+static u16 -+dump_compress_none(const u8 *old, u16 oldsize, u8 *new, u16 newsize) -+{ -+ /* just return the old size */ -+ return oldsize; -+} -+ -+ -+/* -+ * Name: dump_execute() -+ * Func: Execute the dumping process. 
This makes sure all the appropriate -+ * fields are updated correctly, and calls dump_execute_memdump(), -+ * which does the real work. -+ */ -+void -+dump_execute(const char *panic_str, const struct pt_regs *regs) -+{ -+ int state = -1; -+ unsigned long flags; -+ -+ /* make sure we can dump */ -+ if (!dump_okay) { -+ pr_info("LKCD not yet configured, can't take dump now\n"); -+ return; -+ } -+ -+ /* Exclude multiple dumps at the same time, -+ * and disable interrupts, some drivers may re-enable -+ * interrupts in with silence() -+ * -+ * Try and acquire spin lock. If successful, leave preempt -+ * and interrupts disabled. See spin_lock_irqsave in spinlock.h -+ */ -+ local_irq_save(flags); -+ if (!spin_trylock(&dump_lock)) { -+ local_irq_restore(flags); -+ pr_info("LKCD dump already in progress\n"); -+ return; -+ } -+ -+ /* Bring system into the strictest level of quiescing for min drift -+ * dump drivers can soften this as required in dev->ops->silence() -+ */ -+ dump_oncpu = smp_processor_id() + 1; -+ dump_silence_level = DUMP_HARD_SPIN_CPUS; -+ -+ state = dump_generic_execute(panic_str, regs); -+ -+ dump_oncpu = 0; -+ spin_unlock_irqrestore(&dump_lock, flags); -+ -+ if (state < 0) { -+ printk("Dump Incomplete or failed!\n"); -+ } else { -+ printk("Dump Complete; %d dump pages saved.\n", -+ dump_header.dh_num_dump_pages); -+ } -+} -+ -+/* -+ * Name: dump_register_compression() -+ * Func: Register a dump compression mechanism. -+ */ -+void -+dump_register_compression(struct __dump_compress *item) -+{ -+ if (item) -+ list_add(&(item->list), &dump_compress_list); -+} -+ -+/* -+ * Name: dump_unregister_compression() -+ * Func: Remove a dump compression mechanism, and re-assign the dump -+ * compression pointer if necessary. 
-+ */ -+void -+dump_unregister_compression(int compression_type) -+{ -+ struct list_head *tmp; -+ struct __dump_compress *dc; -+ -+ /* let's make sure our list is valid */ -+ if (compression_type != DUMP_COMPRESS_NONE) { -+ list_for_each(tmp, &dump_compress_list) { -+ dc = list_entry(tmp, struct __dump_compress, list); -+ if (dc->compress_type == compression_type) { -+ list_del(&(dc->list)); -+ break; -+ } -+ } -+ } -+} -+ -+/* -+ * Name: dump_compress_init() -+ * Func: Initialize (or re-initialize) compression scheme. -+ */ -+static int -+dump_compress_init(int compression_type) -+{ -+ struct list_head *tmp; -+ struct __dump_compress *dc; -+ -+ /* try to remove the compression item */ -+ list_for_each(tmp, &dump_compress_list) { -+ dc = list_entry(tmp, struct __dump_compress, list); -+ if (dc->compress_type == compression_type) { -+ dump_config.dumper->compress = dc; -+ dump_compress = compression_type; -+ pr_debug("Dump Compress %s\n", dc->compress_name); -+ return 0; -+ } -+ } -+ -+ /* -+ * nothing on the list -- return ENODATA to indicate an error -+ * -+ * NB: -+ * EAGAIN: reports "Resource temporarily unavailable" which -+ * isn't very enlightening. -+ */ -+ printk("compression_type:%d not found\n", compression_type); -+ -+ return -ENODATA; -+} -+ -+static int -+dumper_setup(unsigned long flags, unsigned long devid) -+{ -+ int ret = 0; -+ -+ /* unconfigure old dumper if it exists */ -+ dump_okay = 0; -+ if (dump_config.dumper) { -+ pr_debug("Unconfiguring current dumper\n"); -+ dump_unconfigure(); -+ } -+ /* set up new dumper */ -+ if (dump_config.flags & DUMP_FLAGS_SOFTBOOT) { -+ printk("Configuring softboot based dump \n"); -+#ifdef CONFIG_CRASH_DUMP_MEMDEV -+ dump_config.dumper = &dumper_stage1; -+#else -+ printk("Requires CONFIG_CRASHDUMP_MEMDEV. 
Can't proceed.\n"); -+ return -1; -+#endif -+ } else { -+ dump_config.dumper = &dumper_singlestage; -+ } -+ dump_config.dumper->dev = dump_dev; -+ -+ ret = dump_configure(devid); -+ if (!ret) { -+ dump_okay = 1; -+ pr_debug("%s dumper set up for dev 0x%lx\n", -+ dump_config.dumper->name, devid); -+ dump_config.dump_device = devid; -+ } else { -+ printk("%s dumper set up failed for dev 0x%lx\n", -+ dump_config.dumper->name, devid); -+ dump_config.dumper = NULL; -+ } -+ return ret; -+} -+ -+static int -+dump_target_init(int target) -+{ -+ char type[20]; -+ struct list_head *tmp; -+ struct dump_dev *dev; -+ -+ switch (target) { -+ case DUMP_FLAGS_DISKDUMP: -+ strcpy(type, "blockdev"); break; -+ case DUMP_FLAGS_NETDUMP: -+ strcpy(type, "networkdev"); break; -+ default: -+ return -1; -+ } -+ -+ /* -+ * This is a bit stupid, generating strings from flag -+ * and doing strcmp. This is done because 'struct dump_dev' -+ * has string 'type_name' and not interger 'type'. -+ */ -+ list_for_each(tmp, &dump_target_list) { -+ dev = list_entry(tmp, struct dump_dev, list); -+ if (strcmp(type, dev->type_name) == 0) { -+ dump_dev = dev; -+ return 0; -+ } -+ } -+ return -1; -+} -+ -+/* -+ * Name: dump_ioctl() -+ * Func: Allow all dump tunables through a standard ioctl() mechanism. -+ * This is far better than before, where we'd go through /proc, -+ * because now this will work for multiple OS and architectures. -+ */ -+static int -+dump_ioctl(struct inode *i, struct file *f, unsigned int cmd, unsigned long arg) -+{ -+ /* check capabilities */ -+ if (!capable(CAP_SYS_ADMIN)) -+ return -EPERM; -+ -+ if (!dump_config.dumper && cmd == DIOSDUMPCOMPRESS) -+ /* dump device must be configured first */ -+ return -ENODEV; -+ -+ /* -+ * This is the main mechanism for controlling get/set data -+ * for various dump device parameters. The real trick here -+ * is setting the dump device (DIOSDUMPDEV). That's what -+ * triggers everything else. 
-+ */ -+ switch (cmd) { -+ case DIOSDUMPDEV: /* set dump_device */ -+ pr_debug("Configuring dump device\n"); -+ if (!(f->f_flags & O_RDWR)) -+ return -EPERM; -+ -+ __dump_open(); -+ return dumper_setup(dump_config.flags, arg); -+ -+ -+ case DIOGDUMPDEV: /* get dump_device */ -+ return put_user((long)dump_config.dump_device, (long *)arg); -+ -+ case DIOSDUMPLEVEL: /* set dump_level */ -+ if (!(f->f_flags & O_RDWR)) -+ return -EPERM; -+ -+ /* make sure we have a positive value */ -+ if (arg < 0) -+ return -EINVAL; -+ -+ /* Fixme: clean this up */ -+ dump_config.level = 0; -+ switch ((int)arg) { -+ case DUMP_LEVEL_ALL: -+ case DUMP_LEVEL_ALL_RAM: -+ dump_config.level |= DUMP_MASK_UNUSED; -+ case DUMP_LEVEL_USED: -+ dump_config.level |= DUMP_MASK_USED; -+ case DUMP_LEVEL_KERN: -+ dump_config.level |= DUMP_MASK_KERN; -+ case DUMP_LEVEL_HEADER: -+ dump_config.level |= DUMP_MASK_HEADER; -+ case DUMP_LEVEL_NONE: -+ break; -+ default: -+ return (-EINVAL); -+ } -+ pr_debug("Dump Level 0x%lx\n", dump_config.level); -+ break; -+ -+ case DIOGDUMPLEVEL: /* get dump_level */ -+ /* fixme: handle conversion */ -+ return put_user((long)dump_config.level, (long *)arg); -+ -+ -+ case DIOSDUMPFLAGS: /* set dump_flags */ -+ /* check flags */ -+ if (!(f->f_flags & O_RDWR)) -+ return -EPERM; -+ -+ /* make sure we have a positive value */ -+ if (arg < 0) -+ return -EINVAL; -+ -+ if (dump_target_init(arg & DUMP_FLAGS_TARGETMASK) < 0) -+ return -EINVAL; /* return proper error */ -+ -+ dump_config.flags = arg; -+ -+ pr_debug("Dump Flags 0x%lx\n", dump_config.flags); -+ break; -+ -+ case DIOGDUMPFLAGS: /* get dump_flags */ -+ return put_user((long)dump_config.flags, (long *)arg); -+ -+ case DIOSDUMPCOMPRESS: /* set the dump_compress status */ -+ if (!(f->f_flags & O_RDWR)) -+ return -EPERM; -+ -+ return dump_compress_init((int)arg); -+ -+ case DIOGDUMPCOMPRESS: /* get the dump_compress status */ -+ return put_user((long)(dump_config.dumper ? 
-+ dump_config.dumper->compress->compress_type : 0), -+ (long *)arg); -+ -+ default: -+ /* -+ * these are network dump specific ioctls, let the -+ * module handle them. -+ */ -+ return dump_dev_ioctl(cmd, arg); -+ } -+ return 0; -+} -+ -+/* -+ * Handle special cases for dump_device -+ * changing dump device requires doing an opening the device -+ */ -+static int -+proc_dump_device(ctl_table *ctl, int write, struct file *f, -+ void *buffer, size_t *lenp) -+{ -+ int *valp = ctl->data; -+ int oval = *valp; -+ int ret = -EPERM; -+ -+ /* same permission checks as ioctl */ -+ if (capable(CAP_SYS_ADMIN)) { -+ ret = proc_doulonghex(ctl, write, f, buffer, lenp); -+ if (ret == 0 && write && *valp != oval) { -+ /* need to restore old value to close properly */ -+ dump_config.dump_device = (dev_t) oval; -+ __dump_open(); -+ ret = dumper_setup(dump_config.flags, (dev_t) *valp); -+ } -+ } -+ -+ return ret; -+} -+ -+/* All for the want of a proc_do_xxx routine which prints values in hex */ -+static int -+proc_doulonghex(ctl_table *ctl, int write, struct file *f, -+ void *buffer, size_t *lenp) -+{ -+#define TMPBUFLEN 20 -+ unsigned long *i; -+ size_t len, left; -+ char buf[TMPBUFLEN]; -+ -+ if (!ctl->data || !ctl->maxlen || !*lenp || (f->f_pos)) { -+ *lenp = 0; -+ return 0; -+ } -+ -+ i = (unsigned long *) ctl->data; -+ left = *lenp; -+ -+ sprintf(buf, "0x%lx\n", (*i)); -+ len = strlen(buf); -+ if (len > left) -+ len = left; -+ if(copy_to_user(buffer, buf, len)) -+ return -EFAULT; -+ -+ left -= len; -+ *lenp -= left; -+ f->f_pos += *lenp; -+ return 0; -+} -+ -+/* -+ * ----------------------------------------------------------------------- -+ * I N I T F U N C T I O N S -+ * ----------------------------------------------------------------------- -+ */ -+ -+/* -+ * These register and unregister routines are exported for modules -+ * to register their dump drivers (like block, net etc) -+ */ -+int -+dump_register_device(struct dump_dev *ddev) -+{ -+ struct list_head *tmp; -+ struct 
dump_dev *dev; -+ -+ list_for_each(tmp, &dump_target_list) { -+ dev = list_entry(tmp, struct dump_dev, list); -+ if (strcmp(ddev->type_name, dev->type_name) == 0) { -+ printk("Target type %s already registered\n", -+ dev->type_name); -+ return -1; /* return proper error */ -+ } -+ } -+ list_add(&(ddev->list), &dump_target_list); -+ -+ return 0; -+} -+ -+void -+dump_unregister_device(struct dump_dev *ddev) -+{ -+ list_del(&(ddev->list)); -+ if (ddev != dump_dev) -+ return; -+ -+ dump_okay = 0; -+ -+ if (dump_config.dumper) -+ dump_unconfigure(); -+ -+ dump_config.flags &= ~DUMP_FLAGS_TARGETMASK; -+ dump_okay = 0; -+ dump_dev = NULL; -+ dump_config.dumper = NULL; -+} -+ -+static int panic_event(struct notifier_block *this, unsigned long event, -+ void *ptr) -+{ -+ struct pt_regs regs; -+ -+ get_current_regs(®s); -+ dump_execute((const char *)ptr, ®s); -+ return 0; -+} -+ -+extern struct notifier_block *panic_notifier_list; -+static int panic_event(struct notifier_block *, unsigned long, void *); -+static struct notifier_block panic_block = { -+ .notifier_call = panic_event, -+}; -+ -+#ifdef CONFIG_MAGIC_SYSRQ -+/* Sysrq handler */ -+static void sysrq_handle_crashdump(int key, struct pt_regs *pt_regs, -+ struct tty_struct *tty) { -+ dump_execute("sysrq", pt_regs); -+} -+ -+static struct sysrq_key_op sysrq_crashdump_op = { -+ .handler = sysrq_handle_crashdump, -+ .help_msg = "Dump", -+ .action_msg = "Starting crash dump", -+}; -+#endif -+ -+static inline void -+dump_sysrq_register(void) -+{ -+#ifdef CONFIG_MAGIC_SYSRQ -+ __sysrq_lock_table(); -+ __sysrq_put_key_op(DUMP_SYSRQ_KEY, &sysrq_crashdump_op); -+ __sysrq_unlock_table(); -+#endif -+} -+ -+static inline void -+dump_sysrq_unregister(void) -+{ -+#ifdef CONFIG_MAGIC_SYSRQ -+ __sysrq_lock_table(); -+ if (__sysrq_get_key_op(DUMP_SYSRQ_KEY) == &sysrq_crashdump_op) -+ __sysrq_put_key_op(DUMP_SYSRQ_KEY, NULL); -+ __sysrq_unlock_table(); -+#endif -+} -+ -+/* -+ * Name: dump_init() -+ * Func: Initialize the dump process. 
This will set up any architecture -+ * dependent code. The big key is we need the memory offsets before -+ * the page table is initialized, because the base memory offset -+ * is changed after paging_init() is called. -+ */ -+static int __init -+dump_init(void) -+{ -+ struct sysinfo info; -+ -+ /* try to create our dump device */ -+ if (register_chrdev(CRASH_DUMP_MAJOR, "dump", &dump_fops)) { -+ printk("cannot register dump character device!\n"); -+ return -EBUSY; -+ } -+ -+ __dump_init((u64)PAGE_OFFSET); -+ -+ /* set the dump_compression_list structure up */ -+ dump_register_compression(&dump_none_compression); -+ -+ /* grab the total memory size now (not if/when we crash) */ -+ si_meminfo(&info); -+ -+ /* set the memory size */ -+ dump_header.dh_memory_size = (u64)info.totalram; -+ -+ sysctl_header = register_sysctl_table(kernel_root, 0); -+ dump_sysrq_register(); -+ -+ notifier_chain_register(&panic_notifier_list, &panic_block); -+ dump_function_ptr = dump_execute; -+ -+ pr_info("Crash dump driver initialized.\n"); -+ return 0; -+} -+ -+static void __exit -+dump_cleanup(void) -+{ -+ dump_okay = 0; -+ -+ if (dump_config.dumper) -+ dump_unconfigure(); -+ -+ /* arch-specific cleanup routine */ -+ __dump_cleanup(); -+ -+ /* ignore errors while unregistering -- since can't do anything */ -+ unregister_sysctl_table(sysctl_header); -+ unregister_chrdev(CRASH_DUMP_MAJOR, "dump"); -+ dump_sysrq_unregister(); -+ notifier_chain_unregister(&panic_notifier_list, &panic_block); -+ dump_function_ptr = NULL; -+} -+ -+EXPORT_SYMBOL(dump_register_compression); -+EXPORT_SYMBOL(dump_unregister_compression); -+EXPORT_SYMBOL(dump_register_device); -+EXPORT_SYMBOL(dump_unregister_device); -+EXPORT_SYMBOL(dump_config); -+EXPORT_SYMBOL(dump_silence_level); -+ -+EXPORT_SYMBOL(__dump_irq_enable); -+EXPORT_SYMBOL(__dump_irq_restore); -+ -+MODULE_AUTHOR("Matt D. 
Robinson "); -+MODULE_DESCRIPTION("Linux Kernel Crash Dump (LKCD) driver"); -+MODULE_LICENSE("GPL"); -+ -+module_init(dump_init); -+module_exit(dump_cleanup); ---- linux-2.5.69/include/linux/dumpdev.h.lkcdbase Mon Jun 2 17:28:52 2003 -+++ linux-2.5.69/include/linux/dumpdev.h Mon Jun 2 17:31:01 2003 -@@ -0,0 +1,161 @@ -+/* -+ * Generic dump device interfaces for flexible system dump -+ * (Enables variation of dump target types e.g disk, network, memory) -+ * -+ * These interfaces have evolved based on discussions on lkcd-devel. -+ * Eventually the intent is to support primary and secondary or -+ * alternate targets registered at the same time, with scope for -+ * situation based failover or multiple dump devices used for parallel -+ * dump i/o. -+ * -+ * Started: Oct 2002 - Suparna Bhattacharya (suparna@in.ibm.com) -+ * -+ * Copyright (C) 2001 - 2002 Matt D. Robinson. All rights reserved. -+ * Copyright (C) 2002 International Business Machines Corp. -+ * -+ * This code is released under version 2 of the GNU GPL. 
-+ */ -+ -+#ifndef _LINUX_DUMPDEV_H -+#define _LINUX_DUMPDEV_H -+ -+#include -+#include -+#include -+ -+/* Determined by the dump target (device) type */ -+ -+struct dump_dev; -+ -+struct dump_dev_ops { -+ int (*open)(struct dump_dev *, unsigned long); /* configure */ -+ int (*release)(struct dump_dev *); /* unconfigure */ -+ int (*silence)(struct dump_dev *); /* when dump starts */ -+ int (*resume)(struct dump_dev *); /* when dump is over */ -+ int (*seek)(struct dump_dev *, loff_t); -+ /* trigger a write (async in nature typically) */ -+ int (*write)(struct dump_dev *, void *, unsigned long); -+ /* not usually used during dump, but option available */ -+ int (*read)(struct dump_dev *, void *, unsigned long); -+ /* use to poll for completion */ -+ int (*ready)(struct dump_dev *, void *); -+ int (*ioctl)(struct dump_dev *, unsigned int, unsigned long); -+}; -+ -+struct dump_dev { -+ char type_name[32]; /* block, net-poll etc */ -+ unsigned long device_id; /* interpreted differently for various types */ -+ struct dump_dev_ops *ops; -+ struct list_head list; -+ loff_t curr_offset; -+}; -+ -+/* -+ * dump_dev type variations: -+ */ -+ -+/* block */ -+struct dump_blockdev { -+ struct dump_dev ddev; -+ kdev_t kdev_id; -+ struct block_device *bdev; -+ struct bio *bio; -+ loff_t start_offset; -+ loff_t limit; -+ int err; -+}; -+ -+static inline struct dump_blockdev *DUMP_BDEV(struct dump_dev *dev) -+{ -+ return container_of(dev, struct dump_blockdev, ddev); -+} -+ -+ -+/* mem - for internal use by soft-boot based dumper */ -+struct dump_memdev { -+ struct dump_dev ddev; -+ unsigned long indirect_map_root; -+ unsigned long nr_free; -+ struct page *curr_page; -+ unsigned long *curr_map; -+ unsigned long curr_map_offset; -+ unsigned long last_offset; -+ unsigned long last_used_offset; -+ unsigned long last_bs_offset; -+}; -+ -+static inline struct dump_memdev *DUMP_MDEV(struct dump_dev *dev) -+{ -+ return container_of(dev, struct dump_memdev, ddev); -+} -+ -+/* Todo/future - 
meant for raw dedicated interfaces e.g. mini-ide driver */ -+struct dump_rdev { -+ struct dump_dev ddev; -+ char name[32]; -+ int (*reset)(struct dump_rdev *, unsigned int, -+ unsigned long); -+ /* ... to do ... */ -+}; -+ -+/* just to get the size right when saving config across a soft-reboot */ -+struct dump_anydev { -+ union { -+ struct dump_blockdev bddev; -+ /* .. add other types here .. */ -+ }; -+}; -+ -+ -+ -+/* Dump device / target operation wrappers */ -+/* These assume that dump_dev is initiatized to dump_config.dumper->dev */ -+ -+extern struct dump_dev *dump_dev; -+ -+static inline int dump_dev_open(unsigned long arg) -+{ -+ return dump_dev->ops->open(dump_dev, arg); -+} -+ -+static inline int dump_dev_release(void) -+{ -+ return dump_dev->ops->release(dump_dev); -+} -+ -+static inline int dump_dev_silence(void) -+{ -+ return dump_dev->ops->silence(dump_dev); -+} -+ -+static inline int dump_dev_resume(void) -+{ -+ return dump_dev->ops->resume(dump_dev); -+} -+ -+static inline int dump_dev_seek(loff_t offset) -+{ -+ return dump_dev->ops->seek(dump_dev, offset); -+} -+ -+static inline int dump_dev_write(void *buf, unsigned long len) -+{ -+ return dump_dev->ops->write(dump_dev, buf, len); -+} -+ -+static inline int dump_dev_ready(void *buf) -+{ -+ return dump_dev->ops->ready(dump_dev, buf); -+} -+ -+static inline int dump_dev_ioctl(unsigned int cmd, unsigned long arg) -+{ -+ if (!dump_dev->ops->ioctl) -+ return -EINVAL; -+ return dump_dev->ops->ioctl(dump_dev, cmd, arg); -+} -+ -+extern int dump_register_device(struct dump_dev *); -+extern void dump_unregister_device(struct dump_dev *); -+ -+#endif /* _LINUX_DUMPDEV_H */ ---- linux-2.5.69/include/linux/dump.h.lkcdbase Mon Jun 2 17:28:56 2003 -+++ linux-2.5.69/include/linux/dump.h Mon Jun 2 17:31:01 2003 -@@ -0,0 +1,376 @@ -+/* -+ * Kernel header file for Linux crash dumps. -+ * -+ * Created by: Matt Robinson (yakker@sgi.com) -+ * Copyright 1999 - 2002 Silicon Graphics, Inc. All rights reserved. 
-+ * -+ * vmdump.h to dump.h by: Matt D. Robinson (yakker@sourceforge.net) -+ * Copyright 2001 - 2002 Matt D. Robinson. All rights reserved. -+ * Copyright (C) 2002 Free Software Foundation, Inc. All rights reserved. -+ * -+ * Most of this is the same old stuff from vmdump.h, except now we're -+ * actually a stand-alone driver plugged into the block layer interface, -+ * with the exception that we now allow for compression modes externally -+ * loaded (e.g., someone can come up with their own). -+ * -+ * This code is released under version 2 of the GNU GPL. -+ */ -+ -+/* This header file includes all structure definitions for crash dumps. */ -+#ifndef _DUMP_H -+#define _DUMP_H -+ -+#if defined(CONFIG_CRASH_DUMP) || defined (CONFIG_CRASH_DUMP_MODULE) -+ -+#include -+#include -+#include -+ -+/* -+ * Predefine default DUMP_PAGE constants, asm header may override. -+ * -+ * On ia64 discontinuous memory systems it's possible for the memory -+ * banks to stop at 2**12 page alignments, the smallest possible page -+ * size. But the system page size, PAGE_SIZE, is in fact larger. -+ */ -+#define DUMP_PAGE_SHIFT PAGE_SHIFT -+#define DUMP_PAGE_MASK PAGE_MASK -+#define DUMP_PAGE_ALIGN(addr) PAGE_ALIGN(addr) -+#define DUMP_HEADER_OFFSET PAGE_SIZE -+ -+/* keep DUMP_PAGE_SIZE constant to 4K = 1<<12 -+ * it may be different from PAGE_SIZE then. -+ */ -+#define DUMP_PAGE_SIZE 4096 -+ -+/* -+ * Predefined default memcpy() to use when copying memory to the dump buffer. -+ * -+ * On ia64 there is a heads up function that can be called to let the prom -+ * machine check monitor know that the current activity is risky and it should -+ * ignore the fault (nofault). In this case the ia64 header will redefine this -+ * macro to __dump_memcpy() and use it's arch specific version. -+ */ -+#define DUMP_memcpy memcpy -+ -+/* necessary header files */ -+#include /* for architecture-specific header */ -+ -+/* -+ * Size of the buffer that's used to hold: -+ * -+ * 1. 
the dump header (padded to fill the complete buffer) -+ * 2. the possibly compressed page headers and data -+ */ -+#define DUMP_BUFFER_SIZE (64 * 1024) /* size of dump buffer */ -+#define DUMP_HEADER_SIZE DUMP_BUFFER_SIZE -+ -+/* standard header definitions */ -+#define DUMP_MAGIC_NUMBER 0xa8190173618f23edULL /* dump magic number */ -+#define DUMP_MAGIC_LIVE 0xa8190173618f23cdULL /* live magic number */ -+#define DUMP_VERSION_NUMBER 0x8 /* dump version number */ -+#define DUMP_PANIC_LEN 0x100 /* dump panic string length */ -+ -+/* dump levels - type specific stuff added later -- add as necessary */ -+#define DUMP_LEVEL_NONE 0x0 /* no dumping at all -- just bail */ -+#define DUMP_LEVEL_HEADER 0x1 /* kernel dump header only */ -+#define DUMP_LEVEL_KERN 0x2 /* dump header and kernel pages */ -+#define DUMP_LEVEL_USED 0x4 /* dump header, kernel/user pages */ -+#define DUMP_LEVEL_ALL_RAM 0x8 /* dump header, all RAM pages */ -+#define DUMP_LEVEL_ALL 0x10 /* dump all memory RAM and firmware */ -+ -+ -+/* dump compression options -- add as necessary */ -+#define DUMP_COMPRESS_NONE 0x0 /* don't compress this dump */ -+#define DUMP_COMPRESS_RLE 0x1 /* use RLE compression */ -+#define DUMP_COMPRESS_GZIP 0x2 /* use GZIP compression */ -+ -+/* dump flags - any dump-type specific flags -- add as necessary */ -+#define DUMP_FLAGS_NONE 0x0 /* no flags are set for this dump */ -+#define DUMP_FLAGS_SOFTBOOT 0x2 /* 2 stage soft-boot based dump */ -+ -+#define DUMP_FLAGS_TARGETMASK 0xf0000000 /* handle special case targets */ -+#define DUMP_FLAGS_DISKDUMP 0x80000000 /* dump to local disk */ -+#define DUMP_FLAGS_NETDUMP 0x40000000 /* dump over the network */ -+ -+/* dump header flags -- add as necessary */ -+#define DUMP_DH_FLAGS_NONE 0x0 /* no flags set (error condition!) 
*/ -+#define DUMP_DH_RAW 0x1 /* raw page (no compression) */ -+#define DUMP_DH_COMPRESSED 0x2 /* page is compressed */ -+#define DUMP_DH_END 0x4 /* end marker on a full dump */ -+#define DUMP_DH_TRUNCATED 0x8 /* dump is incomplete */ -+#define DUMP_DH_TEST_PATTERN 0x10 /* dump page is a test pattern */ -+#define DUMP_DH_NOT_USED 0x20 /* 1st bit not used in flags */ -+ -+/* names for various dump parameters in /proc/kernel */ -+#define DUMP_ROOT_NAME "sys/dump" -+#define DUMP_DEVICE_NAME "device" -+#define DUMP_COMPRESS_NAME "compress" -+#define DUMP_LEVEL_NAME "level" -+#define DUMP_FLAGS_NAME "flags" -+#define DUMP_ADDR_NAME "addr" -+ -+#define DUMP_SYSRQ_KEY 'd' /* key to use for MAGIC_SYSRQ key */ -+ -+/* CTL_DUMP names: */ -+enum -+{ -+ CTL_DUMP_DEVICE=1, -+ CTL_DUMP_COMPRESS=3, -+ CTL_DUMP_LEVEL=3, -+ CTL_DUMP_FLAGS=4, -+ CTL_DUMP_ADDR=5, -+ CTL_DUMP_TEST=6, -+}; -+ -+ -+/* page size for gzip compression -- buffered slightly beyond hardware PAGE_SIZE used by DUMP */ -+#define DUMP_DPC_PAGE_SIZE (DUMP_PAGE_SIZE + 512) -+ -+/* dump ioctl() control options */ -+#define DIOSDUMPDEV 1 /* set the dump device */ -+#define DIOGDUMPDEV 2 /* get the dump device */ -+#define DIOSDUMPLEVEL 3 /* set the dump level */ -+#define DIOGDUMPLEVEL 4 /* get the dump level */ -+#define DIOSDUMPFLAGS 5 /* set the dump flag parameters */ -+#define DIOGDUMPFLAGS 6 /* get the dump flag parameters */ -+#define DIOSDUMPCOMPRESS 7 /* set the dump compress level */ -+#define DIOGDUMPCOMPRESS 8 /* get the dump compress level */ -+ -+/* these ioctls are used only by netdump module */ -+#define DIOSTARGETIP 9 /* set the target m/c's ip */ -+#define DIOGTARGETIP 10 /* get the target m/c's ip */ -+#define DIOSTARGETPORT 11 /* set the target m/c's port */ -+#define DIOGTARGETPORT 12 /* get the target m/c's port */ -+#define DIOSSOURCEPORT 13 /* set the source m/c's port */ -+#define DIOGSOURCEPORT 14 /* get the source m/c's port */ -+#define DIOSETHADDR 15 /* set ethernet address */ -+#define 
DIOGETHADDR 16 /* get ethernet address */ -+ -+/* -+ * Structure: __dump_header -+ * Function: This is the header dumped at the top of every valid crash -+ * dump. -+ */ -+struct __dump_header { -+ /* the dump magic number -- unique to verify dump is valid */ -+ u64 dh_magic_number; -+ -+ /* the version number of this dump */ -+ u32 dh_version; -+ -+ /* the size of this header (in case we can't read it) */ -+ u32 dh_header_size; -+ -+ /* the level of this dump (just a header?) */ -+ u32 dh_dump_level; -+ -+ /* -+ * We assume dump_page_size to be 4K in every case. -+ * Store here the configurable system page size (4K, 8K, 16K, etc.) -+ */ -+ u32 dh_page_size; -+ -+ /* the size of all physical memory */ -+ u64 dh_memory_size; -+ -+ /* the start of physical memory */ -+ u64 dh_memory_start; -+ -+ /* the end of physical memory */ -+ u64 dh_memory_end; -+ -+ /* the number of hardware/physical pages in this dump specifically */ -+ u32 dh_num_dump_pages; -+ -+ /* the panic string, if available */ -+ char dh_panic_string[DUMP_PANIC_LEN]; -+ -+ /* timeval depends on architecture, two long values */ -+ struct { -+ u64 tv_sec; -+ u64 tv_usec; -+ } dh_time; /* the time of the system crash */ -+ -+ /* the NEW utsname (uname) information -- in character form */ -+ /* we do this so we don't have to include utsname.h */ -+ /* plus it helps us be more architecture independent */ -+ /* now maybe one day soon they'll make the [65] a #define! 
*/ -+ char dh_utsname_sysname[65]; -+ char dh_utsname_nodename[65]; -+ char dh_utsname_release[65]; -+ char dh_utsname_version[65]; -+ char dh_utsname_machine[65]; -+ char dh_utsname_domainname[65]; -+ -+ /* the address of current task (OLD = void *, NEW = u64) */ -+ u64 dh_current_task; -+ -+ /* what type of compression we're using in this dump (if any) */ -+ u32 dh_dump_compress; -+ -+ /* any additional flags */ -+ u32 dh_dump_flags; -+ -+ /* any additional flags */ -+ u32 dh_dump_device; -+} __attribute__((packed)); -+ -+/* -+ * Structure: __dump_page -+ * Function: To act as the header associated to each physical page of -+ * memory saved in the system crash dump. This allows for -+ * easy reassembly of each crash dump page. The address bits -+ * are split to make things easier for 64-bit/32-bit system -+ * conversions. -+ * -+ * dp_byte_offset and dp_page_index are landmarks that are helpful when -+ * looking at a hex dump of /dev/vmdump, -+ */ -+struct __dump_page { -+ /* the address of this dump page */ -+ u64 dp_address; -+ -+ /* the size of this dump page */ -+ u32 dp_size; -+ -+ /* flags (currently DUMP_COMPRESSED, DUMP_RAW or DUMP_END) */ -+ u32 dp_flags; -+} __attribute__((packed)); -+ -+/* -+ * Structure: __lkcdinfo -+ * Function: This structure contains information needed for the lkcdutils -+ * package (particularly lcrash) to determine what information is -+ * associated to this kernel, specifically. -+ */ -+struct __lkcdinfo { -+ int arch; -+ int ptrsz; -+ int byte_order; -+ int linux_release; -+ int page_shift; -+ int page_size; -+ u64 page_mask; -+ u64 page_offset; -+ int stack_offset; -+}; -+ -+#ifdef __KERNEL__ -+ -+/* -+ * Structure: __dump_compress -+ * Function: This is what an individual compression mechanism can use -+ * to plug in their own compression techniques. It's always -+ * best to build these as individual modules so that people -+ * can put in whatever they want. 
-+ */ -+struct __dump_compress { -+ /* the list_head structure for list storage */ -+ struct list_head list; -+ -+ /* the type of compression to use (DUMP_COMPRESS_XXX) */ -+ int compress_type; -+ const char *compress_name; -+ -+ /* the compression function to call */ -+ u16 (*compress_func)(const u8 *, u16, u8 *, u16); -+}; -+ -+/* functions for dump compression registration */ -+extern void dump_register_compression(struct __dump_compress *); -+extern void dump_unregister_compression(int); -+ -+/* -+ * Structure dump_mbank[]: -+ * -+ * For CONFIG_DISCONTIGMEM systems this array specifies the -+ * memory banks/chunks that need to be dumped after a panic. -+ * -+ * For classic systems it specifies a single set of pages from -+ * 0 to max_mapnr. -+ */ -+struct __dump_mbank { -+ u64 start; -+ u64 end; -+ int type; -+ int pad1; -+ long pad2; -+}; -+ -+#define DUMP_MBANK_TYPE_CONVENTIONAL_MEMORY 1 -+#define DUMP_MBANK_TYPE_OTHER 2 -+ -+#define MAXCHUNKS 256 -+extern int dump_mbanks; -+extern struct __dump_mbank dump_mbank[MAXCHUNKS]; -+ -+/* notification event codes */ -+#define DUMP_BEGIN 0x0001 /* dump beginning */ -+#define DUMP_END 0x0002 /* dump ending */ -+ -+/* Scheduler soft spin control. -+ * -+ * 0 - no dump in progress -+ * 1 - cpu0 is dumping, ... -+ */ -+extern unsigned long dump_oncpu; -+extern void dump_execute(const char *, const struct pt_regs *); -+ -+/* -+ * Notifier list for kernel code which wants to be called -+ * at kernel dump. 
-+ */ -+extern struct notifier_block *dump_notifier_list; -+static inline int register_dump_notifier(struct notifier_block *nb) -+{ -+ return notifier_chain_register(&dump_notifier_list, nb); -+} -+static inline int unregister_dump_notifier(struct notifier_block * nb) -+{ -+ return notifier_chain_unregister(&dump_notifier_list, nb); -+} -+ -+extern void (*dump_function_ptr)(const char *, const struct pt_regs *); -+static inline void dump(char * str, struct pt_regs * regs) -+{ -+ if (dump_function_ptr) -+ dump_function_ptr(str, regs); -+} -+ -+/* -+ * Common Arch Specific Functions should be declared here. -+ * This allows the C compiler to detect discrepancies. -+ */ -+extern void __dump_open(void); -+extern void __dump_cleanup(void); -+extern void __dump_init(u64); -+extern void __dump_save_regs(struct pt_regs *, const struct pt_regs *); -+extern int __dump_configure_header(const struct pt_regs *); -+extern void __dump_irq_enable(void); -+extern void __dump_irq_restore(void); -+extern int __dump_page_valid(unsigned long index); -+#ifdef CONFIG_SMP -+extern void __dump_save_other_cpus(void); -+#else -+#define __dump_save_other_cpus() -+#endif -+ -+/* to track all used (compound + zero order) pages */ -+#define PageInuse(p) (PageCompound(p) || page_count(p)) -+ -+#endif /* __KERNEL__ */ -+ -+#else /* !CONFIG_CRASH_DUMP */ -+ -+/* If not configured then make code disappear! 
*/ -+#define register_dump_watchdog(x) do { } while(0) -+#define unregister_dump_watchdog(x) do { } while(0) -+#define register_dump_notifier(x) do { } while(0) -+#define unregister_dump_notifier(x) do { } while(0) -+#define dump_in_progress() 0 -+#define dump(x, y) do { } while(0) -+ -+#endif /* !CONFIG_CRASH_DUMP */ -+ -+#endif /* _DUMP_H */ ---- linux-2.5.69/include/linux/dump_netdev.h.lkcdbase Mon Jun 2 17:29:01 2003 -+++ linux-2.5.69/include/linux/dump_netdev.h Mon Jun 2 17:31:01 2003 -@@ -0,0 +1,80 @@ -+/* -+ * linux/drivers/net/netconsole.h -+ * -+ * Copyright (C) 2001 Ingo Molnar -+ * -+ * This file contains the implementation of an IRQ-safe, crash-safe -+ * kernel console implementation that outputs kernel messages to the -+ * network. -+ * -+ * Modification history: -+ * -+ * 2001-09-17 started by Ingo Molnar. -+ */ -+ -+/**************************************************************** -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2, or (at your option) -+ * any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
-+ * -+ ****************************************************************/ -+ -+#define NETCONSOLE_VERSION 0x03 -+ -+enum netdump_commands { -+ COMM_NONE = 0, -+ COMM_SEND_MEM = 1, -+ COMM_EXIT = 2, -+ COMM_REBOOT = 3, -+ COMM_HELLO = 4, -+ COMM_GET_NR_PAGES = 5, -+ COMM_GET_PAGE_SIZE = 6, -+ COMM_START_NETDUMP_ACK = 7, -+ COMM_GET_REGS = 8, -+ COMM_GET_MAGIC = 9, -+ COMM_START_WRITE_NETDUMP_ACK = 10, -+}; -+ -+typedef struct netdump_req_s { -+ u64 magic; -+ u32 nr; -+ u32 command; -+ u32 from; -+ u32 to; -+} req_t; -+ -+enum netdump_replies { -+ REPLY_NONE = 0, -+ REPLY_ERROR = 1, -+ REPLY_LOG = 2, -+ REPLY_MEM = 3, -+ REPLY_RESERVED = 4, -+ REPLY_HELLO = 5, -+ REPLY_NR_PAGES = 6, -+ REPLY_PAGE_SIZE = 7, -+ REPLY_START_NETDUMP = 8, -+ REPLY_END_NETDUMP = 9, -+ REPLY_REGS = 10, -+ REPLY_MAGIC = 11, -+ REPLY_START_WRITE_NETDUMP = 12, -+}; -+ -+typedef struct netdump_reply_s { -+ u32 nr; -+ u32 code; -+ u32 info; -+} reply_t; -+ -+#define HEADER_LEN (1 + sizeof(reply_t)) -+ -+ ---- linux-2.5.69/include/asm-i386/dump.h.lkcdbase Mon Jun 2 17:28:47 2003 -+++ linux-2.5.69/include/asm-i386/dump.h Mon Jun 2 17:31:10 2003 -@@ -0,0 +1,93 @@ -+/* -+ * Kernel header file for Linux crash dumps. -+ * -+ * Created by: Matt Robinson (yakker@sgi.com) -+ * -+ * Copyright 1999 Silicon Graphics, Inc. All rights reserved. -+ * -+ * This code is released under version 2 of the GNU GPL. -+ */ -+ -+/* This header file holds the architecture specific crash dump header */ -+#ifndef _ASM_DUMP_H -+#define _ASM_DUMP_H -+ -+/* necessary header files */ -+#include -+#include -+#include -+#include -+ -+/* definitions */ -+#define DUMP_ASM_MAGIC_NUMBER 0xdeaddeadULL /* magic number */ -+#define DUMP_ASM_VERSION_NUMBER 0x3 /* version number */ -+ -+/* max number of cpus */ -+#define DUMP_MAX_NUM_CPUS 32 -+ -+/* -+ * Structure: __dump_header_asm -+ * Function: This is the header for architecture-specific stuff. It -+ * follows right after the dump header. 
-+ */ -+struct __dump_header_asm { -+ /* the dump magic number -- unique to verify dump is valid */ -+ u64 dha_magic_number; -+ -+ /* the version number of this dump */ -+ u32 dha_version; -+ -+ /* the size of this header (in case we can't read it) */ -+ u32 dha_header_size; -+ -+ /* the esp for i386 systems */ -+ u32 dha_esp; -+ -+ /* the eip for i386 systems */ -+ u32 dha_eip; -+ -+ /* the dump registers */ -+ struct pt_regs dha_regs; -+ -+ /* smp specific */ -+ u32 dha_smp_num_cpus; -+ u32 dha_dumping_cpu; -+ struct pt_regs dha_smp_regs[DUMP_MAX_NUM_CPUS]; -+ u32 dha_smp_current_task[DUMP_MAX_NUM_CPUS]; -+ u32 dha_stack[DUMP_MAX_NUM_CPUS]; -+ u32 dha_stack_ptr[DUMP_MAX_NUM_CPUS]; -+} __attribute__((packed)); -+ -+#ifdef __KERNEL__ -+ -+extern struct __dump_header_asm dump_header_asm; -+ -+#ifdef CONFIG_SMP -+extern unsigned long irq_affinity[]; -+extern int (*dump_ipi_function_ptr)(struct pt_regs *); -+extern void dump_send_ipi(void); -+#else -+#define dump_send_ipi() do { } while(0) -+#endif -+ -+static inline void get_current_regs(struct pt_regs *regs) -+{ -+ __asm__ __volatile__("movl %%ebx,%0" : "=m"(regs->ebx)); -+ __asm__ __volatile__("movl %%ecx,%0" : "=m"(regs->ecx)); -+ __asm__ __volatile__("movl %%edx,%0" : "=m"(regs->edx)); -+ __asm__ __volatile__("movl %%esi,%0" : "=m"(regs->esi)); -+ __asm__ __volatile__("movl %%edi,%0" : "=m"(regs->edi)); -+ __asm__ __volatile__("movl %%ebp,%0" : "=m"(regs->ebp)); -+ __asm__ __volatile__("movl %%eax,%0" : "=m"(regs->eax)); -+ __asm__ __volatile__("movl %%esp,%0" : "=m"(regs->esp)); -+ __asm__ __volatile__("movw %%ss, %%ax;" :"=a"(regs->xss)); -+ __asm__ __volatile__("movw %%cs, %%ax;" :"=a"(regs->xcs)); -+ __asm__ __volatile__("movw %%ds, %%ax;" :"=a"(regs->xds)); -+ __asm__ __volatile__("movw %%es, %%ax;" :"=a"(regs->xes)); -+ __asm__ __volatile__("pushfl; popl %0" :"=m"(regs->eflags)); -+ regs->eip = (unsigned long)current_text_addr(); -+} -+ -+#endif /* __KERNEL__ */ -+ -+#endif /* _ASM_DUMP_H */ ---- 
linux-2.5.69/init/kerntypes.c.lkcdbase Mon Jun 2 17:29:10 2003 -+++ linux-2.5.69/init/kerntypes.c Mon Jun 2 17:29:06 2003 -@@ -0,0 +1,31 @@ -+/* -+ * kerntypes.c -+ * -+ * Copyright (C) 2000 Tom Morano (tjm@sgi.com) and -+ * Matt D. Robinson (yakker@alacritech.com) -+ * -+ * Dummy module that includes headers for all kernel types of interest. -+ * The kernel type information is used by the lcrash utility when -+ * analyzing system crash dumps or the live system. Using the type -+ * information for the running system, rather than kernel header files, -+ * makes for a more flexible and robust analysis tool. -+ * -+ * This source code is released under version 2 of the GNU GPL. -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#ifdef LINUX_COMPILE_VERSION_ID_TYPE -+/* Define version type for version validation of dump and kerntypes */ -+LINUX_COMPILE_VERSION_ID_TYPE; -+#endif -+ -+void -+kerntypes_dummy(void) -+{ -+} ---- linux-2.5.69/drivers/dump/dump_methods.h.lkcdbase Mon Jun 2 17:56:12 2003 -+++ linux-2.5.69/drivers/dump/dump_methods.h Mon Jun 2 17:55:51 2003 -@@ -0,0 +1,348 @@ -+/* -+ * Generic interfaces for flexible system dump -+ * -+ * Started: Oct 2002 - Suparna Bhattacharya (suparna@in.ibm.com) -+ * -+ * Copyright (C) 2002 International Business Machines Corp. -+ * -+ * This code is released under version 2 of the GNU GPL. -+ */ -+ -+#ifndef _LINUX_DUMP_METHODS_H -+#define _LINUX_DUMP_METHODS_H -+ -+/* -+ * Inspired by Matt Robinson's suggestion of introducing dump -+ * methods as a way to enable different crash dump facilities to -+ * coexist where each employs its own scheme or dumping policy. 
-+ * -+ * The code here creates a framework for flexible dump by defining -+ * a set of methods and providing associated helpers that differentiate -+ * between the underlying mechanism (how to dump), overall scheme -+ * (sequencing of stages and data dumped and associated quiescing), -+ * output format (what the dump output looks like), target type -+ * (where to save the dump; see dumpdev.h), and selection policy -+ * (state/data to dump). -+ * -+ * These sets of interfaces can be mixed and matched to build a -+ * dumper suitable for a given situation, allowing for -+ * flexibility as well appropriate degree of code reuse. -+ * For example all features and options of lkcd (including -+ * granular selective dumping in the near future) should be -+ * available even when say, the 2 stage soft-boot based mechanism -+ * is used for taking disruptive dumps. -+ * -+ * Todo: Additionally modules or drivers may supply their own -+ * custom dumpers which extend dump with module specific -+ * information or hardware state, and can even tweak the -+ * mechanism when it comes to saving state relevant to -+ * them. 
-+ */ -+ -+#include -+#include -+#include -+#include -+ -+#define MAX_PASSES 6 -+#define MAX_DEVS 4 -+ -+ -+/* To customise selection of pages to be dumped in a given pass/group */ -+struct dump_data_filter{ -+ char name[32]; -+ int (*selector)(int, unsigned long, unsigned long); -+ ulong level_mask; /* dump level(s) for which this filter applies */ -+ loff_t start, end; /* location range applicable */ -+}; -+ -+ -+/* -+ * Determined by the kind of dump mechanism and appropriate -+ * overall scheme -+ */ -+struct dump_scheme_ops { -+ /* sets aside memory, inits data structures etc */ -+ int (*configure)(unsigned long devid); -+ /* releases resources */ -+ int (*unconfigure)(void); -+ -+ /* ordering of passes, invoking iterator */ -+ int (*sequencer)(void); -+ /* iterates over system data, selects and acts on data to dump */ -+ int (*iterator)(int, int (*)(unsigned long, unsigned long), -+ struct dump_data_filter *); -+ /* action when data is selected for dump */ -+ int (*save_data)(unsigned long, unsigned long); -+ /* action when data is to be excluded from dump */ -+ int (*skip_data)(unsigned long, unsigned long); -+ /* policies for space, multiple dump devices etc */ -+ int (*write_buffer)(void *, unsigned long); -+}; -+ -+struct dump_scheme { -+ /* the name serves as an anchor to locate the scheme after reboot */ -+ char name[32]; -+ struct dump_scheme_ops *ops; -+ struct list_head list; -+}; -+ -+/* Quiescing/Silence levels (controls IPI callback behaviour) */ -+extern enum dump_silence_levels { -+ DUMP_SOFT_SPIN_CPUS = 1, -+ DUMP_HARD_SPIN_CPUS = 2, -+ DUMP_HALT_CPUS = 3, -+} dump_silence_level; -+ -+/* determined by the dump (file) format */ -+struct dump_fmt_ops { -+ /* build header */ -+ int (*configure_header)(const char *, const struct pt_regs *); -+ int (*update_header)(void); /* update header and write it out */ -+ /* save curr context */ -+ void (*save_context)(int, const struct pt_regs *, -+ struct task_struct *); -+ /* typically called by the 
save_data action */ -+ /* add formatted data to the dump buffer */ -+ int (*add_data)(unsigned long, unsigned long); -+ int (*update_end_marker)(void); -+}; -+ -+struct dump_fmt { -+ unsigned long magic; -+ char name[32]; /* lcrash, crash, elf-core etc */ -+ struct dump_fmt_ops *ops; -+ struct list_head list; -+}; -+ -+/* -+ * Modules will be able add their own data capture schemes by -+ * registering their own dumpers. Typically they would use the -+ * primary dumper as a template and tune it with their routines. -+ * Still Todo. -+ */ -+ -+/* The combined dumper profile (mechanism, scheme, dev, fmt) */ -+struct dumper { -+ char name[32]; /* singlestage, overlay (stg1), passthru(stg2), pull */ -+ struct dump_scheme *scheme; -+ struct dump_fmt *fmt; -+ struct __dump_compress *compress; -+ struct dump_data_filter *filter; -+ struct dump_dev *dev; -+ /* state valid only for active dumper(s) - per instance */ -+ /* run time state/context */ -+ int curr_pass; -+ unsigned long count; -+ loff_t curr_offset; /* current logical offset into dump device */ -+ loff_t curr_loc; /* current memory location */ -+ void *curr_buf; /* current position in the dump buffer */ -+ void *dump_buf; /* starting addr of dump buffer */ -+ int header_dirty; /* whether the header needs to be written out */ -+ int header_len; -+ struct list_head dumper_list; /* links to other dumpers */ -+}; -+ -+/* Starting point to get to the current configured state */ -+struct dump_config { -+ ulong level; -+ ulong flags; -+ struct dumper *dumper; -+ unsigned long dump_device; -+ unsigned long dump_addr; /* relevant only for in-memory dumps */ -+ struct list_head dump_dev_list; -+}; -+ -+extern struct dump_config dump_config; -+ -+/* Used to save the dump config across a reboot for 2-stage dumps: -+ * -+ * Note: The scheme, format, compression and device type should be -+ * registered at bootup, for this config to be sharable across soft-boot. 
-+ * The function addresses could have changed and become invalid, and -+ * need to be set up again. -+ */ -+struct dump_config_block { -+ u64 magic; /* for a quick sanity check after reboot */ -+ struct dump_memdev memdev; /* handle to dump stored in memory */ -+ struct dump_config config; -+ struct dumper dumper; -+ struct dump_scheme scheme; -+ struct dump_fmt fmt; -+ struct __dump_compress compress; -+ struct dump_data_filter filter_table[MAX_PASSES]; -+ struct dump_anydev dev[MAX_DEVS]; /* target dump device */ -+}; -+ -+ -+/* Wrappers that invoke the methods for the current (active) dumper */ -+ -+/* Scheme operations */ -+ -+static inline int dump_sequencer(void) -+{ -+ return dump_config.dumper->scheme->ops->sequencer(); -+} -+ -+static inline int dump_iterator(int pass, int (*action)(unsigned long, -+ unsigned long), struct dump_data_filter *filter) -+{ -+ return dump_config.dumper->scheme->ops->iterator(pass, action, filter); -+} -+ -+#define dump_save_data dump_config.dumper->scheme->ops->save_data -+#define dump_skip_data dump_config.dumper->scheme->ops->skip_data -+ -+static inline int dump_write_buffer(void *buf, unsigned long len) -+{ -+ return dump_config.dumper->scheme->ops->write_buffer(buf, len); -+} -+ -+static inline int dump_configure(unsigned long devid) -+{ -+ return dump_config.dumper->scheme->ops->configure(devid); -+} -+ -+static inline int dump_unconfigure(void) -+{ -+ return dump_config.dumper->scheme->ops->unconfigure(); -+} -+ -+/* Format operations */ -+ -+static inline int dump_configure_header(const char *panic_str, -+ const struct pt_regs *regs) -+{ -+ return dump_config.dumper->fmt->ops->configure_header(panic_str, regs); -+} -+ -+static inline void dump_save_context(int cpu, const struct pt_regs *regs, -+ struct task_struct *tsk) -+{ -+ dump_config.dumper->fmt->ops->save_context(cpu, regs, tsk); -+} -+ -+static inline int dump_save_this_cpu(const struct pt_regs *regs) -+{ -+ int cpu = smp_processor_id(); -+ -+ 
dump_save_context(cpu, regs, current); -+ return 1; -+} -+ -+static inline int dump_update_header(void) -+{ -+ return dump_config.dumper->fmt->ops->update_header(); -+} -+ -+static inline int dump_update_end_marker(void) -+{ -+ return dump_config.dumper->fmt->ops->update_end_marker(); -+} -+ -+static inline int dump_add_data(unsigned long loc, unsigned long sz) -+{ -+ return dump_config.dumper->fmt->ops->add_data(loc, sz); -+} -+ -+/* Compression operation */ -+static inline int dump_compress_data(char *src, int slen, char *dst) -+{ -+ return dump_config.dumper->compress->compress_func(src, slen, -+ dst, DUMP_DPC_PAGE_SIZE); -+} -+ -+ -+/* Prototypes of some default implementations of dump methods */ -+ -+extern struct __dump_compress dump_none_compression; -+ -+/* Default scheme methods (dump_scheme.c) */ -+ -+extern int dump_generic_sequencer(void); -+extern int dump_page_iterator(int pass, int (*action)(unsigned long, unsigned -+ long), struct dump_data_filter *filter); -+extern int dump_generic_save_data(unsigned long loc, unsigned long sz); -+extern int dump_generic_skip_data(unsigned long loc, unsigned long sz); -+extern int dump_generic_write_buffer(void *buf, unsigned long len); -+extern int dump_generic_configure(unsigned long); -+extern int dump_generic_unconfigure(void); -+ -+/* Default scheme template */ -+extern struct dump_scheme dump_scheme_singlestage; -+ -+/* Default dump format methods */ -+ -+extern int dump_lcrash_configure_header(const char *panic_str, -+ const struct pt_regs *regs); -+extern void dump_lcrash_save_context(int cpu, const struct pt_regs *regs, -+ struct task_struct *tsk); -+extern int dump_generic_update_header(void); -+extern int dump_lcrash_add_data(unsigned long loc, unsigned long sz); -+extern int dump_lcrash_update_end_marker(void); -+ -+/* Default format (lcrash) template */ -+extern struct dump_fmt dump_fmt_lcrash; -+ -+/* Default dump selection filter table */ -+ -+/* -+ * Entries listed in order of importance and 
correspond to passes -+ * The last entry (with a level_mask of zero) typically reflects data that -+ * won't be dumped -- this may for example be used to identify data -+ * that will be skipped for certain so the corresponding memory areas can be -+ * utilized as scratch space. -+ */ -+extern struct dump_data_filter dump_filter_table[]; -+ -+/* Some pre-defined dumpers */ -+extern struct dumper dumper_singlestage; -+extern struct dumper dumper_stage1; -+extern struct dumper dumper_stage2; -+ -+/* These are temporary */ -+#define DUMP_MASK_HEADER DUMP_LEVEL_HEADER -+#define DUMP_MASK_KERN DUMP_LEVEL_KERN -+#define DUMP_MASK_USED DUMP_LEVEL_USED -+#define DUMP_MASK_UNUSED DUMP_LEVEL_ALL_RAM -+#define DUMP_MASK_REST 0 /* dummy for now */ -+ -+/* Helpers - move these to dump.h later ? */ -+ -+int dump_generic_execute(const char *panic_str, const struct pt_regs *regs); -+extern int dump_ll_write(void *buf, unsigned long len); -+int dump_check_and_free_page(struct dump_memdev *dev, struct page *page); -+ -+static inline void dumper_reset(void) -+{ -+ dump_config.dumper->curr_buf = dump_config.dumper->dump_buf; -+ dump_config.dumper->curr_loc = 0; -+ dump_config.dumper->curr_offset = 0; -+ dump_config.dumper->count = 0; -+ dump_config.dumper->curr_pass = 0; -+} -+ -+/* -+ * May later be moulded to perform boot-time allocations so we can dump -+ * earlier during bootup -+ */ -+static inline void *dump_alloc_mem(unsigned long size) -+{ -+ return kmalloc(size, GFP_KERNEL); -+} -+ -+static inline void dump_free_mem(void *buf) -+{ -+ struct page *page; -+ -+ /* ignore reserved pages (e.g. 
post soft boot stage) */ -+ if (buf && (page = virt_to_page(buf))) { -+ if (PageReserved(page)) -+ return; -+ } -+ -+ kfree(buf); -+} -+ -+ -+#endif /* _LINUX_DUMP_METHODS_H */ diff --git a/lustre/kernel_patches/patches/lkcd-kernel-changes-2.5.73.patch b/lustre/kernel_patches/patches/lkcd-kernel-changes-2.5.73.patch deleted file mode 100644 index ebe09e8..0000000 --- a/lustre/kernel_patches/patches/lkcd-kernel-changes-2.5.73.patch +++ /dev/null @@ -1,608 +0,0 @@ - - - - arch/i386/Kconfig | 50 ++++++++++++++++++++++++++++ - arch/i386/boot/Makefile | 1 - arch/i386/kernel/i386_ksyms.c | 19 ++++++++++ - arch/i386/kernel/nmi.c | 2 + - arch/i386/kernel/setup.c | 10 +++++ - arch/i386/kernel/smp.c | 16 +++++++- - arch/i386/kernel/traps.c | 2 + - arch/i386/mm/init.c | 6 +++ - arch/s390/boot/Makefile | 2 - - arch/s390/boot/install.sh | 24 +++++++++---- - drivers/Makefile | 1 - include/asm-i386/kmap_types.h | 3 + - include/asm-i386/mach-default/irq_vectors.h | 1 - include/asm-i386/smp.h | 1 - include/linux/major.h | 2 + - include/linux/sysctl.h | 2 + - init/Makefile | 4 ++ - init/main.c | 10 +++++ - init/version.c | 4 ++ - kernel/ksyms.c | 8 ++++ - kernel/panic.c | 17 +++++++++ - kernel/sched.c | 22 ++++++++++++ - lib/Kconfig | 10 +++-- - mm/page_alloc.c | 3 + - scripts/mkcompile_h | 4 +- - 25 files changed, 207 insertions(+), 17 deletions(-) - ---- linux-2.5.73/drivers/Makefile~lkcd-kernel-changes-2.5.73 Sun Jun 22 11:33:08 2003 -+++ linux-2.5.73-n9560/drivers/Makefile Mon Jun 30 14:56:26 2003 -@@ -50,3 +50,4 @@ obj-$(CONFIG_ISDN_BOOL) += isdn/ - obj-$(CONFIG_MCA) += mca/ - obj-$(CONFIG_EISA) += eisa/ - obj-$(CONFIG_CPU_FREQ) += cpufreq/ -+obj-$(CONFIG_CRASH_DUMP) += dump/ ---- linux-2.5.73/include/linux/major.h~lkcd-kernel-changes-2.5.73 Sun Jun 22 11:32:37 2003 -+++ linux-2.5.73-n9560/include/linux/major.h Mon Jun 30 14:56:26 2003 -@@ -157,6 +157,8 @@ - - #define OSST_MAJOR 206 /* OnStream-SCx0 SCSI tape */ - -+#define CRASH_DUMP_MAJOR 221 /* crash dump interface */ -+ - 
#define IBM_TTY3270_MAJOR 227 - #define IBM_FS3270_MAJOR 228 - ---- linux-2.5.73/include/linux/sysctl.h~lkcd-kernel-changes-2.5.73 Sun Jun 22 11:32:55 2003 -+++ linux-2.5.73-n9560/include/linux/sysctl.h Mon Jun 30 14:56:26 2003 -@@ -130,6 +130,8 @@ enum - KERN_PIDMAX=55, /* int: PID # limit */ - KERN_CORE_PATTERN=56, /* string: pattern for core-file names */ - KERN_PANIC_ON_OOPS=57, /* int: whether we will panic on an oops */ -+ -+ KERN_DUMP=60, /* directory: dump parameters */ - }; - - ---- linux-2.5.73/include/asm-i386/mach-default/irq_vectors.h~lkcd-kernel-changes-2.5.73 Sun Jun 22 11:32:38 2003 -+++ linux-2.5.73-n9560/include/asm-i386/mach-default/irq_vectors.h Mon Jun 30 14:56:26 2003 -@@ -48,6 +48,7 @@ - #define INVALIDATE_TLB_VECTOR 0xfd - #define RESCHEDULE_VECTOR 0xfc - #define CALL_FUNCTION_VECTOR 0xfb -+#define DUMP_VECTOR 0xfa - - #define THERMAL_APIC_VECTOR 0xf0 - /* ---- linux-2.5.73/include/asm-i386/kmap_types.h~lkcd-kernel-changes-2.5.73 Sun Jun 22 11:33:01 2003 -+++ linux-2.5.73-n9560/include/asm-i386/kmap_types.h Mon Jun 30 14:56:26 2003 -@@ -24,7 +24,8 @@ D(10) KM_IRQ0, - D(11) KM_IRQ1, - D(12) KM_SOFTIRQ0, - D(13) KM_SOFTIRQ1, --D(14) KM_TYPE_NR -+D(14) KM_TYPE_NR, -+D(15) KM_DUMP - }; - - #undef D ---- linux-2.5.73/include/asm-i386/smp.h~lkcd-kernel-changes-2.5.73 Sun Jun 22 11:32:56 2003 -+++ linux-2.5.73-n9560/include/asm-i386/smp.h Mon Jun 30 14:56:26 2003 -@@ -39,6 +39,7 @@ extern int smp_num_siblings; - extern int cpu_sibling_map[]; - - extern void smp_flush_tlb(void); -+extern void dump_send_ipi(void); - extern void smp_message_irq(int cpl, void *dev_id, struct pt_regs *regs); - extern void smp_send_reschedule(int cpu); - extern void smp_invalidate_rcv(void); /* Process an NMI */ ---- linux-2.5.73/arch/i386/kernel/i386_ksyms.c~lkcd-kernel-changes-2.5.73 Sun Jun 22 11:33:35 2003 -+++ linux-2.5.73-n9560/arch/i386/kernel/i386_ksyms.c Mon Jun 30 14:56:26 2003 -@@ -16,6 +16,7 @@ - #include - #include - #include -+#include - - #include - 
#include -@@ -33,6 +34,7 @@ - #include - #include - #include -+#include - - extern void dump_thread(struct pt_regs *, struct user *); - extern spinlock_t rtc_lock; -@@ -208,3 +210,20 @@ EXPORT_SYMBOL(kmap_atomic_to_page); - EXPORT_SYMBOL(edd); - EXPORT_SYMBOL(eddnr); - #endif -+ -+#ifdef CONFIG_CRASH_DUMP_MODULE -+#ifdef CONFIG_SMP -+extern irq_desc_t irq_desc[NR_IRQS]; -+extern unsigned long irq_affinity[NR_IRQS]; -+extern void stop_this_cpu(void *); -+EXPORT_SYMBOL(irq_desc); -+EXPORT_SYMBOL(irq_affinity); -+EXPORT_SYMBOL(stop_this_cpu); -+EXPORT_SYMBOL(dump_send_ipi); -+#endif -+extern int pfn_is_ram(unsigned long); -+EXPORT_SYMBOL(pfn_is_ram); -+#ifdef ARCH_HAS_NMI_WATCHDOG -+EXPORT_SYMBOL(touch_nmi_watchdog); -+#endif -+#endif ---- linux-2.5.73/arch/i386/kernel/nmi.c~lkcd-kernel-changes-2.5.73 Sun Jun 22 11:32:55 2003 -+++ linux-2.5.73-n9560/arch/i386/kernel/nmi.c Mon Jun 30 14:56:26 2003 -@@ -24,6 +24,7 @@ - #include - #include - #include -+#include - #include - - #include -@@ -426,6 +427,7 @@ void nmi_watchdog_tick (struct pt_regs * - bust_spinlocks(1); - printk("NMI Watchdog detected LOCKUP on CPU%d, eip %08lx, registers:\n", cpu, regs->eip); - show_registers(regs); -+ dump("NMI Watchdog detected LOCKUP", regs); - printk("console shuts up ...\n"); - console_silent(); - spin_unlock(&nmi_print_lock); ---- linux-2.5.73/arch/i386/kernel/setup.c~lkcd-kernel-changes-2.5.73 Sun Jun 22 11:32:44 2003 -+++ linux-2.5.73-n9560/arch/i386/kernel/setup.c Mon Jun 30 14:56:26 2003 -@@ -438,6 +438,7 @@ static void __init setup_memory_region(v - print_memory_map(who); - } /* setup_memory_region */ - -+unsigned long crashdump_addr = 0xdeadbeef; - - static void __init parse_cmdline_early (char ** cmdline_p) - { -@@ -531,6 +532,9 @@ static void __init parse_cmdline_early ( - if (c == ' ' && !memcmp(from, "highmem=", 8)) - highmem_pages = memparse(from+8, &from) >> PAGE_SHIFT; - -+ if (c == ' ' && !memcmp(from, "crashdump=", 10)) -+ crashdump_addr = memparse(from+10, &from); -+ - 
c = *(from++); - if (!c) - break; -@@ -913,6 +917,8 @@ static int __init noreplacement_setup(ch - - __setup("noreplacement", noreplacement_setup); - -+extern void crashdump_reserve(void); -+ - void __init setup_arch(char **cmdline_p) - { - unsigned long max_low_pfn; -@@ -977,6 +983,10 @@ void __init setup_arch(char **cmdline_p) - generic_apic_probe(*cmdline_p); - #endif - -+#ifdef CONFIG_CRASH_DUMP_SOFTBOOT -+ crashdump_reserve(); /* Preserve crash dump state from prev boot */ -+#endif -+ - #ifdef CONFIG_ACPI_BOOT - /* - * Parse the ACPI tables for possible boot-time SMP configuration. ---- linux-2.5.73/arch/i386/kernel/smp.c~lkcd-kernel-changes-2.5.73 Sun Jun 22 11:32:32 2003 -+++ linux-2.5.73-n9560/arch/i386/kernel/smp.c Mon Jun 30 16:01:58 2003 -@@ -19,6 +19,7 @@ - #include - #include - #include -+#include - - #include - #include -@@ -144,6 +145,13 @@ inline void __send_IPI_shortcut(unsigned - */ - cfg = __prepare_ICR(shortcut, vector); - -+ if (vector == DUMP_VECTOR) { -+ /* -+ * Setup DUMP IPI to be delivered as an NMI -+ */ -+ cfg = (cfg&~APIC_VECTOR_MASK)|APIC_DM_NMI; -+ } -+ - /* - * Send the IPI. The write to APIC_ICR fires this off. - */ -@@ -450,6 +458,11 @@ void flush_tlb_all(void) - on_each_cpu(do_flush_tlb_all, 0, 1, 1); - } - -+void dump_send_ipi(void) -+{ -+ send_IPI_allbutself(DUMP_VECTOR); -+} -+ - /* - * this function sends a 'reschedule' IPI to another CPU. 
- * it goes straight through and wastes no time serializing -@@ -528,7 +541,7 @@ int smp_call_function (void (*func) (voi - return 0; - } - --static void stop_this_cpu (void * dummy) -+void stop_this_cpu (void * dummy) - { - /* - * Remove this CPU: -@@ -589,4 +602,3 @@ asmlinkage void smp_call_function_interr - atomic_inc(&call_data->finished); - } - } -- ---- linux-2.5.73/arch/i386/kernel/traps.c~lkcd-kernel-changes-2.5.73 Sun Jun 22 11:32:35 2003 -+++ linux-2.5.73-n9560/arch/i386/kernel/traps.c Mon Jun 30 14:56:26 2003 -@@ -25,6 +25,7 @@ - #include - #include - #include -+#include - - #ifdef CONFIG_EISA - #include -@@ -258,6 +259,7 @@ void die(const char * str, struct pt_reg - handle_BUG(regs); - printk("%s: %04lx [#%d]\n", str, err & 0xffff, ++die_counter); - show_registers(regs); -+ dump((char *)str, regs); - bust_spinlocks(0); - spin_unlock_irq(&die_lock); - if (in_interrupt()) ---- linux-2.5.73/arch/i386/mm/init.c~lkcd-kernel-changes-2.5.73 Sun Jun 22 11:33:06 2003 -+++ linux-2.5.73-n9560/arch/i386/mm/init.c Mon Jun 30 14:56:26 2003 -@@ -189,6 +189,12 @@ static inline int page_is_ram(unsigned l - return 0; - } - -+/* To enable modules to check if a page is in RAM */ -+int pfn_is_ram(unsigned long pfn) -+{ -+ return (page_is_ram(pfn)); -+} -+ - #ifdef CONFIG_HIGHMEM - pte_t *kmap_pte; - pgprot_t kmap_prot; ---- linux-2.5.73/arch/i386/boot/Makefile~lkcd-kernel-changes-2.5.73 Sun Jun 22 11:32:42 2003 -+++ linux-2.5.73-n9560/arch/i386/boot/Makefile Mon Jun 30 14:56:26 2003 -@@ -101,3 +101,4 @@ zlilo: $(BOOTIMAGE) - - install: $(BOOTIMAGE) - sh $(src)/install.sh $(KERNELRELEASE) $(BOOTIMAGE) System.map "$(INSTALL_PATH)" -+ if [ -f init/kerntypes.o ]; then cp init/kerntypes.o $(INSTALL_PATH)/Kerntypes; fi ---- linux-2.5.73/arch/i386/Kconfig~lkcd-kernel-changes-2.5.73 Sun Jun 22 11:32:34 2003 -+++ linux-2.5.73-n9560/arch/i386/Kconfig Mon Jun 30 14:56:26 2003 -@@ -1347,6 +1347,56 @@ source "arch/i386/oprofile/Kconfig" - - menu "Kernel hacking" - -+config CRASH_DUMP 
-+ tristate "Crash dump support (EXPERIMENTAL)" -+ depends on EXPERIMENTAL -+ default n -+ ---help--- -+ Say Y here to enable saving an image of system memory when a panic -+ or other error occurs. Dumps can also be forced with the SysRq+d -+ key if MAGIC_SYSRQ is enabled. -+ -+config CRASH_DUMP_BLOCKDEV -+ tristate "Crash dump block device driver" -+ depends on CRASH_DUMP -+ help -+ Say Y to allow saving crash dumps directly to a disk device. -+ -+config CRASH_DUMP_NETDEV -+ tristate "Crash dump network device driver" -+ depends on CRASH_DUMP -+ help -+ Say Y to allow saving crash dumps over a network device. -+ -+config CRASH_DUMP_MEMDEV -+ bool "Crash dump staged memory driver" -+ depends on CRASH_DUMP -+ help -+ Say Y to allow intermediate saving crash dumps in spare -+ memory pages which would then be written out to disk -+ later. -+ -+config CRASH_DUMP_SOFTBOOT -+ bool "Save crash dump across a soft reboot" -+ depends on CRASH_DUMP_MEMDEV -+ help -+ Say Y to allow a crash dump to be preserved in memory -+ pages across a soft reboot and written out to disk -+ thereafter. For this to work, CRASH_DUMP must be -+ configured as part of the kernel (not as a module). -+ -+config CRASH_DUMP_COMPRESS_RLE -+ tristate "Crash dump RLE compression" -+ depends on CRASH_DUMP -+ help -+ Say Y to allow saving dumps with Run Length Encoding compression. -+ -+config CRASH_DUMP_COMPRESS_GZIP -+ tristate "Crash dump GZIP compression" -+ depends on CRASH_DUMP -+ help -+ Say Y to allow saving dumps with Gnu Zip compression. 
-+ - config DEBUG_KERNEL - bool "Kernel debugging" - help ---- linux-2.5.73/arch/s390/boot/Makefile~lkcd-kernel-changes-2.5.73 Sun Jun 22 11:33:36 2003 -+++ linux-2.5.73-n9560/arch/s390/boot/Makefile Mon Jun 30 14:56:26 2003 -@@ -16,4 +16,4 @@ $(obj)/image: vmlinux FORCE - - install: $(CONFIGURE) $(obj)/image - sh -x $(obj)/install.sh $(KERNELRELEASE) $(obj)/image \ -- System.map Kerntypes "$(INSTALL_PATH)" -+ System.map init/kerntypes.o "$(INSTALL_PATH)" ---- linux-2.5.73/arch/s390/boot/install.sh~lkcd-kernel-changes-2.5.73 Sun Jun 22 11:32:57 2003 -+++ linux-2.5.73-n9560/arch/s390/boot/install.sh Mon Jun 30 14:56:26 2003 -@@ -16,7 +16,8 @@ - # $1 - kernel version - # $2 - kernel image file - # $3 - kernel map file --# $4 - default install path (blank if root directory) -+# $4 - kernel type file -+# $5 - default install path (blank if root directory) - # - - # User may have a custom install script -@@ -26,13 +27,22 @@ if [ -x /sbin/installkernel ]; then exec - - # Default install - same as make zlilo - --if [ -f $4/vmlinuz ]; then -- mv $4/vmlinuz $4/vmlinuz.old -+if [ -f $5/vmlinuz ]; then -+ mv $5/vmlinuz $5/vmlinuz.old - fi - --if [ -f $4/System.map ]; then -- mv $4/System.map $4/System.old -+if [ -f $5/System.map ]; then -+ mv $5/System.map $5/System.old - fi - --cat $2 > $4/vmlinuz --cp $3 $4/System.map -+if [ -f $5/Kerntypes ]; then -+ mv $5/Kerntypes $5/Kerntypes.old -+fi -+ -+cat $2 > $5/vmlinuz -+cp $3 $5/System.map -+ -+# copy the kernel type file if it exists -+if [ -f $4 ]; then -+ cp $4 $5/Kerntypes -+fi ---- linux-2.5.73/scripts/mkcompile_h~lkcd-kernel-changes-2.5.73 Sun Jun 22 11:33:36 2003 -+++ linux-2.5.73-n9560/scripts/mkcompile_h Mon Jun 30 14:56:26 2003 -@@ -33,7 +33,7 @@ UTS_VERSION="$UTS_VERSION `LANG=C date`" - - UTS_LEN=64 - UTS_TRUNCATE="sed -e s/\(.\{1,$UTS_LEN\}\).*/\1/" -- -+LINUX_COMPILE_VERSION_ID="__linux_compile_version_id__`hostname | tr -c '[0-9A-Za-z\n]' '__'`_`LANG=C date | tr -c '[0-9A-Za-z\n]' '_'`" - # Generate a temporary 
compile.h - - ( echo /\* This file is auto generated, version $VERSION \*/ -@@ -55,6 +55,8 @@ UTS_TRUNCATE="sed -e s/\(.\{1,$UTS_LEN\} - fi - - echo \#define LINUX_COMPILER \"`$CC -v 2>&1 | tail -1`\" -+ echo \#define LINUX_COMPILE_VERSION_ID $LINUX_COMPILE_VERSION_ID -+ echo \#define LINUX_COMPILE_VERSION_ID_TYPE typedef char* "$LINUX_COMPILE_VERSION_ID""_t" - ) > .tmpcompile - - # Only replace the real compile.h if the new one is different, ---- linux-2.5.73/kernel/ksyms.c~lkcd-kernel-changes-2.5.73 Sun Jun 22 11:32:30 2003 -+++ linux-2.5.73-n9560/kernel/ksyms.c Mon Jun 30 14:56:26 2003 -@@ -59,6 +59,8 @@ - #include - #include - #include -+#include -+#include - - #if defined(CONFIG_PROC_FS) - #include -@@ -606,3 +608,9 @@ EXPORT_SYMBOL(ptrace_notify); - EXPORT_SYMBOL(console_printk); - - EXPORT_SYMBOL(current_kernel_time); -+ -+#ifdef CONFIG_CRASH_DUMP_MODULE -+EXPORT_SYMBOL(min_low_pfn); -+EXPORT_SYMBOL(dump_oncpu); -+EXPORT_SYMBOL(dump_function_ptr); -+#endif ---- linux-2.5.73/kernel/panic.c~lkcd-kernel-changes-2.5.73 Sun Jun 22 11:33:18 2003 -+++ linux-2.5.73-n9560/kernel/panic.c Mon Jun 30 14:56:26 2003 -@@ -16,12 +16,16 @@ - #include - #include - #include -+#ifdef CONFIG_KEXEC -+#include -+#endif - - asmlinkage void sys_sync(void); /* it's really int */ - - int panic_timeout; - int panic_on_oops; - int tainted; -+void (*dump_function_ptr)(const char *, const struct pt_regs *) = 0; - - struct notifier_block *panic_notifier_list; - -@@ -54,6 +58,7 @@ NORET_TYPE void panic(const char * fmt, - va_start(args, fmt); - vsnprintf(buf, sizeof(buf), fmt, args); - va_end(args); -+ - printk(KERN_EMERG "Kernel panic: %s\n",buf); - if (in_interrupt()) - printk(KERN_EMERG "In interrupt handler - not syncing\n"); -@@ -76,6 +81,18 @@ NORET_TYPE void panic(const char * fmt, - * We can't use the "normal" timers since we just panicked.. 
- */ - printk(KERN_EMERG "Rebooting in %d seconds..",panic_timeout); -+#ifdef CONFIG_KEXEC -+{ -+ struct kimage *image; -+ image = xchg(&kexec_image, 0); -+ if (image) { -+ printk(KERN_EMERG "by starting a new kernel ..\n"); -+ mdelay(panic_timeout*1000); -+ machine_kexec(image); -+ } -+} -+#endif -+ - mdelay(panic_timeout*1000); - /* - * Should we run the reboot notifier. For the moment Im ---- linux-2.5.73/kernel/sched.c~lkcd-kernel-changes-2.5.73 Sun Jun 22 11:33:08 2003 -+++ linux-2.5.73-n9560/kernel/sched.c Mon Jun 30 14:56:26 2003 -@@ -40,6 +40,9 @@ - #define cpu_to_node_mask(cpu) (cpu_online_map) - #endif - -+/* used to soft spin in sched while dump is in progress */ -+int dump_oncpu; -+ - /* - * Convert user-nice values [ -20 ... 0 ... 19 ] - * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ], -@@ -1249,6 +1252,15 @@ asmlinkage void schedule(void) - struct list_head *queue; - int idx; - -+ /* -+ * If crash dump is in progress, this other cpu's -+ * need to wait until it completes. -+ * NB: this code is optimized away for kernels without -+ * dumping enabled. -+ */ -+ if (unlikely(dump_oncpu)) -+ goto dump_scheduling_disabled; -+ - /* - * Test if we are atomic. Since do_exit() needs to call into - * schedule() atomically, we ignore that path for now. 
-@@ -1336,6 +1348,16 @@ switch_tasks: - preempt_enable_no_resched(); - if (test_thread_flag(TIF_NEED_RESCHED)) - goto need_resched; -+ -+ return; -+ -+ dump_scheduling_disabled: -+ /* allow scheduling only if this is the dumping cpu */ -+ if (dump_oncpu != smp_processor_id()+1) { -+ while (dump_oncpu) -+ cpu_relax(); -+ } -+ return; - } - - #ifdef CONFIG_PREEMPT ---- linux-2.5.73/lib/Kconfig~lkcd-kernel-changes-2.5.73 Sun Jun 22 11:32:56 2003 -+++ linux-2.5.73-n9560/lib/Kconfig Mon Jun 30 14:56:26 2003 -@@ -17,14 +17,16 @@ config CRC32 - # - config ZLIB_INFLATE - tristate -- default y if CRAMFS=y || PPP_DEFLATE=y || JFFS2_FS=y || ZISOFS_FS=y || BINFMT_ZFLAT=y || CRYPTO_DEFLATE=y -- default m if CRAMFS=m || PPP_DEFLATE=m || JFFS2_FS=m || ZISOFS_FS=m || BINFMT_ZFLAT=m || CRYPTO_DEFLATE=m -+ default y if CRAMFS=y || PPP_DEFLATE=y || JFFS2_FS=y || ZISOFS_FS=y || BINFMT_ZFLAT=y || CRYPTO_DEFLATE=y || CRASH_DUMP_COMPRESS_GZIP=y -+ default m if CRAMFS=m || PPP_DEFLATE=m || JFFS2_FS=m || ZISOFS_FS=m || BINFMT_ZFLAT=m || CRYPTO_DEFLATE=m || CRASH_DUMP_COMPRESS_GZIP=m - - config ZLIB_DEFLATE - tristate - default m if PPP_DEFLATE!=y && JFFS2_FS!=y && CRYPTO_DEFLATE!=y && \ -- (PPP_DEFLATE=m || JFFS2_FS=m || CRYPTO_DEFLATE=m) -- default y if PPP_DEFLATE=y || JFFS2_FS=y || CRYPTO_DEFLATE=y -+ (PPP_DEFLATE=m || JFFS2_FS=m || CRYPTO_DEFLATE=m \ -+ || CRASH_DUMP_COMPRESS_GZIP=m ) -+ default y if PPP_DEFLATE=y || JFFS2_FS=y || CRYPTO_DEFLATE=y \ -+ || CRASH_DUMP_COMPRESS_GZIP=y - - endmenu - ---- linux-2.5.73/mm/page_alloc.c~lkcd-kernel-changes-2.5.73 Sun Jun 22 11:32:32 2003 -+++ linux-2.5.73-n9560/mm/page_alloc.c Mon Jun 30 14:56:26 2003 -@@ -84,7 +84,8 @@ static void bad_page(const char *functio - page->mapping = NULL; - } - --#ifndef CONFIG_HUGETLB_PAGE -+#if !defined(CONFIG_HUGETLB_PAGE) && !defined(CONFIG_CRASH_DUMP) \ -+ && !defined(CONFIG_CRASH_DUMP_MODULE) - #define prep_compound_page(page, order) do { } while (0) - #define destroy_compound_page(page, order) do { } while 
(0) - #else ---- linux-2.5.73/init/Makefile~lkcd-kernel-changes-2.5.73 Sun Jun 22 11:32:42 2003 -+++ linux-2.5.73-n9560/init/Makefile Mon Jun 30 14:56:26 2003 -@@ -9,6 +9,9 @@ mounts-$(CONFIG_BLK_DEV_RAM) += do_mount - mounts-$(CONFIG_BLK_DEV_INITRD) += do_mounts_initrd.o - mounts-$(CONFIG_BLK_DEV_MD) += do_mounts_md.o - -+extra-$(CONFIG_CRASH_DUMP) += kerntypes.o -+CFLAGS_kerntypes.o := -gstabs -+ - # files to be removed upon make clean - clean-files := ../include/linux/compile.h - -@@ -24,3 +27,4 @@ $(obj)/version.o: include/linux/compile. - include/linux/compile.h: FORCE - @echo ' CHK $@' - @sh $(srctree)/scripts/mkcompile_h $@ "$(UTS_MACHINE)" "$(CONFIG_SMP)" "$(CC) $(CFLAGS)" -+ ---- linux-2.5.73/init/main.c~lkcd-kernel-changes-2.5.73 Sun Jun 22 11:32:35 2003 -+++ linux-2.5.73-n9560/init/main.c Mon Jun 30 14:56:26 2003 -@@ -101,6 +101,16 @@ extern void ipc_init(void); - int system_running = 0; - - /* -+ * The kernel_magic value represents the address of _end, which allows -+ * namelist tools to "match" each other respectively. That way a tool -+ * that looks at /dev/mem can verify that it is using the right System.map -+ * file -- if kernel_magic doesn't equal the namelist value of _end, -+ * something's wrong. 
-+ */ -+extern unsigned long _end; -+unsigned long *kernel_magic = &_end; -+ -+/* - * Boot command-line arguments - */ - #define MAX_INIT_ARGS 8 ---- linux-2.5.73/init/version.c~lkcd-kernel-changes-2.5.73 Sun Jun 22 11:32:45 2003 -+++ linux-2.5.73-n9560/init/version.c Mon Jun 30 14:56:26 2003 -@@ -10,6 +10,7 @@ - #include - #include - #include -+#include - - #define version(a) Version_ ## a - #define version_string(a) version(a) -@@ -24,3 +25,6 @@ struct new_utsname system_utsname = { - const char *linux_banner = - "Linux version " UTS_RELEASE " (" LINUX_COMPILE_BY "@" - LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION "\n"; -+ -+const char *LINUX_COMPILE_VERSION_ID = __stringify(LINUX_COMPILE_VERSION_ID); -+LINUX_COMPILE_VERSION_ID_TYPE; - -_ diff --git a/lustre/kernel_patches/patches/vfs_intent_2.5.72_rev1.patch b/lustre/kernel_patches/patches/vfs_intent_2.5.72_rev1.patch deleted file mode 100644 index 566dec9..0000000 --- a/lustre/kernel_patches/patches/vfs_intent_2.5.72_rev1.patch +++ /dev/null @@ -1,1031 +0,0 @@ - fs/exec.c | 15 ++- - fs/namei.c | 189 ++++++++++++++++++++++++++++++++++++++++++------- - fs/namespace.c | 2 - fs/open.c | 64 ++++++++++------ - fs/stat.c | 28 +++++-- - fs/sysfs/inode.c | 2 - include/linux/dcache.h | 36 +++++++++ - include/linux/fs.h | 10 ++ - include/linux/namei.h | 19 +++- - kernel/ksyms.c | 8 ++ - net/sunrpc/rpc_pipe.c | 6 - - net/unix/af_unix.c | 2 - 12 files changed, 310 insertions(+), 71 deletions(-) - ---- linux-2.5.73/fs/sysfs/inode.c~vfs_intent_2.5.72_rev1 2003-06-22 12:33:11.000000000 -0600 -+++ linux-2.5.73-braam/fs/sysfs/inode.c 2003-07-15 02:23:28.000000000 -0600 -@@ -81,7 +81,7 @@ struct dentry * sysfs_get_dentry(struct - qstr.name = name; - qstr.len = strlen(name); - qstr.hash = full_name_hash(name,qstr.len); -- return lookup_hash(&qstr,parent); -+ return lookup_hash(&qstr,parent, NULL); - } - - void sysfs_hash_and_remove(struct dentry * dir, const char * name) ---- 
linux-2.5.73/fs/exec.c~vfs_intent_2.5.72_rev1 2003-06-22 12:32:41.000000000 -0600 -+++ linux-2.5.73-braam/fs/exec.c 2003-07-15 02:23:28.000000000 -0600 -@@ -116,6 +116,9 @@ asmlinkage long sys_uselib(const char __ - struct file * file; - struct nameidata nd; - int error; -+ intent_init(&nd.it, IT_OPEN, O_RDONLY); -+ -+ error = user_path_walk_it(library, &nd); - - error = user_path_walk(library, &nd); - if (error) -@@ -129,7 +132,7 @@ asmlinkage long sys_uselib(const char __ - if (error) - goto exit; - -- file = dentry_open(nd.dentry, nd.mnt, O_RDONLY); -+ file = dentry_open_it(nd.dentry, nd.mnt, O_RDONLY, &nd.it); - error = PTR_ERR(file); - if (IS_ERR(file)) - goto out; -@@ -453,8 +456,12 @@ static inline void free_arg_pages(struct - struct file *open_exec(const char *name) - { - struct nameidata nd; -- int err = path_lookup(name, LOOKUP_FOLLOW, &nd); -- struct file *file = ERR_PTR(err); -+ int err; -+ struct file *file; -+ -+ intent_init(&nd.it, IT_OPEN, O_RDONLY); -+ err = path_lookup(name, LOOKUP_FOLLOW, &nd); -+ file = ERR_PTR(err); - - if (!err) { - struct inode *inode = nd.dentry->d_inode; -@@ -466,7 +473,7 @@ struct file *open_exec(const char *name) - err = -EACCES; - file = ERR_PTR(err); - if (!err) { -- file = dentry_open(nd.dentry, nd.mnt, O_RDONLY); -+ file = dentry_open_it(nd.dentry, nd.mnt, O_RDONLY, &nd.it); - if (!IS_ERR(file)) { - err = deny_write_access(file); - if (err) { ---- linux-2.5.73/fs/namei.c~vfs_intent_2.5.72_rev1 2003-06-22 12:32:41.000000000 -0600 -+++ linux-2.5.73-braam/fs/namei.c 2003-07-15 02:23:28.000000000 -0600 -@@ -263,8 +263,19 @@ int deny_write_access(struct file * file - return 0; - } - -+void intent_release(struct lookup_intent *it) -+{ -+ if (!it) -+ return; -+ if (it->it_magic != INTENT_MAGIC) -+ return; -+ if (it->it_op_release) -+ it->it_op_release(it); -+} -+ - void path_release(struct nameidata *nd) - { -+ intent_release(&nd->it); - dput(nd->dentry); - mntput(nd->mnt); - } -@@ -273,7 +284,7 @@ void path_release(struct 
nameidata *nd) - * Internal lookup() using the new generic dcache. - * SMP-safe - */ --static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name, int flags) -+static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name, int flags, struct nameidata *nd) - { - struct dentry * dentry = __d_lookup(parent, name); - -@@ -283,6 +294,14 @@ static struct dentry * cached_lookup(str - if (!dentry) - dentry = d_lookup(parent, name); - -+ if (dentry && dentry->d_op && dentry->d_op->d_revalidate_nd) { -+ if (!dentry->d_op->d_revalidate_nd(dentry, flags, nd) && -+ !d_invalidate(dentry)) { -+ dput(dentry); -+ dentry = NULL; -+ } -+ return dentry; -+ } else - if (dentry && dentry->d_op && dentry->d_op->d_revalidate) { - if (!dentry->d_op->d_revalidate(dentry, flags) && !d_invalidate(dentry)) { - dput(dentry); -@@ -336,7 +355,7 @@ ok: - * make sure that nobody added the entry to the dcache in the meantime.. - * SMP-safe - */ --static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, int flags) -+static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, int flags, struct nameidata *nd) - { - struct dentry * result; - struct inode *dir = parent->d_inode; -@@ -361,7 +380,10 @@ static struct dentry * real_lookup(struc - struct dentry * dentry = d_alloc(parent, name); - result = ERR_PTR(-ENOMEM); - if (dentry) { -- result = dir->i_op->lookup(dir, dentry); -+ if (dir->i_op->lookup_it) -+ result = dir->i_op->lookup_it(dir, dentry, nd); -+ else -+ result = dir->i_op->lookup(dir, dentry); - if (result) - dput(dentry); - else -@@ -381,6 +403,12 @@ static struct dentry * real_lookup(struc - dput(result); - result = ERR_PTR(-ENOENT); - } -+ } else if (result->d_op && result->d_op->d_revalidate_nd) { -+ if (!result->d_op->d_revalidate_nd(result, flags, nd) && -+ !d_invalidate(result)) { -+ dput(result); -+ result = ERR_PTR(-ENOENT); -+ } - } - return result; - } -@@ -455,15 +483,25 @@ static int 
follow_mount(struct vfsmount - return res; - } - --static inline int __follow_down(struct vfsmount **mnt, struct dentry **dentry) -+static inline int __follow_down(struct vfsmount **mnt, struct dentry **dentry, -+ struct lookup_intent *it) - { - struct vfsmount *mounted; - - spin_lock(&dcache_lock); - mounted = lookup_mnt(*mnt, *dentry); - if (mounted) { -+ int opc = 0, mode = 0; - *mnt = mntget(mounted); - spin_unlock(&dcache_lock); -+ if (it) { -+ opc = it->it_op; -+ mode = it->it_mode; -+ } -+ if (it) { -+ it->it_op = opc; -+ it->it_mode = mode; -+ } - dput(*dentry); - mntput(mounted->mnt_parent); - *dentry = dget(mounted->mnt_root); -@@ -475,7 +513,7 @@ static inline int __follow_down(struct v - - int follow_down(struct vfsmount **mnt, struct dentry **dentry) - { -- return __follow_down(mnt,dentry); -+ return __follow_down(mnt,dentry,NULL); - } - - static inline void follow_dotdot(struct vfsmount **mnt, struct dentry **dentry) -@@ -531,7 +569,8 @@ static int do_lookup(struct nameidata *n - - if (!dentry) - goto need_lookup; -- if (dentry->d_op && dentry->d_op->d_revalidate) -+ if (dentry->d_op && (dentry->d_op->d_revalidate || -+ dentry->d_op->d_revalidate_nd) ) - goto need_revalidate; - done: - path->mnt = mnt; -@@ -539,13 +578,17 @@ done: - return 0; - - need_lookup: -- dentry = real_lookup(nd->dentry, name, LOOKUP_CONTINUE); -+ dentry = real_lookup(nd->dentry, name, LOOKUP_CONTINUE, nd); - if (IS_ERR(dentry)) - goto fail; - goto done; - - need_revalidate: -- if (dentry->d_op->d_revalidate(dentry, flags)) -+ if (dentry->d_op->d_revalidate && -+ dentry->d_op->d_revalidate(dentry, flags)) -+ goto done; -+ if (dentry->d_op->d_revalidate_nd && -+ dentry->d_op->d_revalidate_nd(dentry, flags, nd)) - goto done; - if (d_invalidate(dentry)) - goto done; -@@ -556,6 +599,32 @@ fail: - return PTR_ERR(dentry); - } - -+ -+static int revalidate_special(struct nameidata *nd) -+{ -+ struct dentry *dentry = nd->dentry; -+ int err, counter = 0; -+ -+ if (!dentry->d_op || 
!dentry->d_op->d_revalidate_nd) -+ return 0; -+ revalidate_again: -+ if (!dentry->d_op->d_revalidate_nd(dentry, 0, nd)) { -+ struct dentry *new; -+ if ((err = permission(dentry->d_parent->d_inode, MAY_EXEC))) -+ return err; -+ new = real_lookup(dentry->d_parent, &dentry->d_name, 0, nd); -+ d_invalidate(dentry); -+ dput(dentry); -+ dentry = new; -+ counter++; -+ if (counter < 10) -+ goto revalidate_again; -+ printk("excessive revalidate_it loops\n"); -+ return -ESTALE; -+ } -+ return 0; -+} -+ - /* - * Name resolution. - * -@@ -655,7 +724,9 @@ int link_path_walk(const char * name, st - - if (inode->i_op->follow_link) { - mntget(next.mnt); -+ nd->flags |= LOOKUP_LINK_NOTLAST; - err = do_follow_link(next.dentry, nd); -+ nd->flags &= ~LOOKUP_LINK_NOTLAST; - dput(next.dentry); - mntput(next.mnt); - if (err) -@@ -673,7 +744,7 @@ int link_path_walk(const char * name, st - nd->dentry = next.dentry; - } - err = -ENOTDIR; -- if (!inode->i_op->lookup) -+ if (!inode->i_op->lookup && !inode->i_op->lookup_it) - break; - continue; - /* here ends the main loop */ -@@ -693,6 +764,11 @@ last_component: - inode = nd->dentry->d_inode; - /* fallthrough */ - case 1: -+ nd->flags |= LOOKUP_LAST; -+ err = revalidate_special(nd); -+ nd->flags &= ~LOOKUP_LAST; -+ if (err) -+ break; - goto return_base; - } - if (nd->dentry->d_op && nd->dentry->d_op->d_hash) { -@@ -700,7 +776,9 @@ last_component: - if (err < 0) - break; - } -+ nd->flags |= LOOKUP_LAST; - err = do_lookup(nd, &this, &next, 0); -+ nd->flags &= ~LOOKUP_LAST; - if (err) - break; - follow_mount(&next.mnt, &next.dentry); -@@ -724,7 +802,8 @@ last_component: - break; - if (lookup_flags & LOOKUP_DIRECTORY) { - err = -ENOTDIR; -- if (!inode->i_op || !inode->i_op->lookup) -+ if (!inode->i_op || -+ (!inode->i_op->lookup && !inode->i_op->lookup_it)) - break; - } - goto return_base; -@@ -743,7 +822,7 @@ out_dput: - dput(next.dentry); - break; - } -- path_release(nd); -+ path_release(nd); - return_err: - return err; - } -@@ -866,7 +945,8 @@ 
int path_lookup(const char *name, unsign - * needs parent already locked. Doesn't follow mounts. - * SMP-safe. - */ --struct dentry * lookup_hash(struct qstr *name, struct dentry * base) -+struct dentry * lookup_hash(struct qstr *name, struct dentry * base, -+ struct nameidata *nd) - { - struct dentry * dentry; - struct inode *inode; -@@ -889,13 +969,16 @@ struct dentry * lookup_hash(struct qstr - goto out; - } - -- dentry = cached_lookup(base, name, 0); -+ dentry = cached_lookup(base, name, 0, nd); - if (!dentry) { - struct dentry *new = d_alloc(base, name); - dentry = ERR_PTR(-ENOMEM); - if (!new) - goto out; -- dentry = inode->i_op->lookup(inode, new); -+ if (inode->i_op->lookup_it) -+ dentry = inode->i_op->lookup_it(inode, new, nd); -+ else -+ dentry = inode->i_op->lookup(inode, new); - if (!dentry) - dentry = new; - else -@@ -906,7 +989,7 @@ out: - } - - /* SMP-safe */ --struct dentry * lookup_one_len(const char * name, struct dentry * base, int len) -+struct dentry * lookup_one_len_it(const char * name, struct dentry * base, int len, struct nameidata *nd) - { - unsigned long hash; - struct qstr this; -@@ -926,11 +1009,16 @@ struct dentry * lookup_one_len(const cha - } - this.hash = end_name_hash(hash); - -- return lookup_hash(&this, base); -+ return lookup_hash(&this, base, nd); - access: - return ERR_PTR(-EACCES); - } - -+struct dentry * lookup_one_len(const char * name, struct dentry * base, int len) -+{ -+ return lookup_one_len_it(name, base, len, NULL); -+} -+ - /* - * namei() - * -@@ -942,10 +1030,11 @@ access: - * that namei follows links, while lnamei does not. 
- * SMP-safe - */ --int __user_walk(const char __user *name, unsigned flags, struct nameidata *nd) -+int __user_walk_it(const char __user *name, unsigned flags, struct nameidata *nd) - { - char *tmp = getname(name); - int err = PTR_ERR(tmp); -+ - - if (!IS_ERR(tmp)) { - err = path_lookup(tmp, flags, nd); -@@ -954,6 +1043,12 @@ int __user_walk(const char __user *name, - return err; - } - -+int __user_walk(const char __user *name, unsigned flags, struct nameidata *nd) -+{ -+ intent_init(&nd->it, IT_LOOKUP, 0); -+ return __user_walk_it(name, flags, nd); -+} -+ - /* - * It's inline, so penalty for filesystems that don't use sticky bit is - * minimal. -@@ -1097,6 +1192,32 @@ void unlock_rename(struct dentry *p1, st - } - } - -+int vfs_create_it(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *nd) -+{ -+ int error = may_create(dir, dentry); -+ -+ if (error) -+ return error; -+ -+ if (!dir->i_op || (!dir->i_op->create && !dir->i_op->create_nd)) -+ return -EACCES; /* shouldn't it be ENOSYS? */ -+ mode &= S_IALLUGO; -+ mode |= S_IFREG; -+ error = security_inode_create(dir, dentry, mode); -+ if (error) -+ return error; -+ DQUOT_INIT(dir); -+ if (dir->i_op->create_nd) -+ error = dir->i_op->create_nd(dir, dentry, mode, nd); -+ else -+ error = dir->i_op->create(dir, dentry, mode); -+ if (!error) { -+ inode_dir_notify(dir, DN_CREATE); -+ security_inode_post_create(dir, dentry, mode); -+ } -+ return error; -+} -+ - int vfs_create(struct inode *dir, struct dentry *dentry, int mode) - { - int error = may_create(dir, dentry); -@@ -1236,6 +1357,9 @@ int open_namei(const char * pathname, in - /* - * Create - we need to know the parent. 
- */ -+ nd->it.it_mode = mode; -+ nd->it.it_op |= IT_CREAT; -+ - error = path_lookup(pathname, LOOKUP_PARENT, nd); - if (error) - return error; -@@ -1251,7 +1375,9 @@ int open_namei(const char * pathname, in - - dir = nd->dentry; - down(&dir->d_inode->i_sem); -- dentry = lookup_hash(&nd->last, nd->dentry); -+ nd->flags |= LOOKUP_LAST; -+ dentry = lookup_hash(&nd->last, nd->dentry, nd); -+ nd->flags &= ~LOOKUP_LAST; - - do_last: - error = PTR_ERR(dentry); -@@ -1259,12 +1385,13 @@ do_last: - up(&dir->d_inode->i_sem); - goto exit; - } -- -+ -+ nd->it.it_mode = mode; - /* Negative dentry, just create the file */ - if (!dentry->d_inode) { - if (!IS_POSIXACL(dir->d_inode)) - mode &= ~current->fs->umask; -- error = vfs_create(dir->d_inode, dentry, mode); -+ error = vfs_create_it(dir->d_inode, dentry, mode, nd); - up(&dir->d_inode->i_sem); - dput(nd->dentry); - nd->dentry = dentry; -@@ -1289,7 +1416,7 @@ do_last: - error = -ELOOP; - if (flag & O_NOFOLLOW) - goto exit_dput; -- while (__follow_down(&nd->mnt,&dentry) && d_mountpoint(dentry)); -+ while (__follow_down(&nd->mnt,&dentry,&nd->it) && d_mountpoint(dentry)); - } - error = -ENOENT; - if (!dentry->d_inode) -@@ -1354,7 +1481,9 @@ do_link: - } - dir = nd->dentry; - down(&dir->d_inode->i_sem); -- dentry = lookup_hash(&nd->last, nd->dentry); -+ nd->flags |= LOOKUP_LAST; -+ dentry = lookup_hash(&nd->last, nd->dentry, nd); -+ nd->flags &= ~LOOKUP_LAST; - putname(nd->last.name); - goto do_last; - } -@@ -1368,7 +1497,7 @@ static struct dentry *lookup_create(stru - dentry = ERR_PTR(-EEXIST); - if (nd->last_type != LAST_NORM) - goto fail; -- dentry = lookup_hash(&nd->last, nd->dentry); -+ dentry = lookup_hash(&nd->last, nd->dentry, nd); - if (IS_ERR(dentry)) - goto fail; - if (!is_dir && nd->last.name[nd->last.len] && !dentry->d_inode) -@@ -1600,7 +1729,7 @@ asmlinkage long sys_rmdir(const char __u - goto exit1; - } - down(&nd.dentry->d_inode->i_sem); -- dentry = lookup_hash(&nd.last, nd.dentry); -+ dentry = 
lookup_hash(&nd.last, nd.dentry, &nd); - error = PTR_ERR(dentry); - if (!IS_ERR(dentry)) { - error = vfs_rmdir(nd.dentry->d_inode, dentry); -@@ -1669,7 +1798,7 @@ asmlinkage long sys_unlink(const char __ - if (nd.last_type != LAST_NORM) - goto exit1; - down(&nd.dentry->d_inode->i_sem); -- dentry = lookup_hash(&nd.last, nd.dentry); -+ dentry = lookup_hash(&nd.last, nd.dentry, &nd); - error = PTR_ERR(dentry); - if (!IS_ERR(dentry)) { - /* Why not before? Because we want correct error value */ -@@ -2019,7 +2148,7 @@ static inline int do_rename(const char * - - trap = lock_rename(new_dir, old_dir); - -- old_dentry = lookup_hash(&oldnd.last, old_dir); -+ old_dentry = lookup_hash(&oldnd.last, old_dir, &oldnd); - error = PTR_ERR(old_dentry); - if (IS_ERR(old_dentry)) - goto exit3; -@@ -2039,7 +2168,7 @@ static inline int do_rename(const char * - error = -EINVAL; - if (old_dentry == trap) - goto exit4; -- new_dentry = lookup_hash(&newnd.last, new_dir); -+ new_dentry = lookup_hash(&newnd.last, new_dir, &newnd); - error = PTR_ERR(new_dentry); - if (IS_ERR(new_dentry)) - goto exit4; -@@ -2104,7 +2233,10 @@ static inline int - __vfs_follow_link(struct nameidata *nd, const char *link) - { - int res = 0; -+ struct lookup_intent it = nd->it; -+ int mode = it.it_mode; - char *name; -+ - if (IS_ERR(link)) - goto fail; - -@@ -2114,6 +2246,9 @@ __vfs_follow_link(struct nameidata *nd, - /* weird __emul_prefix() stuff did it */ - goto out; - } -+ -+ intent_init(&nd->it, it.it_op, it.it_flags); -+ nd->it.it_mode = mode; - res = link_path_walk(link, nd); - out: - if (current->link_count || res || nd->last_type!=LAST_NORM) ---- linux-2.5.73/fs/namespace.c~vfs_intent_2.5.72_rev1 2003-06-22 12:32:57.000000000 -0600 -+++ linux-2.5.73-braam/fs/namespace.c 2003-07-15 02:23:28.000000000 -0600 -@@ -728,6 +728,7 @@ long do_mount(char * dev_name, char * di - int retval = 0; - int mnt_flags = 0; - -+ intent_init(&nd.it, IT_LOOKUP, 0); - /* Discard magic */ - if ((flags & MS_MGC_MSK) == MS_MGC_VAL) 
- flags &= ~MS_MGC_MSK; -@@ -937,6 +938,7 @@ void set_fs_pwd(struct fs_struct *fs, st - mntput(old_pwdmnt); - } - } -+EXPORT_SYMBOL(set_fs_pwd); - - static void chroot_fs_refs(struct nameidata *old_nd, struct nameidata *new_nd) - { ---- linux-2.5.73/fs/open.c~vfs_intent_2.5.72_rev1 2003-06-22 12:32:31.000000000 -0600 -+++ linux-2.5.73-braam/fs/open.c 2003-07-15 02:23:28.000000000 -0600 -@@ -200,7 +200,7 @@ static inline long do_sys_truncate(const - struct nameidata nd; - struct inode * inode; - int error; -- -+ intent_init(&nd.it, IT_GETATTR, 0); - error = -EINVAL; - if (length < 0) /* sorry, but loff_t says... */ - goto out; -@@ -443,6 +443,7 @@ asmlinkage long sys_access(const char __ - int old_fsuid, old_fsgid; - kernel_cap_t old_cap; - int res; -+ intent_init(&nd.it, IT_GETATTR, 0); - - if (mode & ~S_IRWXO) /* where's F_OK, X_OK, W_OK, R_OK? */ - return -EINVAL; -@@ -474,6 +475,7 @@ asmlinkage long sys_access(const char __ - if(!res && (mode & S_IWOTH) && IS_RDONLY(nd.dentry->d_inode) - && !special_file(nd.dentry->d_inode->i_mode)) - res = -EROFS; -+ - path_release(&nd); - } - -@@ -488,6 +490,7 @@ asmlinkage long sys_chdir(const char __u - { - struct nameidata nd; - int error; -+ intent_init(&nd.it, IT_GETATTR, 0); - - error = __user_walk(filename, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &nd); - if (error) -@@ -539,6 +542,7 @@ asmlinkage long sys_chroot(const char __ - { - struct nameidata nd; - int error; -+ intent_init(&nd.it, IT_GETATTR, 0); - - error = __user_walk(filename, LOOKUP_FOLLOW | LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd); - if (error) -@@ -611,7 +615,7 @@ asmlinkage long sys_chmod(const char __u - error = -EROFS; - if (IS_RDONLY(inode)) - goto dput_and_out; -- -+ - error = -EPERM; - if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) - goto dput_and_out; -@@ -719,25 +723,8 @@ asmlinkage long sys_fchown(unsigned int - * for the internal routines (ie open_namei()/follow_link() etc). 00 is - * used by symlinks. 
- */ --struct file *filp_open(const char * filename, int flags, int mode) --{ -- int namei_flags, error; -- struct nameidata nd; -- -- namei_flags = flags; -- if ((namei_flags+1) & O_ACCMODE) -- namei_flags++; -- if (namei_flags & O_TRUNC) -- namei_flags |= 2; -- -- error = open_namei(filename, namei_flags, mode, &nd); -- if (!error) -- return dentry_open(nd.dentry, nd.mnt, flags); -- -- return ERR_PTR(error); --} -- --struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags) -+struct file *dentry_open_it(struct dentry *dentry, struct vfsmount *mnt, int flags, -+ struct lookup_intent *it) - { - struct file * f; - struct inode *inode; -@@ -749,6 +736,7 @@ struct file *dentry_open(struct dentry * - goto cleanup_dentry; - f->f_flags = flags; - f->f_mode = (flags+1) & O_ACCMODE; -+ f->f_it = it; - inode = dentry->d_inode; - if (f->f_mode & FMODE_WRITE) { - error = get_write_access(inode); -@@ -767,6 +755,7 @@ struct file *dentry_open(struct dentry * - error = f->f_op->open(inode,f); - if (error) - goto cleanup_all; -+ intent_release(it); - } - f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC); - -@@ -778,7 +767,7 @@ struct file *dentry_open(struct dentry * - f = ERR_PTR(-EINVAL); - } - } -- -+ - return f; - - cleanup_all: -@@ -791,11 +780,42 @@ cleanup_all: - cleanup_file: - put_filp(f); - cleanup_dentry: -+ intent_release(it); - dput(dentry); - mntput(mnt); - return ERR_PTR(error); - } - -+struct file *filp_open(const char * filename, int flags, int mode) -+{ -+ int namei_flags, error; -+ struct file * temp_filp; -+ struct nameidata nd; -+ intent_init(&nd.it, IT_OPEN, flags); -+ -+ namei_flags = flags; -+ if ((namei_flags+1) & O_ACCMODE) -+ namei_flags++; -+ if (namei_flags & O_TRUNC) -+ namei_flags |= 2; -+ -+ error = open_namei(filename, namei_flags, mode, &nd); -+ if (!error) { -+ temp_filp = dentry_open_it(nd.dentry, nd.mnt, flags, &nd.it); -+ return temp_filp; -+ } -+ return ERR_PTR(error); -+} -+ -+ -+struct file *dentry_open(struct 
dentry *dentry, struct vfsmount *mnt, int flags) -+{ -+ struct lookup_intent it; -+ intent_init(&it, IT_LOOKUP, 0); -+ -+ return dentry_open_it(dentry, mnt, flags, &it); -+} -+ - /* - * Find an empty file descriptor entry, and mark it busy. - */ ---- linux-2.5.73/fs/stat.c~vfs_intent_2.5.72_rev1 2003-06-22 12:32:35.000000000 -0600 -+++ linux-2.5.73-braam/fs/stat.c 2003-07-15 02:23:28.000000000 -0600 -@@ -33,7 +33,7 @@ void generic_fillattr(struct inode *inod - stat->blksize = inode->i_blksize; - } - --int vfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) -+int vfs_getattr_it(struct vfsmount *mnt, struct dentry *dentry, struct lookup_intent *it, struct kstat *stat) - { - struct inode *inode = dentry->d_inode; - int retval; -@@ -44,6 +44,8 @@ int vfs_getattr(struct vfsmount *mnt, st - - if (inode->i_op->getattr) - return inode->i_op->getattr(mnt, dentry, stat); -+ if (inode->i_op->getattr_it) -+ return inode->i_op->getattr_it(mnt, dentry, it, stat); - - generic_fillattr(inode, stat); - if (!stat->blksize) { -@@ -56,15 +58,21 @@ int vfs_getattr(struct vfsmount *mnt, st - return 0; - } - -+int vfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) -+{ -+ return vfs_getattr_it(mnt, dentry, NULL, stat); -+} -+ - int vfs_stat(char __user *name, struct kstat *stat) - { - struct nameidata nd; - int error; -+ intent_init(&nd.it, IT_GETATTR, 0); - -- error = user_path_walk(name, &nd); -+ error = user_path_walk_it(name, &nd); - if (!error) { -- error = vfs_getattr(nd.mnt, nd.dentry, stat); -- path_release(&nd); -+ error = vfs_getattr_it(nd.mnt, nd.dentry, &nd.it, stat); -+ path_release(&nd); - } - return error; - } -@@ -73,11 +81,12 @@ int vfs_lstat(char __user *name, struct - { - struct nameidata nd; - int error; -+ intent_init(&nd.it, IT_GETATTR, 0); - -- error = user_path_walk_link(name, &nd); -+ error = user_path_walk_link_it(name, &nd); - if (!error) { -- error = vfs_getattr(nd.mnt, nd.dentry, stat); -- path_release(&nd); 
-+ error = vfs_getattr_it(nd.mnt, nd.dentry, &nd.it, stat); -+ path_release(&nd); - } - return error; - } -@@ -86,9 +95,12 @@ int vfs_fstat(unsigned int fd, struct ks - { - struct file *f = fget(fd); - int error = -EBADF; -+ struct nameidata nd; -+ intent_init(&nd.it, IT_GETATTR, 0); - - if (f) { -- error = vfs_getattr(f->f_vfsmnt, f->f_dentry, stat); -+ error = vfs_getattr_it(f->f_vfsmnt, f->f_dentry, &nd.it, stat); -+ intent_release(&nd.it); - fput(f); - } - return error; ---- linux-2.5.73/include/linux/dcache.h~vfs_intent_2.5.72_rev1 2003-06-22 12:33:35.000000000 -0600 -+++ linux-2.5.73-braam/include/linux/dcache.h 2003-07-15 02:23:28.000000000 -0600 -@@ -4,6 +4,7 @@ - #ifdef __KERNEL__ - - #include -+#include - #include - #include - #include -@@ -12,6 +13,38 @@ - - struct vfsmount; - -+#define IT_OPEN (1) -+#define IT_CREAT (1<<1) -+#define IT_READDIR (1<<2) -+#define IT_GETATTR (1<<3) -+#define IT_LOOKUP (1<<4) -+#define IT_UNLINK (1<<5) -+#define IT_GETXATTR (1<<6) -+ -+struct nameidata; -+#define INTENT_MAGIC 0x19620323 -+struct lookup_intent { -+ int it_op; -+ int it_mode; -+ void (*it_op_release)(struct lookup_intent *); -+ int it_magic; -+ int it_flags; -+ int it_disposition; -+ int it_status; -+ struct iattr *it_iattr; -+ __u64 it_lock_handle[2]; -+ int it_lock_mode; -+ void *it_data; -+}; -+ -+static inline void intent_init(struct lookup_intent *it, int op, int flags) -+{ -+ memset(it, 0, sizeof(*it)); -+ it->it_magic = INTENT_MAGIC; -+ it->it_op = op; -+ it->it_flags = flags; -+} -+ - /* - * linux/include/linux/dcache.h - * -@@ -34,6 +67,8 @@ struct qstr { - char name_str[0]; - }; - -+#include -+ - struct dentry_stat_t { - int nr_dentry; - int nr_unused; -@@ -112,6 +147,7 @@ struct dentry_operations { - int (*d_delete)(struct dentry *); - void (*d_release)(struct dentry *); - void (*d_iput)(struct dentry *, struct inode *); -+ int (*d_revalidate_nd)(struct dentry *, int, struct nameidata *); - }; - - /* the dentry parameter passed to d_hash and 
d_compare is the parent ---- linux-2.5.73/include/linux/fs.h~vfs_intent_2.5.72_rev1 2003-06-22 12:32:38.000000000 -0600 -+++ linux-2.5.73-braam/include/linux/fs.h 2003-07-15 02:23:28.000000000 -0600 -@@ -237,6 +237,8 @@ typedef int (get_blocks_t)(struct inode - #define ATTR_ATTR_FLAG 1024 - #define ATTR_KILL_SUID 2048 - #define ATTR_KILL_SGID 4096 -+#define ATTR_RAW 8192 /* file system, not vfs will massage attrs */ -+#define ATTR_FROM_OPEN 16384 /* called from open path, ie O_TRUNC */ - - /* - * This is the Inode Attributes structure, used for notify_change(). It -@@ -445,6 +447,7 @@ struct file { - /* Used by fs/eventpoll.c to link all the hooks to this file */ - struct list_head f_ep_links; - spinlock_t f_ep_lock; -+ struct lookup_intent *f_it; - }; - extern spinlock_t files_lock; - #define file_list_lock() spin_lock(&files_lock); -@@ -731,7 +734,10 @@ struct file_operations { - - struct inode_operations { - int (*create) (struct inode *,struct dentry *,int); -+ int (*create_nd) (struct inode *,struct dentry *,int, struct nameidata *); - struct dentry * (*lookup) (struct inode *,struct dentry *); -+ struct dentry * (*lookup_it) (struct inode *,struct dentry *, -+ struct nameidata *); - int (*link) (struct dentry *,struct inode *,struct dentry *); - int (*unlink) (struct inode *,struct dentry *); - int (*symlink) (struct inode *,struct dentry *,const char *); -@@ -745,7 +751,9 @@ struct inode_operations { - void (*truncate) (struct inode *); - int (*permission) (struct inode *, int); - int (*setattr) (struct dentry *, struct iattr *); -+ int (*setattr_raw) (struct inode *, struct iattr *); - int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *); -+ int (*getattr_it) (struct vfsmount *, struct dentry *, struct lookup_intent *, struct kstat *); - int (*setxattr) (struct dentry *, const char *,const void *,size_t,int); - ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t); - ssize_t (*listxattr) (struct dentry *, char *, size_t); -@@ 
-958,6 +966,7 @@ extern int register_filesystem(struct fi - extern int unregister_filesystem(struct file_system_type *); - extern struct vfsmount *kern_mount(struct file_system_type *); - extern int may_umount(struct vfsmount *); -+struct vfsmount *do_kern_mount(const char *type, int flags, const char *name, void *data); - extern long do_mount(char *, char *, char *, unsigned long, void *); - - extern int vfs_statfs(struct super_block *, struct kstatfs *); -@@ -1025,6 +1034,7 @@ extern int do_truncate(struct dentry *, - - extern struct file *filp_open(const char *, int, int); - extern struct file * dentry_open(struct dentry *, struct vfsmount *, int); -+extern struct file * dentry_open_it(struct dentry *, struct vfsmount *, int, struct lookup_intent *); - extern int filp_close(struct file *, fl_owner_t id); - extern char * getname(const char __user *); - ---- linux-2.5.73/include/linux/namei.h~vfs_intent_2.5.72_rev1 2003-06-22 12:32:40.000000000 -0600 -+++ linux-2.5.73-braam/include/linux/namei.h 2003-07-15 02:23:28.000000000 -0600 -@@ -11,6 +11,7 @@ struct nameidata { - struct qstr last; - unsigned int flags; - int last_type; -+ struct lookup_intent it; - }; - - /* -@@ -27,24 +28,32 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LA - * - locked when lookup done with dcache_lock held - */ - #define LOOKUP_FOLLOW 1 --#define LOOKUP_DIRECTORY 2 --#define LOOKUP_CONTINUE 4 --#define LOOKUP_PARENT 16 --#define LOOKUP_NOALT 32 -+#define LOOKUP_DIRECTORY (1<<1) -+#define LOOKUP_CONTINUE (1<<2) -+#define LOOKUP_PARENT (1<<3) -+#define LOOKUP_NOALT (1<<4) -+#define LOOKUP_LAST (1<<5) -+#define LOOKUP_LINK_NOTLAST (1<<6) - - - extern int FASTCALL(__user_walk(const char __user *, unsigned, struct nameidata *)); -+extern int FASTCALL(__user_walk_it(const char __user *name, unsigned flags, struct nameidata *nd)); - #define user_path_walk(name,nd) \ - __user_walk(name, LOOKUP_FOLLOW, nd) - #define user_path_walk_link(name,nd) \ - __user_walk(name, 0, nd) -+#define 
user_path_walk_it(name,nd) \ -+ __user_walk_it(name, LOOKUP_FOLLOW, nd) -+#define user_path_walk_link_it(name,nd) \ -+ __user_walk_it(name, 0, nd) -+extern void intent_release(struct lookup_intent *); - extern int FASTCALL(path_lookup(const char *, unsigned, struct nameidata *)); - extern int FASTCALL(path_walk(const char *, struct nameidata *)); - extern int FASTCALL(link_path_walk(const char *, struct nameidata *)); - extern void path_release(struct nameidata *); - - extern struct dentry * lookup_one_len(const char *, struct dentry *, int); --extern struct dentry * lookup_hash(struct qstr *, struct dentry *); -+extern struct dentry * lookup_hash(struct qstr *, struct dentry *, struct nameidata *); - - extern int follow_down(struct vfsmount **, struct dentry **); - extern int follow_up(struct vfsmount **, struct dentry **); ---- linux-2.5.73/kernel/ksyms.c~vfs_intent_2.5.72_rev1 2003-07-15 02:21:56.000000000 -0600 -+++ linux-2.5.73-braam/kernel/ksyms.c 2003-07-16 16:36:31.000000000 -0600 -@@ -377,6 +377,7 @@ EXPORT_SYMBOL(unregister_filesystem); - EXPORT_SYMBOL(kern_mount); - EXPORT_SYMBOL(__mntput); - EXPORT_SYMBOL(may_umount); -+EXPORT_SYMBOL(reparent_to_init); - - /* executable format registration */ - EXPORT_SYMBOL(register_binfmt); -@@ -406,6 +407,12 @@ EXPORT_SYMBOL(del_timer); - EXPORT_SYMBOL(request_irq); - EXPORT_SYMBOL(free_irq); - -+/* lustre */ -+EXPORT_SYMBOL(do_kern_mount); -+EXPORT_SYMBOL(exit_files); -+//EXPORT_SYMBOL(kmem_cache_validate); -+ -+ - /* waitqueue handling */ - EXPORT_SYMBOL(add_wait_queue); - EXPORT_SYMBOL(add_wait_queue_exclusive); -@@ -551,6 +558,7 @@ EXPORT_SYMBOL(sys_tz); - EXPORT_SYMBOL(file_fsync); - EXPORT_SYMBOL(fsync_buffers_list); - EXPORT_SYMBOL(clear_inode); -+EXPORT_SYMBOL(__iget); - EXPORT_SYMBOL(init_special_inode); - EXPORT_SYMBOL(new_inode); - EXPORT_SYMBOL(__insert_inode_hash); ---- linux-2.5.73/net/unix/af_unix.c~vfs_intent_2.5.72_rev1 2003-06-22 12:32:57.000000000 -0600 -+++ linux-2.5.73-braam/net/unix/af_unix.c 
2003-07-15 02:23:28.000000000 -0600 -@@ -702,7 +702,7 @@ static int unix_bind(struct socket *sock - /* - * Do the final lookup. - */ -- dentry = lookup_hash(&nd.last, nd.dentry); -+ dentry = lookup_hash(&nd.last, nd.dentry, NULL); - err = PTR_ERR(dentry); - if (IS_ERR(dentry)) - goto out_mknod_unlock; ---- linux-2.5.73/net/sunrpc/rpc_pipe.c~vfs_intent_2.5.72_rev1 2003-06-22 12:32:39.000000000 -0600 -+++ linux-2.5.73-braam/net/sunrpc/rpc_pipe.c 2003-07-15 02:23:28.000000000 -0600 -@@ -598,7 +598,7 @@ rpc_lookup_negative(char *path, struct n - return ERR_PTR(error); - dir = nd->dentry->d_inode; - down(&dir->i_sem); -- dentry = lookup_hash(&nd->last, nd->dentry); -+ dentry = lookup_hash(&nd->last, nd->dentry, NULL); - if (IS_ERR(dentry)) - goto out_err; - if (dentry->d_inode) { -@@ -660,7 +660,7 @@ rpc_rmdir(char *path) - return error; - dir = nd.dentry->d_inode; - down(&dir->i_sem); -- dentry = lookup_hash(&nd.last, nd.dentry); -+ dentry = lookup_hash(&nd.last, nd.dentry, NULL); - if (IS_ERR(dentry)) { - error = PTR_ERR(dentry); - goto out_release; -@@ -721,7 +721,7 @@ rpc_unlink(char *path) - return error; - dir = nd.dentry->d_inode; - down(&dir->i_sem); -- dentry = lookup_hash(&nd.last, nd.dentry); -+ dentry = lookup_hash(&nd.last, nd.dentry, NULL); - if (IS_ERR(dentry)) { - error = PTR_ERR(dentry); - goto out_release; - -_ diff --git a/lustre/kernel_patches/patches/vfs_nointent_2.5.69_rev1.patch b/lustre/kernel_patches/patches/vfs_nointent_2.5.69_rev1.patch deleted file mode 100644 index e2b086b..0000000 --- a/lustre/kernel_patches/patches/vfs_nointent_2.5.69_rev1.patch +++ /dev/null @@ -1,409 +0,0 @@ - fs/exec.c | 2 - - fs/namei.c | 75 +++++++++++++++++++++++++++++++++++++++++++++++++---- - fs/open.c | 73 ++++++++++++++++++++++++++++++++++++++++++++------- - include/linux/fs.h | 9 +++++- - 4 files changed, 142 insertions(+), 17 deletions(-) - ---- uml-2.5/fs/namei.c~vfs_nointent_2.5.69_rev1 2003-06-20 00:04:07.000000000 -0600 -+++ uml-2.5-braam/fs/namei.c 
2003-06-20 06:22:37.000000000 -0600 -@@ -1279,7 +1279,7 @@ int may_open(struct nameidata *nd, int a - if (!error) { - DQUOT_INIT(inode); - -- error = do_truncate(dentry, 0); -+ error = do_truncate(dentry, 0, 1); - } - put_write_access(inode); - if (error) -@@ -1517,6 +1517,7 @@ asmlinkage long sys_mknod(const char __u - char * tmp; - struct dentry * dentry; - struct nameidata nd; -+ intent_init(&nd.it, IT_LOOKUP, 0); - - if (S_ISDIR(mode)) - return -EPERM; -@@ -1527,6 +1528,15 @@ asmlinkage long sys_mknod(const char __u - error = path_lookup(tmp, LOOKUP_PARENT, &nd); - if (error) - goto out; -+ -+ if (nd.dentry->d_inode->i_op->mknod_raw) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ error = op->mknod_raw(&nd, mode, dev); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto out2; -+ } -+ - dentry = lookup_create(&nd, 0); - error = PTR_ERR(dentry); - -@@ -1549,6 +1559,7 @@ asmlinkage long sys_mknod(const char __u - dput(dentry); - } - up(&nd.dentry->d_inode->i_sem); -+out2: - path_release(&nd); - out: - putname(tmp); -@@ -1590,10 +1601,18 @@ asmlinkage long sys_mkdir(const char __u - if (!IS_ERR(tmp)) { - struct dentry *dentry; - struct nameidata nd; -+ intent_init(&nd.it, IT_LOOKUP, 0); - - error = path_lookup(tmp, LOOKUP_PARENT, &nd); - if (error) - goto out; -+ if (nd.dentry->d_inode->i_op->mkdir_raw) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ error = op->mkdir_raw(&nd, mode); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto out2; -+ } - dentry = lookup_create(&nd, 1); - error = PTR_ERR(dentry); - if (!IS_ERR(dentry)) { -@@ -1603,6 +1622,7 @@ asmlinkage long sys_mkdir(const char __u - dput(dentry); - } - up(&nd.dentry->d_inode->i_sem); -+out2: - path_release(&nd); - out: - putname(tmp); -@@ -1683,6 +1703,7 @@ asmlinkage long sys_rmdir(const char __u - char * name; - struct dentry *dentry; - struct nameidata nd; -+ intent_init(&nd.it, 
IT_LOOKUP, 0); - - name = getname(pathname); - if(IS_ERR(name)) -@@ -1703,6 +1724,16 @@ asmlinkage long sys_rmdir(const char __u - error = -EBUSY; - goto exit1; - } -+ -+ if (nd.dentry->d_inode->i_op->rmdir_raw) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ -+ error = op->rmdir_raw(&nd); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto exit1; -+ } -+ - down(&nd.dentry->d_inode->i_sem); - dentry = lookup_hash(&nd.last, nd.dentry, &nd); - error = PTR_ERR(dentry); -@@ -1759,6 +1790,7 @@ asmlinkage long sys_unlink(const char __ - struct dentry *dentry; - struct nameidata nd; - struct inode *inode = NULL; -+ intent_init(&nd.it, IT_LOOKUP, 0); - - name = getname(pathname); - if(IS_ERR(name)) -@@ -1770,6 +1802,13 @@ asmlinkage long sys_unlink(const char __ - error = -EISDIR; - if (nd.last_type != LAST_NORM) - goto exit1; -+ if (nd.dentry->d_inode->i_op->unlink_raw) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ error = op->unlink_raw(&nd); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto exit1; -+ } - down(&nd.dentry->d_inode->i_sem); - dentry = lookup_hash(&nd.last, nd.dentry, &nd); - error = PTR_ERR(dentry); -@@ -1837,10 +1876,18 @@ asmlinkage long sys_symlink(const char _ - if (!IS_ERR(to)) { - struct dentry *dentry; - struct nameidata nd; -+ intent_init(&nd.it, IT_LOOKUP, 0); - - error = path_lookup(to, LOOKUP_PARENT, &nd); - if (error) - goto out; -+ if (nd.dentry->d_inode->i_op->symlink_raw) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ error = op->symlink_raw(&nd, from); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto out2; -+ } - dentry = lookup_create(&nd, 0); - error = PTR_ERR(dentry); - if (!IS_ERR(dentry)) { -@@ -1848,6 +1895,7 @@ asmlinkage long sys_symlink(const char _ - dput(dentry); - } - up(&nd.dentry->d_inode->i_sem); -+out2: - path_release(&nd); - out: - 
putname(to); -@@ -1911,6 +1959,8 @@ asmlinkage long sys_link(const char __us - struct nameidata nd, old_nd; - int error; - char * to; -+ intent_init(&nd.it, IT_LOOKUP, 0); -+ intent_init(&old_nd.it, IT_LOOKUP, 0); - - to = getname(newname); - if (IS_ERR(to)) -@@ -1925,6 +1975,13 @@ asmlinkage long sys_link(const char __us - error = -EXDEV; - if (old_nd.mnt != nd.mnt) - goto out_release; -+ if (nd.dentry->d_inode->i_op->link_raw) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ error = op->link_raw(&old_nd, &nd); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto out_release; -+ } - new_dentry = lookup_create(&nd, 0); - error = PTR_ERR(new_dentry); - if (!IS_ERR(new_dentry)) { -@@ -1975,7 +2032,7 @@ exit: - * locking]. - */ - int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry, -- struct inode *new_dir, struct dentry *new_dentry) -+ struct inode *new_dir, struct dentry *new_dentry) - { - int error = 0; - struct inode *target; -@@ -2020,7 +2077,7 @@ int vfs_rename_dir(struct inode *old_dir - } - - int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry, -- struct inode *new_dir, struct dentry *new_dentry) -+ struct inode *new_dir, struct dentry *new_dentry) - { - struct inode *target; - int error; -@@ -2097,6 +2154,8 @@ static inline int do_rename(const char * - struct dentry * old_dentry, *new_dentry; - struct dentry * trap; - struct nameidata oldnd, newnd; -+ intent_init(&oldnd.it, IT_LOOKUP, 0); -+ intent_init(&newnd.it, IT_LOOKUP, 0); - - error = path_lookup(oldname, LOOKUP_PARENT, &oldnd); - if (error) -@@ -2119,6 +2178,13 @@ static inline int do_rename(const char * - if (newnd.last_type != LAST_NORM) - goto exit2; - -+ if (old_dir->d_inode->i_op->rename_raw) { -+ error = old_dir->d_inode->i_op->rename_raw(&oldnd, &newnd); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto exit2; -+ } -+ - trap = lock_rename(new_dir, old_dir); - - 
old_dentry = lookup_hash(&oldnd.last, old_dir, &oldnd); -@@ -2150,8 +2216,7 @@ static inline int do_rename(const char * - if (new_dentry == trap) - goto exit5; - -- error = vfs_rename(old_dir->d_inode, old_dentry, -- new_dir->d_inode, new_dentry); -+ error = vfs_rename(old_dir->d_inode, old_dentry, new_dir->d_inode, new_dentry); - exit5: - dput(new_dentry); - exit4: ---- uml-2.5/fs/open.c~vfs_nointent_2.5.69_rev1 2003-06-18 21:42:57.000000000 -0600 -+++ uml-2.5-braam/fs/open.c 2003-06-20 06:22:37.000000000 -0600 -@@ -75,9 +75,10 @@ out: - return error; - } - --int do_truncate(struct dentry *dentry, loff_t length) -+int do_truncate(struct dentry *dentry, loff_t length, int called_from_open) - { - int err; -+ struct inode_operations *op = dentry->d_inode->i_op; - struct iattr newattrs; - - /* Not pretty: "inode->i_size" shouldn't really be signed. But it is. */ -@@ -87,7 +88,14 @@ int do_truncate(struct dentry *dentry, l - newattrs.ia_size = length; - newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME; - down(&dentry->d_inode->i_sem); -- err = notify_change(dentry, &newattrs); -+ if (called_from_open) -+ newattrs.ia_valid |= ATTR_FROM_OPEN; -+ if (op->setattr_raw) { -+ newattrs.ia_valid |= ATTR_RAW; -+ newattrs.ia_ctime = CURRENT_TIME; -+ err = op->setattr_raw(dentry->d_inode, &newattrs); -+ } else -+ err = notify_change(dentry, &newattrs); - up(&dentry->d_inode->i_sem); - return err; - } -@@ -142,7 +150,7 @@ static inline long do_sys_truncate(const - error = locks_verify_truncate(inode, NULL, length); - if (!error) { - DQUOT_INIT(inode); -- error = do_truncate(nd.dentry, length); -+ error = do_truncate(nd.dentry, length, 0); - } - put_write_access(inode); - -@@ -194,7 +202,7 @@ static inline long do_sys_ftruncate(unsi - - error = locks_verify_truncate(inode, file, length); - if (!error) -- error = do_truncate(dentry, length); -+ error = do_truncate(dentry, length, 0); - out_putf: - fput(file); - out: -@@ -265,9 +273,19 @@ asmlinkage long sys_utime(char __user * - (error = 
permission(inode,MAY_WRITE)) != 0) - goto dput_and_out; - } -- down(&inode->i_sem); -- error = notify_change(nd.dentry, &newattrs); -- up(&inode->i_sem); -+ if (inode->i_op->setattr_raw) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ -+ newattrs.ia_valid |= ATTR_RAW; -+ error = op->setattr_raw(inode, &newattrs); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto dput_and_out; -+ } else { -+ down(&inode->i_sem); -+ error = notify_change(nd.dentry, &newattrs); -+ up(&inode->i_sem); -+ } - dput_and_out: - path_release(&nd); - out: -@@ -310,9 +328,19 @@ long do_utimes(char __user * filename, s - (error = permission(inode,MAY_WRITE)) != 0) - goto dput_and_out; - } -- down(&inode->i_sem); -- error = notify_change(nd.dentry, &newattrs); -- up(&inode->i_sem); -+ if (inode->i_op->setattr_raw) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ -+ newattrs.ia_valid |= ATTR_RAW; -+ error = op->setattr_raw(inode, &newattrs); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto dput_and_out; -+ } else { -+ down(&inode->i_sem); -+ error = notify_change(nd.dentry, &newattrs); -+ up(&inode->i_sem); -+ } - dput_and_out: - path_release(&nd); - out: -@@ -513,6 +541,18 @@ asmlinkage long sys_chmod(const char __u - if (IS_RDONLY(inode)) - goto dput_and_out; - -+ if (inode->i_op->setattr_raw) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ -+ newattrs.ia_mode = mode; -+ newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; -+ newattrs.ia_valid |= ATTR_RAW; -+ error = op->setattr_raw(inode, &newattrs); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto dput_and_out; -+ } -+ - error = -EPERM; - if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) - goto dput_and_out; -@@ -546,6 +586,18 @@ static int chown_common(struct dentry * - if (IS_RDONLY(inode)) - goto out; - error = -EPERM; -+ if (inode->i_op->setattr_raw) { -+ struct 
inode_operations *op = dentry->d_inode->i_op; -+ -+ newattrs.ia_uid = user; -+ newattrs.ia_gid = group; -+ newattrs.ia_valid = ATTR_UID | ATTR_GID; -+ newattrs.ia_valid |= ATTR_RAW; -+ error = op->setattr_raw(inode, &newattrs); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ return error; -+ } - if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) - goto out; - newattrs.ia_valid = ATTR_CTIME; -@@ -559,6 +611,7 @@ static int chown_common(struct dentry * - } - if (!S_ISDIR(inode->i_mode)) - newattrs.ia_valid |= ATTR_KILL_SUID|ATTR_KILL_SGID; -+ - down(&inode->i_sem); - error = notify_change(dentry, &newattrs); - up(&inode->i_sem); ---- uml-2.5/include/linux/fs.h~vfs_nointent_2.5.69_rev1 2003-06-18 21:40:58.000000000 -0600 -+++ uml-2.5-braam/include/linux/fs.h 2003-06-20 06:22:37.000000000 -0600 -@@ -738,13 +738,20 @@ struct inode_operations { - struct dentry * (*lookup_it) (struct inode *,struct dentry *, - struct nameidata *); - int (*link) (struct dentry *,struct inode *,struct dentry *); -+ int (*link_raw) (struct nameidata *,struct nameidata *); - int (*unlink) (struct inode *,struct dentry *); -+ int (*unlink_raw) (struct nameidata *); - int (*symlink) (struct inode *,struct dentry *,const char *); -+ int (*symlink_raw) (struct nameidata *,const char *); - int (*mkdir) (struct inode *,struct dentry *,int); -+ int (*mkdir_raw) (struct nameidata *,int); - int (*rmdir) (struct inode *,struct dentry *); -+ int (*rmdir_raw) (struct nameidata *); - int (*mknod) (struct inode *,struct dentry *,int,dev_t); -+ int (*mknod_raw) (struct nameidata *,int,dev_t); - int (*rename) (struct inode *, struct dentry *, - struct inode *, struct dentry *); -+ int (*rename_raw) (struct nameidata *, struct nameidata *); - int (*readlink) (struct dentry *, char __user *,int); - int (*follow_link) (struct dentry *, struct nameidata *); - void (*truncate) (struct inode *); -@@ -1029,7 +1036,7 @@ static inline int break_lease(struct ino - - asmlinkage long 
sys_open(const char *, int, int); - asmlinkage long sys_close(unsigned int); /* yes, it's really unsigned */ --extern int do_truncate(struct dentry *, loff_t start); -+extern int do_truncate(struct dentry *, loff_t start, int called_from_open); - - extern struct file *filp_open(const char *, int, int); - extern struct file * dentry_open(struct dentry *, struct vfsmount *, int); ---- uml-2.5/fs/exec.c~vfs_nointent_2.5.69_rev1 2003-06-04 21:29:14.000000000 -0600 -+++ uml-2.5-braam/fs/exec.c 2003-06-20 06:22:37.000000000 -0600 -@@ -1358,7 +1358,7 @@ int do_coredump(long signr, int exit_cod - goto close_fail; - if (!file->f_op->write) - goto close_fail; -- if (do_truncate(file->f_dentry, 0) != 0) -+ if (do_truncate(file->f_dentry, 0, 0) != 0) - goto close_fail; - - retval = binfmt->core_dump(signr, regs, file); - -_ diff --git a/lustre/kernel_patches/pc/ext3-noread-inode.pc b/lustre/kernel_patches/pc/ext3-noread-inode.pc deleted file mode 100644 index 9c3cea8..0000000 --- a/lustre/kernel_patches/pc/ext3-noread-inode.pc +++ /dev/null @@ -1,3 +0,0 @@ -fs/ext3/ialloc.c -fs/ext3/inode.c -include/linux/ext3_fs.h diff --git a/lustre/kernel_patches/pc/inode-protection-from-pdflush.pc b/lustre/kernel_patches/pc/inode-protection-from-pdflush.pc deleted file mode 100644 index c1b89a1..0000000 --- a/lustre/kernel_patches/pc/inode-protection-from-pdflush.pc +++ /dev/null @@ -1,2 +0,0 @@ -fs/fs-writeback.c -include/linux/fs.h diff --git a/lustre/kernel_patches/pc/kexec-2.5.73-full.pc b/lustre/kernel_patches/pc/kexec-2.5.73-full.pc deleted file mode 100644 index 939f3d4..0000000 --- a/lustre/kernel_patches/pc/kexec-2.5.73-full.pc +++ /dev/null @@ -1,23 +0,0 @@ -MAINTAINERS -arch/i386/Kconfig -arch/i386/defconfig -arch/i386/kernel/Makefile -arch/i386/kernel/apic.c -arch/i386/kernel/dmi_scan.c -arch/i386/kernel/entry.S -arch/i386/kernel/i8259.c -arch/i386/kernel/io_apic.c -arch/i386/kernel/machine_kexec.c -arch/i386/kernel/reboot.c -arch/i386/kernel/relocate_kernel.S 
-arch/i386/kernel/smp.c -fs/aio.c -include/asm-i386/apic.h -include/asm-i386/apicdef.h -include/asm-i386/kexec.h -include/asm-i386/unistd.h -include/linux/kexec.h -include/linux/reboot.h -kernel/Makefile -kernel/kexec.c -kernel/sys.c diff --git a/lustre/kernel_patches/pc/kgdb-ga-2.5.73.pc b/lustre/kernel_patches/pc/kgdb-ga-2.5.73.pc deleted file mode 100644 index 1d99524..0000000 --- a/lustre/kernel_patches/pc/kgdb-ga-2.5.73.pc +++ /dev/null @@ -1,28 +0,0 @@ -arch/i386/Kconfig -arch/i386/kernel/entry.S -arch/i386/kernel/kgdb_stub.c -arch/i386/kernel/Makefile -arch/i386/kernel/nmi.c -arch/i386/kernel/smp.c -arch/i386/kernel/traps.c -arch/i386/lib/kgdb_serial.c -arch/i386/lib/Makefile -arch/i386/Makefile -arch/i386/mm/fault.c -Documentation/i386/kgdb/andthen -Documentation/i386/kgdb/debug-nmi.txt -Documentation/i386/kgdb/gdb-globals.txt -Documentation/i386/kgdb/gdbinit -Documentation/i386/kgdb/gdbinit.hw -Documentation/i386/kgdb/gdbinit-modules -Documentation/i386/kgdb/kgdb.txt -Documentation/i386/kgdb/loadmodule.sh -drivers/char/keyboard.c -drivers/char/sysrq.c -drivers/serial/8250.c -include/asm-i386/bugs.h -include/asm-i386/kgdb.h -include/asm-i386/kgdb_local.h -include/linux/config.h -kernel/sched.c -MAINTAINERS diff --git a/lustre/kernel_patches/pc/kgdb-ga-docco-fixes-2.5.73.pc b/lustre/kernel_patches/pc/kgdb-ga-docco-fixes-2.5.73.pc deleted file mode 100644 index 87e2eca..0000000 --- a/lustre/kernel_patches/pc/kgdb-ga-docco-fixes-2.5.73.pc +++ /dev/null @@ -1 +0,0 @@ -Documentation/i386/kgdb/kgdb.txt diff --git a/lustre/kernel_patches/pc/kgdb-use-ggdb-2.5.73.pc b/lustre/kernel_patches/pc/kgdb-use-ggdb-2.5.73.pc deleted file mode 100644 index 43f4115..0000000 --- a/lustre/kernel_patches/pc/kgdb-use-ggdb-2.5.73.pc +++ /dev/null @@ -1 +0,0 @@ -arch/i386/Makefile diff --git a/lustre/kernel_patches/pc/lkcd-cvs-2.5.69.pc b/lustre/kernel_patches/pc/lkcd-cvs-2.5.69.pc deleted file mode 100644 index 2799a8e..0000000 --- a/lustre/kernel_patches/pc/lkcd-cvs-2.5.69.pc +++ 
/dev/null @@ -1,19 +0,0 @@ -drivers/dump/Makefile -drivers/dump/dump_blockdev.c -drivers/dump/dump_execute.c -drivers/dump/dump_filters.c -drivers/dump/dump_fmt.c -drivers/dump/dump_gzip.c -drivers/dump/dump_i386.c -drivers/dump/dump_memdev.c -drivers/dump/dump_netdev.c -drivers/dump/dump_overlay.c -drivers/dump/dump_rle.c -drivers/dump/dump_scheme.c -drivers/dump/dump_setup.c -include/linux/dumpdev.h -include/linux/dump.h -include/linux/dump_netdev.h -include/asm-i386/dump.h -init/kerntypes.c -drivers/dump/dump_methods.h diff --git a/lustre/kernel_patches/pc/lkcd-kernel-changes-2.5.73.pc b/lustre/kernel_patches/pc/lkcd-kernel-changes-2.5.73.pc deleted file mode 100644 index 722bb20..0000000 --- a/lustre/kernel_patches/pc/lkcd-kernel-changes-2.5.73.pc +++ /dev/null @@ -1,25 +0,0 @@ -drivers/Makefile -include/linux/major.h -include/linux/sysctl.h -include/asm-i386/mach-default/irq_vectors.h -include/asm-i386/kmap_types.h -include/asm-i386/smp.h -arch/i386/kernel/i386_ksyms.c -arch/i386/kernel/nmi.c -arch/i386/kernel/setup.c -arch/i386/kernel/smp.c -arch/i386/kernel/traps.c -arch/i386/mm/init.c -arch/i386/boot/Makefile -arch/i386/Kconfig -arch/s390/boot/Makefile -arch/s390/boot/install.sh -scripts/mkcompile_h -kernel/ksyms.c -kernel/panic.c -kernel/sched.c -lib/Kconfig -mm/page_alloc.c -init/Makefile -init/main.c -init/version.c diff --git a/lustre/kernel_patches/pc/vfs_intent_2.5.72_rev1.pc b/lustre/kernel_patches/pc/vfs_intent_2.5.72_rev1.pc deleted file mode 100644 index 24a4603..0000000 --- a/lustre/kernel_patches/pc/vfs_intent_2.5.72_rev1.pc +++ /dev/null @@ -1,14 +0,0 @@ -fs/sysfs/inode.c -fs/dcache.c -fs/exec.c -fs/xattr.c -fs/namei.c -fs/namespace.c -fs/open.c -fs/stat.c -include/linux/dcache.h -include/linux/fs.h -include/linux/namei.h -kernel/ksyms.c -net/unix/af_unix.c -net/sunrpc/rpc_pipe.c diff --git a/lustre/kernel_patches/pc/vfs_nointent_2.5.69_rev1.pc b/lustre/kernel_patches/pc/vfs_nointent_2.5.69_rev1.pc deleted file mode 100644 index 
2849da1..0000000 --- a/lustre/kernel_patches/pc/vfs_nointent_2.5.69_rev1.pc +++ /dev/null @@ -1,4 +0,0 @@ -fs/namei.c -fs/open.c -include/linux/fs.h -fs/exec.c diff --git a/lustre/ldlm/Makefile.mk b/lustre/ldlm/Makefile.mk index 04402a7..650331e 100644 --- a/lustre/ldlm/Makefile.mk +++ b/lustre/ldlm/Makefile.mk @@ -7,4 +7,4 @@ include $(src)/../portals/Kernelenv obj-y += ldlm.o ldlm-objs := l_lock.o ldlm_lock.o ldlm_resource.o ldlm_extent.o ldlm_request.o \ - ldlm_lockd.o ldlm_lib.o + ldlm_lockd.o ldlm_lib.o ldlm_flock.o ldlm_plain.o diff --git a/lustre/llite/llite_lib.c b/lustre/llite/llite_lib.c index 848893b..435ef48 100644 --- a/lustre/llite/llite_lib.c +++ b/lustre/llite/llite_lib.c @@ -79,7 +79,8 @@ int ll_set_opt(const char *opt, char *data, int fl) RETURN(fl); } -void ll_options(char *options, char **ost, char **mds, int *flags) +void ll_options(char *options, char **ost, char **mdc, char **profile, + char **mds_uuid, int *flags) { char *this_char; #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) @@ -102,7 +103,11 @@ void ll_options(char *options, char **ost, char **mds, int *flags) CDEBUG(D_SUPER, "this_char %s\n", this_char); if (!*ost && (*ost = ll_read_opt("osc", this_char))) continue; - if (!*mds && (*mds = ll_read_opt("mdc", this_char))) + if (!*mdc && (*mdc = ll_read_opt("mdc", this_char))) + continue; + if (!*profile && (*profile = ll_read_opt("profile", this_char))) + continue; + if (!*mds_uuid && (*mds_uuid = ll_read_opt("mds_uuid", this_char))) continue; if (!(*flags & LL_SBI_NOLCK) && ((*flags) = (*flags) | @@ -120,6 +125,145 @@ void ll_lli_init(struct ll_inode_info *lli) lli->lli_maxbytes = PAGE_CACHE_MAXBYTES; } +int ll_process_log_rec(struct llog_rec_hdr *rec, void *data) +{ + struct ll_sb_info * sbi = data; + int cfg_len = rec->lrh_len; + char *cfg_buf = (char*) (rec + 1); + int rc = 0; + + if (rec->lrh_type == OBD_CFG_REC) { + char *buf; + struct lustre_cfg *lcfg; + char *old_name = NULL; + int old_len = 0; + char *old_uuid = NULL; + int 
old_uuid_len = 0; + char *inst_name = NULL; + int inst_len = 0; + + rc = lustre_cfg_getdata(&buf, cfg_len, cfg_buf, 1); + if (rc) + GOTO(out, rc); + lcfg = (struct lustre_cfg* ) buf; + + if (sbi && lcfg->lcfg_dev_name) { + inst_len = strlen(lcfg->lcfg_dev_name) + + strlen(sbi->ll_instance) + 2; + OBD_ALLOC(inst_name, inst_len); + sprintf(inst_name, "%s-%s", lcfg->lcfg_dev_name, + sbi->ll_instance); + old_name = lcfg->lcfg_dev_name; + old_len = lcfg->lcfg_dev_namelen; + lcfg->lcfg_dev_name = inst_name; + lcfg->lcfg_dev_namelen = strlen(inst_name) + 1; + } + + if (sbi && lcfg->lcfg_command == LCFG_ATTACH) { + old_uuid = lcfg->lcfg_inlbuf2; + old_uuid_len = lcfg->lcfg_inllen2; + + lcfg->lcfg_inlbuf2 = (char*)&sbi->ll_sb_uuid.uuid; + lcfg->lcfg_inllen2 = sizeof(sbi->ll_sb_uuid); + } + + rc = class_process_config(lcfg); + + if (old_name) { + lcfg->lcfg_dev_name = old_name; + lcfg->lcfg_dev_namelen = old_len; + OBD_FREE(inst_name, inst_len); + } + + if (old_uuid) { + lcfg->lcfg_inlbuf2 = old_uuid; + lcfg->lcfg_inllen2 = old_uuid_len; + } + + lustre_cfg_freedata(buf, cfg_len); + } else if (rec->lrh_type == PTL_CFG_REC) { + rc = kportal_nal_cmd((struct portals_cfg *)cfg_buf); + } +out: + RETURN(rc); +} + + +int ll_process_log(char *mds, char *config, void * instance) +{ + struct lustre_cfg lcfg; + int dev; + struct obd_device *obd; + struct lustre_handle mdc_conn = {0, }; + struct obd_export *exp; + struct obd_uuid uuid = { "MDC_mount_UUID" }; + int rc = 0; + int err; + + lcfg.lcfg_command = LCFG_ATTACH; + lcfg.lcfg_dev_name = "mdc_dev"; + lcfg.lcfg_dev_namelen = strlen(lcfg.lcfg_dev_name) + 1; + lcfg.lcfg_inlbuf1 = "mdc"; + lcfg.lcfg_inllen1 = strlen(lcfg.lcfg_inlbuf1) + 1; + lcfg.lcfg_inlbuf2 = "mdc_dev_UUID"; + lcfg.lcfg_inllen2 = strlen(lcfg.lcfg_inlbuf2) + 1; + dev = class_attach(&lcfg); + if (dev < 0) + GOTO(out, err = dev); + + obd = class_name2obd("mdc_dev"); + if (obd == NULL) + GOTO(out, err = -EINVAL); + + memset(&lcfg, 0, sizeof(lcfg)); + + lcfg.lcfg_command = 
LCFG_SETUP; + lcfg.lcfg_dev_name = "mdc_dev"; + lcfg.lcfg_dev_namelen = strlen(lcfg.lcfg_dev_name) + 1; + lcfg.lcfg_inlbuf1 = mds; + lcfg.lcfg_inllen1 = strlen(lcfg.lcfg_inlbuf1) + 1; + lcfg.lcfg_inlbuf2 = "NET_mds_facet_tcp_UUID"; + lcfg.lcfg_inllen2 = strlen(lcfg.lcfg_inlbuf2) + 1; + err = class_setup(obd, &lcfg); + if (err < 0) + GOTO(out, err); + + err = obd_connect(&mdc_conn, obd, &uuid); + if (err) { + CERROR("cannot connect to %s: rc = %d\n", mds, err); + GOTO(out, err); + } + + exp = class_conn2export(&mdc_conn); + rc = mdc_llog_process(exp, config, instance, ll_process_log_rec); + if (rc) { + CERROR("mdc_llog_process failed: rc = %d\n", err); + } + + err = obd_disconnect(exp, 0); + + memset(&lcfg, 0, sizeof(lcfg)); + lcfg.lcfg_command = LCFG_CLEANUP; + lcfg.lcfg_dev_name = "mdc_dev"; + err = class_cleanup(obd, &lcfg); + if (err < 0) + GOTO(out, err); + + memset(&lcfg, 0, sizeof(lcfg)); + lcfg.lcfg_command = LCFG_DETACH; + lcfg.lcfg_dev_name = "mdc_dev"; + err = class_detach(obd, &lcfg); + if (err < 0) + GOTO(out, err); + + +out: + if (rc == 0) + rc = err; + + RETURN(rc); +} + int ll_fill_super(struct super_block *sb, void *data, int silent) { struct inode *root = 0; @@ -127,6 +271,8 @@ int ll_fill_super(struct super_block *sb, void *data, int silent) struct ll_sb_info *sbi; char *osc = NULL; char *mdc = NULL; + char *mds_uuid = NULL; + char *profile = NULL; int err; struct ll_fid rootfid; struct obd_statfs osfs; @@ -135,6 +281,7 @@ int ll_fill_super(struct super_block *sb, void *data, int silent) struct lustre_handle mdc_conn = {0, }; struct lustre_md md; class_uuid_t uuid; + kdev_t devno; ENTRY; @@ -151,7 +298,58 @@ int ll_fill_super(struct super_block *sb, void *data, int silent) class_uuid_unparse(uuid, &sbi->ll_sb_uuid); sbi->ll_flags |= LL_SBI_READAHEAD; - ll_options(data, &osc, &mdc, &sbi->ll_flags); + ll_options(data, &osc, &mdc, &profile, &mds_uuid, &sbi->ll_flags); + + if (profile) { + struct lustre_profile *lprof; + int len; + + if (!mds_uuid) { 
+ CERROR("no mds_uuid\n"); + GOTO(out_free, err = -EINVAL); + } + + /* save these so we can cleanup later */ + obd_str2uuid(&sbi->ll_mds_uuid, mds_uuid); + + len = strlen(profile) + 1; + OBD_ALLOC(sbi->ll_profile, len); + if (sbi->ll_profile == NULL) + GOTO(out_free, err = -ENOMEM); + memcpy(sbi->ll_profile, profile, len); + + /* generate a string unique to this super, let's try + the address of the super itself.*/ + len = (sizeof(sb) * 2) + 1; + OBD_ALLOC(sbi->ll_instance, len); + if (sbi->ll_instance == NULL) + GOTO(out_free, err = -ENOMEM); + sprintf(sbi->ll_instance, "%p", sb); + + err = ll_process_log(mds_uuid, profile, sbi); + if (err < 0) { + CERROR("Unable to process log: %s\n", profile); + + GOTO(out_free, err); + } + + lprof = class_get_profile(profile); + if (lprof == NULL) { + CERROR("No profile found: %s\n", profile); + GOTO(out_free, err = -EINVAL); + } + if (osc) + OBD_FREE(osc, strlen(osc) + 1); + OBD_ALLOC(osc, strlen(lprof->lp_osc) + + strlen(sbi->ll_instance) + 2); + sprintf(osc, "%s-%s", lprof->lp_osc, sbi->ll_instance); + + if (mdc) + OBD_FREE(mdc, strlen(mdc) + 1); + OBD_ALLOC(mdc, strlen(lprof->lp_mdc) + + strlen(sbi->ll_instance) + 2); + sprintf(mdc, "%s-%s", lprof->lp_mdc, sbi->ll_instance); + } if (!osc) { CERROR("no osc\n"); @@ -162,6 +360,7 @@ int ll_fill_super(struct super_block *sb, void *data, int silent) CERROR("no mdc\n"); GOTO(out_free, err = -EINVAL); } + obd = class_name2obd(mdc); if (!obd) { @@ -192,6 +391,10 @@ int ll_fill_super(struct super_block *sb, void *data, int silent) sb->s_blocksize_bits = log2(osfs.os_bsize); sb->s_magic = LL_SUPER_MAGIC; sb->s_maxbytes = PAGE_CACHE_MAXBYTES; + + devno = get_uuid2int(sbi2mdc(sbi)->cl_import->imp_target_uuid.uuid, + strlen(sbi2mdc(sbi)->cl_import->imp_target_uuid.uuid)); + sb->s_dev = devno; obd = class_name2obd(osc); if (!obd) { @@ -250,6 +453,10 @@ out_dev: OBD_FREE(mdc, strlen(mdc) + 1); if (osc) OBD_FREE(osc, strlen(osc) + 1); + if (profile) + OBD_FREE(profile, strlen(profile) + 
1); + if (mds_uuid) + OBD_FREE(mds_uuid, strlen(mds_uuid) + 1); RETURN(err); @@ -258,7 +465,26 @@ out_osc: obd_disconnect(sbi->ll_osc_exp, 0); out_mdc: obd_disconnect(sbi->ll_mdc_exp, 0); + out_free: + if (sbi->ll_profile != NULL) { + int len = sizeof(sbi->ll_profile) + sizeof("-clean") + 1; + int err; + + if (sbi->ll_instance != NULL) { + char * cln_prof; + OBD_ALLOC(cln_prof, len); + sprintf(cln_prof, "%s-clean", sbi->ll_profile); + + err = ll_process_log(sbi->ll_mds_uuid.uuid, cln_prof, + sbi); + if (err < 0) + CERROR("Unable to process log: %s\n", cln_prof); + OBD_FREE(cln_prof, len); + OBD_FREE(sbi->ll_instance, strlen(sbi->ll_instance)+ 1); + } + OBD_FREE(sbi->ll_profile, strlen(sbi->ll_profile) + 1); + } lprocfs_unregister_mountpoint(sbi); OBD_FREE(sbi, sizeof(*sbi)); @@ -274,6 +500,7 @@ void ll_put_super(struct super_block *sb) ENTRY; CDEBUG(D_VFSTRACE, "VFS Op: sb %p\n", sb); + list_del(&sbi->ll_conn_chain); obd_disconnect(sbi->ll_osc_exp, 0); @@ -294,7 +521,7 @@ void ll_put_super(struct super_block *sb) obd_disconnect(sbi->ll_mdc_exp, 0); -#warning We do this to get rid of orphaned dentries. That is not really trw. + // We do this to get rid of orphaned dentries. That is not really trw. 
spin_lock(&dcache_lock); hlist_for_each_safe(tmp, next, &sbi->ll_orphan_dentry_list) { struct dentry *dentry = hlist_entry(tmp, struct dentry, d_hash); @@ -302,6 +529,24 @@ void ll_put_super(struct super_block *sb) } spin_unlock(&dcache_lock); + if (sbi->ll_profile != NULL) { + char * cln_prof; + int len = sizeof(sbi->ll_profile) + sizeof("-clean") + 1; + int err; + + OBD_ALLOC(cln_prof, len); + sprintf(cln_prof, "%s-clean", sbi->ll_profile); + + err = ll_process_log(sbi->ll_mds_uuid.uuid, cln_prof, + sbi); + if (err < 0) + CERROR("Unable to process log: %s\n", cln_prof); + + OBD_FREE(cln_prof, len); + OBD_FREE(sbi->ll_profile, strlen(sbi->ll_profile) + 1); + OBD_FREE(sbi->ll_instance, strlen(sbi->ll_instance) + 1); + } + OBD_FREE(sbi, sizeof(*sbi)); EXIT; @@ -312,8 +557,8 @@ struct inode *ll_inode_from_lock(struct ldlm_lock *lock) { struct inode *inode; l_lock(&lock->l_resource->lr_namespace->ns_lock); - if (lock->l_data) - inode = igrab(lock->l_data); + if (lock->l_ast_data) + inode = igrab(lock->l_ast_data); else inode = NULL; l_unlock(&lock->l_resource->lr_namespace->ns_lock); @@ -322,8 +567,8 @@ struct inode *ll_inode_from_lock(struct ldlm_lock *lock) static int null_if_equal(struct ldlm_lock *lock, void *data) { - if (data == lock->l_data) - lock->l_data = NULL; + if (data == lock->l_ast_data) + lock->l_ast_data = NULL; if (lock->l_req_mode != lock->l_granted_mode) return LDLM_ITER_STOP; @@ -904,7 +1149,7 @@ int ll_prep_inode(struct obd_export *exp, struct inode **inode, } else { LASSERT(sb); *inode = ll_iget(sb, md.body->ino, &md); - if (!*inode) { + if (*inode == NULL || is_bad_inode(*inode)) { /* free the lsm if we allocated one above */ if (md.lsm != NULL) obd_free_memmd(exp, &md.lsm); diff --git a/lustre/lov/lov_internal.h b/lustre/lov/lov_internal.h index f9b629e..c1a412c 100644 --- a/lustre/lov/lov_internal.h +++ b/lustre/lov/lov_internal.h @@ -12,15 +12,35 @@ int lov_get_stripecnt(struct lov_obd *lov, int stripe_count); int lov_alloc_memmd(struct 
lov_stripe_md **lsmp, int stripe_count); void lov_free_memmd(struct lov_stripe_md **lsmp); +/* lov_log.c */ +int lov_llog_setup(struct obd_device *obd, struct obd_device *disk_obd, + int index, int count ,struct llog_logid *logids); +int lov_llog_cleanup(struct obd_device *obd); +int lov_llog_origin_add(struct obd_export *exp, + int index, + struct llog_rec_hdr *rec, struct lov_stripe_md *lsm, + struct llog_cookie *logcookies, int numcookies); +int lov_llog_repl_cancel(struct obd_device *obd, struct lov_stripe_md *lsm, + int count, struct llog_cookie *cookies, int flags); + + +#if 0 +int lov_get_catalogs(struct lov_obd *lov, struct llog_handle *cathandle); +int lov_log_add(struct obd_export *exp, + struct llog_handle *cathandle, + struct llog_rec_hdr *rec, struct lov_stripe_md *lsm, + struct llog_cookie *logcookies, int numcookies); +#endif + /* lov_pack.c */ int lov_packmd(struct obd_export *exp, struct lov_mds_md **lmm, struct lov_stripe_md *lsm); int lov_unpackmd(struct obd_export *exp, struct lov_stripe_md **lsm, struct lov_mds_md *lmm, int lmmsize); int lov_setstripe(struct obd_export *exp, - struct lov_stripe_md **lsmp, struct lov_mds_md *lmmu); + struct lov_stripe_md **lsmp, struct lov_mds_md *lump); int lov_getstripe(struct obd_export *exp, - struct lov_stripe_md *lsm, struct lov_mds_md *lmmu); + struct lov_stripe_md *lsm, struct lov_mds_md *lump); /* lproc_lov.c */ extern struct file_operations lov_proc_target_fops; diff --git a/lustre/obdfilter/filter_internal.h b/lustre/obdfilter/filter_internal.h index 6c575a5..070a932 100644 --- a/lustre/obdfilter/filter_internal.h +++ b/lustre/obdfilter/filter_internal.h @@ -39,7 +39,7 @@ #define FILTER_LR_MAX_CLIENT_WORDS (FILTER_LR_MAX_CLIENTS/sizeof(unsigned long)) #define FILTER_SUBDIR_COUNT 32 /* set to zero for no subdirs */ -#define FILTER_GROUPS 2 /* must be at least 3; not dynamic yet */ +#define FILTER_GROUPS 3 /* must be at least 3; not dynamic yet */ #define FILTER_MOUNT_RECOV 2 #define 
FILTER_RECOVERY_TIMEOUT (obd_timeout * 5 * HZ / 2) /* *waves hands* */ @@ -58,11 +58,13 @@ struct filter_server_data { __u32 fsd_client_start; /* start of per-client data area */ __u16 fsd_client_size; /* size of per-client data area */ __u16 fsd_subdir_count; /* number of subdirectories for objects */ - __u64 fsd_catalog_oid; /* recovery catalog object id */ - __u32 fsd_catalog_ogen; /* recovery catalog inode generation */ + //__u64 fsd_catalog_oid; /* recovery catalog object id */ + //__u32 fsd_catalog_ogen; /* recovery catalog inode generation */ + //__u64 fsd_catalog_ogr; /* recovery catalog inode group */ __u8 fsd_peeruuid[37]; /* UUID of MDS associated with this OST */ __u8 peer_padding[3]; /* unused */ - __u8 fsd_padding[FILTER_LR_SERVER_SIZE - 140]; + //__u8 fsd_padding[FILTER_LR_SERVER_SIZE - 140]; + __u8 fsd_padding[FILTER_LR_SERVER_SIZE - 128]; }; /* Data stored per client in the last_rcvd file. In le32 order. */ @@ -102,7 +104,7 @@ struct dentry *__filter_oa2dentry(struct obd_device *obd, struct obdo *oa, int filter_finish_transno(struct obd_export *, struct obd_trans_info *, int rc); __u64 filter_next_id(struct filter_obd *, struct obdo *); -int filter_update_server_data(struct obd_device *, struct file *, +int filter_update_server_data(struct obd_device *, struct file *, obd_gr, struct filter_server_data *, int force_sync); int filter_update_last_objid(struct obd_device *, obd_gr, int force_sync); int filter_common_setup(struct obd_device *, obd_count len, void *buf, @@ -127,11 +129,17 @@ int filter_commitrw_write(struct obd_export *exp, int objcount, struct obd_trans_info *oti); /* filter_log.c */ -int filter_log_op_create(struct llog_handle *cathandle, struct ll_fid *mds_fid, - obd_id oid, obd_count ogen, struct llog_cookie *); -int filter_log_op_orphan(struct llog_handle *cathandle, obd_id oid, - obd_count ogen, struct llog_cookie *); -struct llog_handle *filter_get_catalog(struct obd_device *); +struct ost_filterdata { + __u32 ofd_epoch; +}; +int 
filter_log_sz_change(struct llog_handle *cathandle, + struct ll_fid *mds_fid, + __u32 io_epoch, + struct llog_cookie *logcookie, + struct inode *inode); +int filter_get_catalog(struct obd_device *); +void filter_cancel_cookies_cb(struct obd_device *obd, __u64 transno, + void *cb_data, int error); /* filter_san.c */ diff --git a/lustre/obdfilter/filter_io.c b/lustre/obdfilter/filter_io.c index 43f16f4..4ee3356 100644 --- a/lustre/obdfilter/filter_io.c +++ b/lustre/obdfilter/filter_io.c @@ -277,7 +277,8 @@ static int filter_preprw_write(int cmd, struct obd_export *exp, struct obdo *oa, memset(res, 0, niocount * sizeof(*res)); push_ctxt(&saved, &exp->exp_obd->obd_ctxt, NULL); - dentry = filter_fid2dentry(exp->exp_obd, NULL, 0, obj->ioo_id); + dentry = filter_fid2dentry(exp->exp_obd, NULL, obj->ioo_gr, + obj->ioo_id); if (IS_ERR(dentry)) GOTO(cleanup, rc = PTR_ERR(dentry)); @@ -450,9 +451,7 @@ int filter_brw(int cmd, struct obd_export *exp, struct obdo *oa, rnb[i].len = pga[i].count; } - ioo.ioo_id = oa->o_id; - ioo.ioo_gr = 0; - ioo.ioo_type = oa->o_mode & S_IFMT; + obdo_to_ioobj(oa, &ioo); ioo.ioo_bufcnt = oa_bufs; ret = filter_preprw(cmd, exp, oa, 1, &ioo, oa_bufs, rnb, lnb, oti); diff --git a/lustre/obdfilter/filter_log.c b/lustre/obdfilter/filter_log.c index de21a1f..3e451ce 100644 --- a/lustre/obdfilter/filter_log.c +++ b/lustre/obdfilter/filter_log.c @@ -37,28 +37,33 @@ #include "filter_internal.h" +#if 0 + /* This is called from filter_setup() and should be single threaded */ -struct llog_handle *filter_get_catalog(struct obd_device *obd) +int filter_get_catalog(struct obd_device *obd) { struct filter_obd *filter = &obd->u.filter; struct filter_server_data *fsd = filter->fo_fsd; struct obd_run_ctxt saved; struct llog_handle *cathandle = NULL; struct llog_logid logid; + struct llog_obd_ctxt *ctxt; int rc; ENTRY; push_ctxt(&saved, &obd->obd_ctxt, NULL); if (fsd->fsd_catalog_oid) { logid.lgl_oid = le64_to_cpu(fsd->fsd_catalog_oid); - logid.lgl_ogen = 
le32_to_cpu(fsd->fsd_catalog_ogen); + logid.lgl_ogen = 0; + logid.lgl_ogr = le64_to_cpu(fsd->fsd_catalog_ogr); rc = llog_create(obd, &cathandle, &logid, NULL); if (rc) { - CERROR("error opening catalog "LPX64":%x: rc %d\n", - logid.lgl_oid, logid.lgl_ogen, + CERROR("error opening catalog "LPX64"/"LPX64": rc %d\n", + logid.lgl_oid, logid.lgl_ogr, (int)PTR_ERR(cathandle)); fsd->fsd_catalog_oid = 0; - fsd->fsd_catalog_ogen = 0; + fsd->fsd_catalog_ogr = 0; + RETURN(rc); } } @@ -71,77 +76,97 @@ struct llog_handle *filter_get_catalog(struct obd_device *obd) } logid = cathandle->lgh_id; fsd->fsd_catalog_oid = cpu_to_le64(logid.lgl_oid); - fsd->fsd_catalog_ogen = cpu_to_le32(logid.lgl_ogen); - rc = filter_update_server_data(obd, filter->fo_rcvd_filp,fsd,0); + fsd->fsd_catalog_ogr = cpu_to_le64(logid.lgl_ogr); + rc = filter_update_server_data(obd, filter->fo_rcvd_filp, 1, + fsd, 0); if (rc) { CERROR("error writing new catalog to disk: rc %d\n",rc); GOTO(out_handle, rc); } } - rc = llog_init_handle(cathandle, LLOG_F_IS_CAT, &obd->u.filter.fo_mdc_uuid); + rc = llog_init_handle(cathandle, LLOG_F_IS_CAT, &obd->obd_uuid); if (rc) GOTO(out_handle, rc); + OBD_ALLOC(ctxt, sizeof(*ctxt)); + if (!ctxt) + GOTO(out_handle, rc = -ENOMEM); + LASSERT(obd->obd_llog_ctxt == NULL); + obd->obd_llog_ctxt = ctxt; + obd->obd_llog_ctxt->loc_obd = obd; + obd->obd_llog_ctxt->loc_handles[LLOG_OBD_SZ_LOG_HANDLE] = cathandle; + out: pop_ctxt(&saved, &obd->obd_ctxt, NULL); - RETURN(cathandle); + RETURN(rc); out_handle: llog_close(cathandle); - cathandle = ERR_PTR(rc); goto out; } +#endif -int filter_log_op_create(struct llog_handle *cathandle, struct ll_fid *mds_fid, - obd_id oid, obd_count ogen, - struct llog_cookie *logcookie) +int filter_log_sz_change(struct llog_handle *cathandle, + struct ll_fid *mds_fid, + __u32 io_epoch, + struct llog_cookie *logcookie, + struct inode *inode) { - struct llog_create_rec *lcr; + struct llog_size_change_rec *lsc; int rc; + struct ost_filterdata *ofd; ENTRY; - 
OBD_ALLOC(lcr, sizeof(*lcr)); - if (lcr == NULL) - RETURN(-ENOMEM); - lcr->lcr_hdr.lrh_len = lcr->lcr_tail.lrt_len = sizeof(*lcr); - lcr->lcr_hdr.lrh_type = OST_CREATE_REC; - lcr->lcr_fid.id = mds_fid->id; - lcr->lcr_fid.generation = mds_fid->generation; - lcr->lcr_fid.f_type = mds_fid->f_type; - lcr->lcr_oid = oid; - lcr->lcr_ogen = ogen; - - rc = llog_cat_add_rec(cathandle, &lcr->lcr_hdr, logcookie, NULL); - OBD_FREE(lcr, sizeof(*lcr)); - - if (rc > 0) { - LASSERT(rc == sizeof(*logcookie)); - rc = 0; + down(&inode->i_sem); + ofd = inode->i_filterdata; + + if (ofd && ofd->ofd_epoch >= io_epoch) { + if (ofd->ofd_epoch > io_epoch) + CERROR("client sent old epoch %d for obj ino %ld\n", + io_epoch, inode->i_ino); + up(&inode->i_sem); + RETURN(0); } - RETURN(rc); -} -int filter_log_op_orphan(struct llog_handle *cathandle, obd_id oid, - obd_count ogen, struct llog_cookie *logcookie) -{ - struct llog_orphan_rec *lor; - int rc; - ENTRY; + if (ofd && ofd->ofd_epoch < io_epoch) { + ofd->ofd_epoch = io_epoch; + } else if (!ofd) { + OBD_ALLOC(ofd, sizeof(*ofd)); + if (!ofd) + GOTO(out, rc = -ENOMEM); + igrab(inode); + inode->i_filterdata = ofd; + ofd->ofd_epoch = io_epoch; + } + /* the decision to write a record is now made, unlock */ + up(&inode->i_sem); - OBD_ALLOC(lor, sizeof(*lor)); - if (lor == NULL) + OBD_ALLOC(lsc, sizeof(*lsc)); + if (lsc == NULL) RETURN(-ENOMEM); - lor->lor_hdr.lrh_len = lor->lor_tail.lrt_len = sizeof(*lor); - lor->lor_hdr.lrh_type = OST_ORPHAN_REC; - lor->lor_oid = oid; - lor->lor_ogen = ogen; + lsc->lsc_hdr.lrh_len = lsc->lsc_tail.lrt_len = sizeof(*lsc); + lsc->lsc_hdr.lrh_type = OST_SZ_REC; + lsc->lsc_fid = *mds_fid; + lsc->lsc_io_epoch = io_epoch; - rc = llog_cat_add_rec(cathandle, &lor->lor_hdr, logcookie, NULL); + rc = llog_cat_add_rec(cathandle, &lsc->lsc_hdr, logcookie, NULL); + OBD_FREE(lsc, sizeof(*lsc)); if (rc > 0) { LASSERT(rc == sizeof(*logcookie)); rc = 0; } + + out: RETURN(rc); } + +/* When this (destroy) operation is committed, 
return the cancel cookie */ +void filter_cancel_cookies_cb(struct obd_device *obd, __u64 transno, + void *cb_data, int error) +{ + struct llog_cookie *cookie = cb_data; + llog_obd_repl_cancel(obd, NULL, 1, cookie, OBD_LLOG_FL_SENDNOW); + OBD_FREE(cb_data, sizeof(struct llog_cookie)); +} diff --git a/lustre/obdfilter/filter_san.c b/lustre/obdfilter/filter_san.c index 30ce9d7..f6d8d06 100644 --- a/lustre/obdfilter/filter_san.c +++ b/lustre/obdfilter/filter_san.c @@ -37,16 +37,16 @@ /* sanobd setup methods - use a specific mount option */ int filter_san_setup(struct obd_device *obd, obd_count len, void *buf) { - struct obd_ioctl_data* data = buf; + struct lustre_cfg* lcfg = buf; char *option = NULL; - if (!data->ioc_inlbuf2) + if (!lcfg->lcfg_inlbuf2) RETURN(-EINVAL); /* for extN/ext3 filesystem, we must mount it with 'writeback' mode */ - if (!strcmp(data->ioc_inlbuf2, "extN")) + if (!strcmp(lcfg->lcfg_inlbuf2, "extN")) option = "data=writeback"; - else if (!strcmp(data->ioc_inlbuf2, "ext3")) + else if (!strcmp(lcfg->lcfg_inlbuf2, "ext3")) option = "data=writeback,asyncdel"; else LBUG(); /* just a reminder */ diff --git a/lustre/ptlrpc/Makefile.mk b/lustre/ptlrpc/Makefile.mk index f7fb9d4..2ed1d7a 100644 --- a/lustre/ptlrpc/Makefile.mk +++ b/lustre/ptlrpc/Makefile.mk @@ -8,4 +8,4 @@ include $(src)/../portals/Kernelenv obj-y += ptlrpc.o ptlrpc-objs := recover.o connection.o ptlrpc_module.o events.o service.o \ client.o niobuf.o pack_generic.o lproc_ptlrpc.o pinger.o \ - recov_thread.o ptlrpc_lib.o import.o + recov_thread.o ptlrpc_lib.o import.o llog_net.o diff --git a/lustre/ptlrpc/recov_thread.c b/lustre/ptlrpc/recov_thread.c index 45cda95..7fb88f2 100644 --- a/lustre/ptlrpc/recov_thread.c +++ b/lustre/ptlrpc/recov_thread.c @@ -45,6 +45,7 @@ #include #include #include +#include #include "ptlrpc_internal.h" static struct llog_commit_master lustre_lcm; @@ -119,6 +120,61 @@ void llcd_send(struct llog_commit_data *llcd) } EXPORT_SYMBOL(llcd_send); +/* deleted objects 
have a commit callback that cancels the MDS + * log record for the deletion. The commit callback calls this + * function + */ +int llog_obd_repl_cancel(struct obd_device *obd, + struct lov_stripe_md *lsm, int count, + struct llog_cookie *cookies, int flags) +{ + struct llog_obd_ctxt *ctxt = obd->obd_llog_ctxt; + struct llog_commit_data *llcd; + int rc = 0; + ENTRY; + + LASSERT(ctxt); + + if (count == 0 || cookies == NULL) { + down(&ctxt->loc_sem); + if (ctxt->loc_llcd == NULL || !(flags & OBD_LLOG_FL_SENDNOW)) + GOTO(out, rc); + + llcd = ctxt->loc_llcd; + GOTO(send_now, rc); + } + + down(&ctxt->loc_sem); + llcd = ctxt->loc_llcd; + if (llcd == NULL) { + llcd = llcd_grab(); + if (llcd == NULL) { + CERROR("couldn't get an llcd - dropped "LPX64":%x+%u\n", + cookies->lgc_lgl.lgl_oid, + cookies->lgc_lgl.lgl_ogen, cookies->lgc_index); + GOTO(out, rc = -ENOMEM); + } + llcd->llcd_import = ctxt->loc_imp; + ctxt->loc_llcd = llcd; + } + + memcpy(llcd->llcd_cookies + llcd->llcd_cookiebytes, cookies, + sizeof(*cookies)); + llcd->llcd_cookiebytes += sizeof(*cookies); + + GOTO(send_now, rc); +send_now: + if ((PAGE_SIZE - llcd->llcd_cookiebytes < sizeof(*cookies) || + flags & OBD_LLOG_FL_SENDNOW)) { + ctxt->loc_llcd = NULL; + llcd_send(llcd); + } +out: + up(&ctxt->loc_sem); + return rc; +} +EXPORT_SYMBOL(llog_obd_repl_cancel); + static int log_commit_thread(void *arg) { struct llog_commit_master *lcm = arg; @@ -262,7 +318,7 @@ static int log_commit_thread(void *arg) spin_lock(&lcm->lcm_llcd_lock); list_splice(&lcd->lcd_llcd_list, &lcm->lcm_llcd_resend); if (++llcd->llcd_tries < 5) { - CERROR("commit %p failed %dx: rc %d\n", + CERROR("commit %p failed on attempt %d: rc %d\n", llcd, llcd->llcd_tries, rc); list_add_tail(&llcd->llcd_list, -- 1.8.3.1