From: nikita Date: Tue, 28 Mar 2006 15:56:58 +0000 (+0000) Subject: add mds-laeyring prototype X-Git-Tag: v1_8_0_110~486^2~2135 X-Git-Url: https://git.whamcloud.com/gitweb?a=commitdiff_plain;h=f568fdc99597659f18c1ce6c1968f4daa97718e0;p=fs%2Flustre-release.git add mds-laeyring prototype --- diff --git a/lustre/Makefile.in b/lustre/Makefile.in index 1b7a9be..4369a2a 100644 --- a/lustre/Makefile.in +++ b/lustre/Makefile.in @@ -8,7 +8,7 @@ subdir-m += osc subdir-m += obdecho subdir-m += mgc -@SERVER_TRUE@subdir-m += mds obdfilter ost mgs +@SERVER_TRUE@subdir-m += mds obdfilter ost mgs mdt @CLIENT_TRUE@subdir-m += mdc llite @QUOTA_TRUE@subdir-m += quota diff --git a/lustre/autoconf/lustre-core.m4 b/lustre/autoconf/lustre-core.m4 index 806ba21..e0a754f 100644 --- a/lustre/autoconf/lustre-core.m4 +++ b/lustre/autoconf/lustre-core.m4 @@ -641,6 +641,8 @@ lustre/mdc/Makefile lustre/mdc/autoMakefile lustre/mds/Makefile lustre/mds/autoMakefile +lustre/mdt/Makefile +lustre/mdt/autoMakefile lustre/obdclass/Makefile lustre/obdclass/autoMakefile lustre/obdecho/Makefile diff --git a/lustre/include/linux/Makefile.am b/lustre/include/linux/Makefile.am index 98320be..44e4faa 100644 --- a/lustre/include/linux/Makefile.am +++ b/lustre/include/linux/Makefile.am @@ -15,4 +15,4 @@ EXTRA_DIST = lprocfs_status.h lustre_debug.h lustre_ha.h lustre_lib.h \ lustre_export.h lustre_log.h obd_echo.h \ lustre_compat25.h lustre_fsfilt.h lustre_import.h lustre_mds.h obd.h \ lvfs.h lvfs_linux.h lustre_cfg.h lustre_lite.h lustre_idl.h \ - lustre_quota.h lustre_ucache.h lustre_ver.h.in lustre_param.h + lustre_quota.h lustre_ucache.h lustre_ver.h.in lustre_param.h lu_object.h diff --git a/lustre/include/linux/lu_object.h b/lustre/include/linux/lu_object.h new file mode 100644 index 0000000..05789ee --- /dev/null +++ b/lustre/include/linux/lu_object.h @@ -0,0 +1,410 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2006 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + */ + +#ifndef __LINUX_LU_OBJECT_H +#define __LINUX_LU_OBJECT_H + +#ifdef __KERNEL__ +#include +#include + +/* + * Layered objects support for CMD3/C5. + */ + + +struct seq_file; +struct proc_dir_entry; + +struct lfid { + __u64 f_seq; + __u32 f_id; + __u32 f_version; +}; + +static inline int lfid_eq(const struct lfid *f0, const struct lfid *f1) +{ + /* check that there is no alignment padding */ + CLASSERT(sizeof *f0 == + sizeof f0->f_seq + sizeof f0->f_id + sizeof f0->f_version); + return memcmp(f0, f1, sizeof *f0) == 0; +} + +/* + * lu_* data-types represent server-side entities shared by data and meta-data + * stacks. + * + * Design goals: + * + * 0. support for layering. + * + * Server side object is split into layers, one per device in the + * corresponding device stack. 
Individual layer is represented by struct + * lu_object. Compound layered object --- by struct lu_object_header. Most + * interface functions take lu_object as an argument and operate on the + * whole compound object. This decision was made due to the following + * reasons: + * + * - it's envisaged that lu_object will be used much more often than + * lu_object_header; + * + * - we want lower (non-top) layers to be able to initiate operations + * on the whole object. + * + * Generic code supports layering more complex than simple stacking, e.g., + * it is possible that at some layer object "spawns" multiple sub-objects + * on the lower layer. + * + * 1. fid-based identification. + * + * Compound object is uniquely identified by its fid. Objects are indexed + * by their fids (hash table is used for index). + * + * 2. caching and life-cycle management. + * + * Object's life-time is controlled by reference counting. When reference + * count drops to 0, object is returned to cache. Cached objects still + * retain their identity (i.e., fid), and can be recovered from cache. + * + * Objects are kept in the global LRU list, and lu_site_purge() function + * can be used to reclaim given number of unused objects from the tail of + * the LRU. + * + * 3. avoiding recursion. + * + * Generic code tries to replace recursion through layers by iterations + * where possible. + * + * + * + * + * + * + * + * + * + */ + +struct lu_site; +struct lu_object; +struct lu_device; +struct lu_object_header; + +/* + * Operations common for data and meta-data devices. + */ +struct lu_device_operations { + /* initialize device */ + int (*ldo_init)(struct lu_device *d); + /* finalize device, release all resources */ + void (*ldo_fini)(struct lu_device *d); + + /* + * Object creation protocol. + * + * Due to design goal of avoiding recursion, object creation (see + * lu_object_alloc()) is somewhat involved: + * + * - first, ->ldo_object_alloc() method of the top-level device + * in the stack is called. It should allocate top level object + * (including lu_object_header), but without any lower-layer + * sub-object(s). + * + * - then lu_object_alloc() sets fid in the header of newly created + * object. + * + * - then ->ldo_object_init() is called. It has to allocate + * lower-layer object(s). To do this, ->ldo_object_init() calls + * ldo_object_alloc() of the lower-layer device(s). + * + * - for all new objects allocated by ->ldo_object_init() (and + * inserted into object stack), ->ldo_object_init() is called again + * repeatedly, until no new objects are created. + * + */ + /* + * Allocate lower-layer parts of the object by calling + * ->ldo_object_alloc() of the corresponding underlying device. + * + * This method is called once for each object inserted into object + * stack. It's responsibility of this method to insert lower-layer + * object(s) it create into appropriate places of object stack. + */ + int (*ldo_object_init)(struct lu_object *); + + /* + * Allocate object for the given device (without lower-layer + * parts). This is called by ->ldo_object_init() from the parent + * layer. + */ + struct lu_object *(*ldo_object_alloc)(struct lu_device *); + + /* + * Dual to ->ldo_object_alloc(). Called when object is removed from + * memory. + */ + void (*ldo_object_free)(struct lu_object *o); + + /* + * Called when last active reference to the object is released (and + * object returns to the cache). + */ + void (*ldo_object_release)(struct lu_object *o); + + /* + * Debugging helper. Print given object. 
+ */ + int (*ldo_object_print)(struct seq_file *f, const struct lu_object *o); +}; + +/* + * Device: a layer in the server side abstraction stacking. + */ +struct lu_device { + /* + * reference count. This is incremented, in particular, on each object + * created at this layer. + * + * XXX which means that atomic_t is probably too small. + */ + atomic_t ld_ref; + struct lu_device_operations *ld_ops; + struct lu_site *ld_site; + struct proc_dir_entry *ld_proc_entry; +}; + +/* + * Flags for the object layers. + */ +enum lu_object_flags { + /* + * this flags is set if ->ldo_object_init() has been called for this + * layer. Used by lu_object_alloc(). + */ + LU_OBJECT_ALLOCATED = (1 << 0) +}; + +/* + * Layer in the layered object. + */ +struct lu_object { + /* + * Header for this object. + */ + struct lu_object_header *lo_header; + /* + * Device for this layer. + */ + struct lu_device *lo_dev; + /* + * Linkage into list of all layers. + */ + struct list_head lo_linkage; + /* + * Depth. Top level layer depth is 0. + */ + int lo_depth; + /* + * Flags from enum lu_object_flags. + */ + unsigned long lo_flags; +}; + +enum lu_object_header_flags { + /* + * Don't keep this object in cache. Object will be destroyed as soon + * as last reference to it is released. This flag cannot be cleared + * once set. + */ + LU_OBJECT_HEARD_BANSHEE = 0, +}; + +/* + * "Compound" object, consisting of multiple layers. + */ +struct lu_object_header { + /* + * Object flags from enum lu_object_header_flags. Set and checked + * atomically. + */ + unsigned long loh_flags; + /* + * Object reference count. Protected by site guard lock. + */ + int loh_ref; + /* + * Fid, uniquely identifying this object. + */ + struct lfid loh_fid; + /* + * Linkage into per-site hash table. Protected by site guard lock. + */ + struct hlist_node loh_hash; + /* + * Linkage into per-site LRU list. Protected by site guard lock. + */ + struct list_head loh_lru; + /* + * Linkage into list of layers. Never modified once set (except lately + * during object destruction). No locking is necessary. + */ + struct list_head loh_layers; +}; + +/* + * lu_site is a "compartment" within which objects are unique, and LRU + * discipline is maintained. + * + * lu_site exists so that multiple layered stacks can co-exist in the same + * address space. + * + */ +struct lu_site { + /* + * lock protecting: + * + * - ->ls_hash hash table (and its linkages in objects); + * + * - ->ls_lru list (and its linkages in objects); + * + * - 0/1 transitions of object ->loh_ref reference count; + * + * yes, it's heavy. + */ + spinlock_t ls_guard; + /* + * Hash-table where objects are indexed by fid. + */ + struct hlist_head *ls_hash; + /* + * Bit-mask for hash-table size. + */ + int ls_hash_mask; + + + /* + * LRU list, updated on each access to object. Protected by + * ->ls_guard. + * + * "Cold" end of LRU is ->ls_lru.next. Accessed object are moved to + * the ->ls_lru.prev (this is due to the non-existence of + * list_for_each_entry_safe_reverse()). + */ + struct list_head ls_lru; + /* + * Total number of objects in this site. Protected by ->ls_guard. + */ + unsigned ls_total; + /* + * Total number of objects in this site with reference counter greater + * than 0. Protected by ->ls_guard. + */ + unsigned ls_busy; + + /* + * Top-level device for this stack. + */ + struct lu_device *ls_top_dev; + + /* statistical counters. Protected by nothing, races are accepted. 
*/ + struct { + __u32 s_created; + __u32 s_cache_hit; + __u32 s_cache_miss; + /* + * Number of hash-table entry checks made. + * + * ->s_cache_check / (->s_cache_miss + ->s_cache_hit) + * + * is an average number of hash slots inspected during single + * lookup. + */ + __u32 s_cache_check; + /* raced cache insertions */ + __u32 s_cache_race; + __u32 s_lru_purged; + } ls_stats; +}; + +/* + * Helpers. + */ +static inline struct lu_device_operations * +lu_object_ops(const struct lu_object *o) +{ + return o->lo_dev->ld_ops; +} + +static inline struct lu_object *lu_object_next(const struct lu_object *o) +{ + return container_of(o->lo_linkage.next, struct lu_object, lo_linkage); +} + +static inline struct lfid *lu_object_fid(const struct lu_object *o) +{ + return &o->lo_header->loh_fid; +} + +static inline struct lu_object *lu_object_top(struct lu_object_header *h) +{ + LASSERT(!list_empty(&h->loh_layers)); + return container_of(h->loh_layers.next, struct lu_object, lo_linkage); +} + +static inline void lu_object_get(struct lu_object *o) +{ + LASSERT(o->lo_header->loh_ref > 0); + spin_lock(&o->lo_dev->ld_site->ls_guard); + o->lo_header->loh_ref ++; + spin_unlock(&o->lo_dev->ld_site->ls_guard); +} + +static inline int lu_object_is_dying(struct lu_object_header *h) +{ + return test_bit(LU_OBJECT_HEARD_BANSHEE, &h->loh_flags); +} + +void lu_object_put(struct lu_object *o); +void lu_site_purge(struct lu_site *s, int nr); +int lu_object_print(struct seq_file *f, const struct lu_object *o); +struct lu_object *lu_object_find(struct lu_site *s, const struct lfid *f); + +int lu_site_init(struct lu_site *s, struct lu_device *top); +void lu_site_fini(struct lu_site *s); + +void lu_device_get(struct lu_device *d); +void lu_device_put(struct lu_device *d); + +int lu_device_init(struct lu_device *d); +void lu_device_fini(struct lu_device *d); + +int lu_object_init(struct lu_object *o, + struct lu_object_header *h, struct lu_device *d); +void lu_object_fini(struct lu_object *o); +void lu_object_add_top(struct lu_object_header *h, struct lu_object *o); +void lu_object_add(struct lu_object *before, struct lu_object *o); + +int lu_object_header_init(struct lu_object_header *h); +void lu_object_header_fini(struct lu_object_header *h); + +#endif /* __KERNEL__ */ +#endif /* __LINUX_OBD_CLASS_H */ diff --git a/lustre/include/linux/lustre_idl.h b/lustre/include/linux/lustre_idl.h index 23732f0..9d3a476 100644 --- a/lustre/include/linux/lustre_idl.h +++ b/lustre/include/linux/lustre_idl.h @@ -1232,6 +1232,7 @@ enum llogd_rpc_ops { LLOG_CATINFO = 507, /* for lfs catinfo */ LLOG_ORIGIN_HANDLE_PREV_BLOCK = 508, LLOG_ORIGIN_HANDLE_DESTROY = 509, /* for destroy llog object*/ + LLOG_LAST_OPC }; struct llogd_body { diff --git a/lustre/include/linux/lustre_net.h b/lustre/include/linux/lustre_net.h index f96fbb4..c61a35c 100644 --- a/lustre/include/linux/lustre_net.h +++ b/lustre/include/linux/lustre_net.h @@ -733,14 +733,14 @@ int ptlrpc_import_recovery_state_machine(struct obd_import *imp); /* ptlrpc/pack_generic.c */ int lustre_msg_swabbed(struct lustre_msg *msg); int lustre_msg_check_version(struct lustre_msg *msg, __u32 version); -int lustre_pack_request(struct ptlrpc_request *, int count, int *lens, +int lustre_pack_request(struct ptlrpc_request *, int count, const int *lens, char **bufs); -int lustre_pack_reply(struct ptlrpc_request *, int count, int *lens, +int lustre_pack_reply(struct ptlrpc_request *, int count, const int *lens, char **bufs); void lustre_shrink_reply(struct ptlrpc_request *req, int segment, unsigned 
int newlen, int move_data); void lustre_free_reply_state(struct ptlrpc_reply_state *rs); -int lustre_msg_size(int count, int *lengths); +int lustre_msg_size(int count, const int *lengths); int lustre_unpack_msg(struct lustre_msg *m, int len); void *lustre_msg_buf(struct lustre_msg *m, int n, int minlen); int lustre_msg_buflen(struct lustre_msg *m, int n); diff --git a/lustre/kernel_patches/patches/2.6-rhel4-kgdb-ga.patch b/lustre/kernel_patches/patches/2.6-rhel4-kgdb-ga.patch new file mode 100644 index 0000000..f3067fa --- /dev/null +++ b/lustre/kernel_patches/patches/2.6-rhel4-kgdb-ga.patch @@ -0,0 +1,6371 @@ + + +This kgdb will get called and will trap almost any kernel +fault WITHOUT BEING ARMED. + +It is entered at boot time via "kgdb" in the boot string, +not "gdb". This entry occurs when the first setup on the +boot string is called, not sometime later. You will not +find a "waiting for gdb" on your console, as the console has +not yet been enabled at this time. (Note, this early stuff +is a bit fragile as the full trap table has yet to be +loaded, something I might address, sometime... So don't try +to look at memory that can not be reached, for example. +Once the full trap table is loaded this restriction goes +away.) + +If you hard code it, you can put a breakpoint() as the FIRST +LINE OF C CODE. + +It does NOT use the serial driver, but if the serial driver +is loaded, it tells it to release the port to avoid +conflict. + +The threads stuff is not configurable, does not require +redirection of schedule() calls and does back track to the +first non schedule() caller on the info threads command. If +you switch to the thread, however, it will show it in the +switch code (as it should). + +It is MUCH more aggressive and paranoid about grabbing the +other cpus on entry. It issues a "send_nmi_all_but_self()" +rather than depending on them to interrupt or hit an NMI +sometime in the distant future. If a cpu does not come to +the party, it will continue without it so all is not lost. + +It does not have anything to do with IOCTL calls, but does +do the control-C thing. + +There is a LOT of info in the patch which ends up in +.../Documentation/i386/kgdb/* + +There is a nifty little thing call kgdb_ts() (kgdb time +stamp) which is a function you can code calls to which puts +some useful stuff in a circular buffer which can be examined +with the supplied gdb macros. + +It also allows you do to do "p foobar(...)" i.e. to call a +function from gdb, just like gdb allows in program +debugging. + +In an SMP system, you can choose to "hold" any given set of +cpus. It also defaults to holding other cpus on single step +(this can be overridden). + +This said, you can imagine my consternation when I found it +"lost it" on continues on 2.5. I found and fixed this this +early pm, a hold cpu on exit goof on my part. + +Oh, and a final point, the configure options are more +extensive (the serial port is set up here, for example, (can +not wait for a command line to do this)). There is one to +do system call exit tests. This is VERY new and causes the +kernel to hit a hard "int 3" if a system call attempts to +exit with preempt count other than zero. This is a fault, +of course, but the current 2.5 is full of them so I don't +recommend turning this on. + + +DESC +kgdbL warning fix +EDESC +From: Ingo Molnar + +this patch fixes a deprecated use of asm input operands. (and shuts up a +gcc 3.3 warning.) 
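[Editorial sketch of the "hard coded" entry mentioned in the description above, not part of the original patch text: the hook function name is made up, and <asm/kgdb.h> is assumed to be the header (added by this patch for i386) that declares breakpoint(); breakpoint() executing an int 3 is what the description states.]

#include <asm/kgdb.h>    /* assumed: declares breakpoint() per this patch */

void __init early_kgdb_hook(void)       /* hypothetical early-init hook */
{
        breakpoint();    /* int 3: drop into kgdb as the first line of C code */
        /* ... resume normal initialization once gdb issues "continue" ... */
}
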
+ +DESC +kgdb buffer overflow fix +EDESC +From: George Anzinger + + +DESC +kgdbL warning fix +EDESC +From: Ingo Molnar + +this patch fixes a deprecated use of asm input operands. (and shuts up a +gcc 3.3 warning.) + +DESC +kgdb: CONFIG_DEBUG_INFO fix +EDESC +From: Thomas Schlichter + +that patch sets DEBUG_INFO to y by default, even if whether DEBUG_KERNEL nor +KGDB is enabled. The attached patch changes this to enable DEBUG_INFO by +default only if KGDB is enabled. + +DESC +x86_64 fixes +EDESC +From Andi Kleen + +Fix x86_64 for kgdb. We forget why. +DESC +correct kgdb.txt Documentation link (against 2.6.1-rc1-mm2) +EDESC +From: Jesper Juhl + +The help text for "config KGDB" in arch/i386/Kconfig refers to +Documentation/i386/kgdb.txt - the actual location is +Documentation/i386/kgdb/kgdb.txt - patch below to fix that. + +DESC +kgdb: fix for recent gcc +EDESC + +arch/i386/kernel/traps.c:97: error: conflicting types for 'int3' +arch/i386/kernel/traps.c:77: error: previous declaration of 'int3' was here +arch/i386/kernel/traps.c:97: error: conflicting types for 'int3' +arch/i386/kernel/traps.c:77: error: previous declaration of 'int3' was here +arch/i386/kernel/traps.c:99: error: conflicting types for 'debug' +arch/i386/kernel/traps.c:75: error: previous declaration of 'debug' was here +arch/i386/kernel/traps.c:99: error: conflicting types for 'debug' +arch/i386/kernel/traps.c:75: error: previous declaration of 'debug' was here + +DESC +kgdb warning fixes +EDESC + +arch/i386/kernel/kgdb_stub.c:1306: warning: 'time' might be used uninitialized in this function +arch/i386/kernel/kgdb_stub.c:1306: warning: 'dum' might be used uninitialized in this function +DESC +THREAD_SIZE fixes for kgdb +EDESC +From: Matt Mackall + +Noticed the THREAD_SIZE clean-ups are in -mm now. Here are the missing +bits for kgdb, tested in -tiny with 4k stacks. +DESC +Fix stack overflow test for non-8k stacks +EDESC +From: Matt Mackall + +This is needed to work properly with 4k and 16k stacks. +DESC +kgdb-ga.patch fix for i386 single-step into sysenter +EDESC +From: Roland McGrath + +Using kgdb-ga.patch from -mm, if userland single-steps (PTRACE_SINGLESTEP) +into the `sysenter' instruction, kgdb reports a bogus trap: + + Program received signal SIGTRAP, Trace/breakpoint trap. + sysenter_past_esp () at arch/i386/kernel/entry.S:249 + 1: x/i $pc 0xc0106023 : sti + (gdb) + +The hackery in the "FIX_STACK" macro in entry.S changes the saved PC for a +the spurious kernel-mode debug trap when TF was set on user-mode execution +of `sysenter', so sysenter_past_esp is where it actually lies in this case. + The following patch removes the kgdb hiccup when userland +PTRACE_SINGLESTEP's into sysenter. +DESC +fix TRAP_BAD_SYSCALL_EXITS on i386 +EDESC +From: Andy Whitcroft + +We are not using the right offset name, nor the right address when checking +for a non-zero preempt count. Move to TI_preempt_count(%ebp). + +Signed-off-by: Andy Whitcroft +DESC +add TRAP_BAD_SYSCALL_EXITS config for i386 +EDESC +From: Andy Whitcroft + +There seems to be code recently added to -bk and thereby -mm which supports +extra debug for preempt on system call exit. Oddly there doesn't seem to +be configuration options to enable them. Below is a possible patch to +allow enabling this on i386. Sadly the most obvious menu to add this to is +the Kernel Hacking menu, but that is defined in architecture specific +configuration. If this makes sense I could patch the other arches? 
+ +Add a configuration option to allow enabling TRAP_BAD_SYSCALL_EXITS to the +Kernel Hacking menu. + +Signed-off-by: Andy Whitcroft +Signed-off-by: Andrew Morton +--- + + 25-akpm/Documentation/i386/kgdb/andthen | 100 + + 25-akpm/Documentation/i386/kgdb/debug-nmi.txt | 37 + 25-akpm/Documentation/i386/kgdb/gdb-globals.txt | 71 + 25-akpm/Documentation/i386/kgdb/gdbinit | 14 + 25-akpm/Documentation/i386/kgdb/gdbinit-modules | 146 + + 25-akpm/Documentation/i386/kgdb/gdbinit.hw | 117 + + 25-akpm/Documentation/i386/kgdb/kgdb.txt | 775 +++++++ + 25-akpm/Documentation/i386/kgdb/loadmodule.sh | 78 + 25-akpm/MAINTAINERS | 6 + 25-akpm/arch/i386/Kconfig | 8 + 25-akpm/arch/i386/Kconfig.debug | 2 + 25-akpm/arch/i386/Kconfig.kgdb | 175 + + 25-akpm/arch/i386/Makefile | 3 + 25-akpm/arch/i386/kernel/Makefile | 1 + 25-akpm/arch/i386/kernel/entry.S | 29 + 25-akpm/arch/i386/kernel/kgdb_stub.c | 2330 ++++++++++++++++++++++++ + 25-akpm/arch/i386/kernel/nmi.c | 25 + 25-akpm/arch/i386/kernel/smp.c | 12 + 25-akpm/arch/i386/kernel/traps.c | 77 + 25-akpm/arch/i386/lib/Makefile | 1 + 25-akpm/arch/i386/lib/kgdb_serial.c | 485 ++++ + 25-akpm/arch/i386/mm/fault.c | 6 + 25-akpm/arch/x86_64/boot/compressed/head.S | 1 + 25-akpm/arch/x86_64/boot/compressed/misc.c | 1 + 25-akpm/drivers/char/keyboard.c | 3 + 25-akpm/drivers/char/sysrq.c | 23 + 25-akpm/drivers/serial/8250.c | 40 + 25-akpm/drivers/serial/serial_core.c | 5 + 25-akpm/include/asm-i386/bugs.h | 21 + 25-akpm/include/asm-i386/kgdb.h | 59 + 25-akpm/include/asm-i386/kgdb_local.h | 102 + + 25-akpm/include/linux/config.h | 3 + 25-akpm/include/linux/dwarf2-lang.h | 132 + + 25-akpm/include/linux/dwarf2.h | 738 +++++++ + 25-akpm/include/linux/serial_core.h | 4 + 25-akpm/include/linux/spinlock.h | 12 + 25-akpm/kernel/pid.c | 6 + 25-akpm/kernel/sched.c | 7 + 38 files changed, 5645 insertions(+), 10 deletions(-) + +diff -puN arch/i386/Kconfig~kgdb-ga arch/i386/Kconfig +--- 25/arch/i386/Kconfig~kgdb-ga 2004-10-21 14:54:15.256604136 -0700 ++++ 25-akpm/arch/i386/Kconfig 2004-10-21 14:54:15.295598208 -0700 +@@ -1184,6 +1184,14 @@ menu "Executable file formats" + + source "fs/Kconfig.binfmt" + ++config TRAP_BAD_SYSCALL_EXITS ++ bool "Debug bad system call exits" ++ depends on KGDB ++ help ++ If you say Y here the kernel will check for system calls which ++ return without clearing preempt. ++ default n ++ + endmenu + + source "drivers/Kconfig" +diff -puN arch/i386/kernel/entry.S~kgdb-ga arch/i386/kernel/entry.S +--- 25/arch/i386/kernel/entry.S~kgdb-ga 2004-10-21 14:54:15.257603984 -0700 ++++ 25-akpm/arch/i386/kernel/entry.S 2004-10-21 14:54:15.296598056 -0700 +@@ -48,6 +48,18 @@ + #include + #include + #include "irq_vectors.h" ++ /* We do not recover from a stack overflow, but at least ++ * we know it happened and should be able to track it down. 
++ */ ++#ifdef CONFIG_STACK_OVERFLOW_TEST ++#define STACK_OVERFLOW_TEST \ ++ testl $(THREAD_SIZE - 512),%esp; \ ++ jnz 10f; \ ++ call stack_overflow; \ ++10: ++#else ++#define STACK_OVERFLOW_TEST ++#endif + + #define nr_syscalls ((syscall_table_size)/4) + +@@ -94,7 +106,8 @@ VM_MASK = 0x00020000 + pushl %ebx; \ + movl $(__USER_DS), %edx; \ + movl %edx, %ds; \ +- movl %edx, %es; ++ movl %edx, %es; \ ++ STACK_OVERFLOW_TEST + + #define RESTORE_INT_REGS \ + popl %ebx; \ +@@ -198,6 +211,7 @@ need_resched: + # sysenter call handler stub + ENTRY(sysenter_entry) + movl TSS_sysenter_esp0(%esp),%esp ++ .globl sysenter_past_esp + sysenter_past_esp: + sti + pushl $(__USER_DS) +@@ -260,6 +274,19 @@ syscall_exit: + testw $_TIF_ALLWORK_MASK, %cx # current->work + jne syscall_exit_work + restore_all: ++#ifdef CONFIG_TRAP_BAD_SYSCALL_EXITS ++ movl EFLAGS(%esp), %eax # mix EFLAGS and CS ++ movb CS(%esp), %al ++ testl $(VM_MASK | 3), %eax ++ jz resume_kernelX # returning to kernel or vm86-space ++ ++ cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ? ++ jz resume_kernelX ++ ++ int $3 ++ ++resume_kernelX: ++#endif + RESTORE_ALL + + # perform work that needs to be done immediately before resumption +diff -puN /dev/null arch/i386/kernel/kgdb_stub.c +--- /dev/null 2003-09-15 06:40:47.000000000 -0700 ++++ 25-akpm/arch/i386/kernel/kgdb_stub.c 2004-10-21 14:54:15.307596384 -0700 +@@ -0,0 +1,2330 @@ ++/* ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2, or (at your option) any ++ * later version. ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ */ ++ ++/* ++ * Copyright (c) 2000 VERITAS Software Corporation. ++ * ++ */ ++/**************************************************************************** ++ * Header: remcom.c,v 1.34 91/03/09 12:29:49 glenne Exp $ ++ * ++ * Module name: remcom.c $ ++ * Revision: 1.34 $ ++ * Date: 91/03/09 12:29:49 $ ++ * Contributor: Lake Stevens Instrument Division$ ++ * ++ * Description: low level support for gdb debugger. $ ++ * ++ * Considerations: only works on target hardware $ ++ * ++ * Written by: Glenn Engel $ ++ * Updated by: David Grothe ++ * ModuleState: Experimental $ ++ * ++ * NOTES: See Below $ ++ * ++ * Modified for 386 by Jim Kingdon, Cygnus Support. ++ * Compatibility with 2.1.xx kernel by David Grothe ++ * ++ * Changes to allow auto initilization. All that is needed is that it ++ * be linked with the kernel and a break point (int 3) be executed. ++ * The header file defines BREAKPOINT to allow one to do ++ * this. It should also be possible, once the interrupt system is up, to ++ * call putDebugChar("+"). Once this is done, the remote debugger should ++ * get our attention by sending a ^C in a packet. George Anzinger ++ * ++ * Integrated into 2.2.5 kernel by Tigran Aivazian ++ * Added thread support, support for multiple processors, ++ * support for ia-32(x86) hardware debugging. ++ * Amit S. Kale ( akale@veritas.com ) ++ * ++ * ++ * To enable debugger support, two things need to happen. One, a ++ * call to set_debug_traps() is necessary in order to allow any breakpoints ++ * or error conditions to be properly intercepted and reported to gdb. 
++ * Two, a breakpoint needs to be generated to begin communication. This ++ * is most easily accomplished by a call to breakpoint(). Breakpoint() ++ * simulates a breakpoint by executing an int 3. ++ * ++ ************* ++ * ++ * The following gdb commands are supported: ++ * ++ * command function Return value ++ * ++ * g return the value of the CPU registers hex data or ENN ++ * G set the value of the CPU registers OK or ENN ++ * ++ * mAA..AA,LLLL Read LLLL bytes at address AA..AA hex data or ENN ++ * MAA..AA,LLLL: Write LLLL bytes at address AA.AA OK or ENN ++ * ++ * c Resume at current address SNN ( signal NN) ++ * cAA..AA Continue at address AA..AA SNN ++ * ++ * s Step one instruction SNN ++ * sAA..AA Step one instruction from AA..AA SNN ++ * ++ * k kill ++ * ++ * ? What was the last sigval ? SNN (signal NN) ++ * ++ * All commands and responses are sent with a packet which includes a ++ * checksum. A packet consists of ++ * ++ * $#. ++ * ++ * where ++ * :: ++ * :: < two hex digits computed as modulo 256 sum of > ++ * ++ * When a packet is received, it is first acknowledged with either '+' or '-'. ++ * '+' indicates a successful transfer. '-' indicates a failed transfer. ++ * ++ * Example: ++ * ++ * Host: Reply: ++ * $m0,10#2a +$00010203040506070809101112131415#42 ++ * ++ ****************************************************************************/ ++#define KGDB_VERSION "<20030915.1651.33>" ++#include ++#include ++#include /* for strcpy */ ++#include ++#include ++#include ++#include ++#include /* for linux pt_regs struct */ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/************************************************************************ ++ * ++ * external low-level support routines ++ */ ++typedef void (*Function) (void); /* pointer to a function */ ++ ++/* Thread reference */ ++typedef unsigned char threadref[8]; ++ ++extern void putDebugChar(int); /* write a single character */ ++extern int getDebugChar(void); /* read and return a single char */ ++ ++/************************************************************************/ ++/* BUFMAX defines the maximum number of characters in inbound/outbound buffers*/ ++/* at least NUMREGBYTES*2 are needed for register packets */ ++/* Longer buffer is needed to list all threads */ ++#define BUFMAX 400 ++ ++char *kgdb_version = KGDB_VERSION; ++ ++/* debug > 0 prints ill-formed commands in valid packets & checksum errors */ ++int debug_regs = 0; /* set to non-zero to print registers */ ++ ++/* filled in by an external module */ ++char *gdb_module_offsets; ++ ++static const char hexchars[] = "0123456789abcdef"; ++ ++/* Number of bytes of registers. */ ++#define NUMREGBYTES 64 ++/* ++ * Note that this register image is in a different order than ++ * the register image that Linux produces at interrupt time. ++ * ++ * Linux's register image is defined by struct pt_regs in ptrace.h. ++ * Just why GDB uses a different order is a historical mystery. ++ */ ++enum regnames { _EAX, /* 0 */ ++ _ECX, /* 1 */ ++ _EDX, /* 2 */ ++ _EBX, /* 3 */ ++ _ESP, /* 4 */ ++ _EBP, /* 5 */ ++ _ESI, /* 6 */ ++ _EDI, /* 7 */ ++ _PC /* 8 also known as eip */ , ++ _PS /* 9 also known as eflags */ , ++ _CS, /* 10 */ ++ _SS, /* 11 */ ++ _DS, /* 12 */ ++ _ES, /* 13 */ ++ _FS, /* 14 */ ++ _GS /* 15 */ ++}; ++ ++/*************************** ASSEMBLY CODE MACROS *************************/ ++/* ++ * Put the error code here just in case the user cares. 
++ * Likewise, the vector number here (since GDB only gets the signal ++ * number through the usual means, and that's not very specific). ++ * The called_from is the return address so he can tell how we entered kgdb. ++ * This will allow him to seperate out the various possible entries. ++ */ ++#define REMOTE_DEBUG 0 /* set != to turn on printing (also available in info) */ ++ ++#define PID_MAX PID_MAX_DEFAULT ++ ++#ifdef CONFIG_SMP ++void smp_send_nmi_allbutself(void); ++#define IF_SMP(x) x ++#undef MAX_NO_CPUS ++#ifndef CONFIG_NO_KGDB_CPUS ++#define CONFIG_NO_KGDB_CPUS 2 ++#endif ++#if CONFIG_NO_KGDB_CPUS > NR_CPUS ++#define MAX_NO_CPUS NR_CPUS ++#else ++#define MAX_NO_CPUS CONFIG_NO_KGDB_CPUS ++#endif ++#define hold_init hold_on_sstep: 1, ++#define MAX_CPU_MASK (unsigned long)((1LL << MAX_NO_CPUS) - 1LL) ++#define NUM_CPUS num_online_cpus() ++#else ++#define IF_SMP(x) ++#define hold_init ++#undef MAX_NO_CPUS ++#define MAX_NO_CPUS 1 ++#define NUM_CPUS 1 ++#endif ++#define NOCPU (struct task_struct *)0xbad1fbad ++/* *INDENT-OFF* */ ++struct kgdb_info { ++ int used_malloc; ++ void *called_from; ++ long long entry_tsc; ++ int errcode; ++ int vector; ++ int print_debug_info; ++#ifdef CONFIG_SMP ++ int hold_on_sstep; ++ struct { ++ volatile struct task_struct *task; ++ int pid; ++ int hold; ++ struct pt_regs *regs; ++ } cpus_waiting[MAX_NO_CPUS]; ++#endif ++} kgdb_info = {hold_init print_debug_info:REMOTE_DEBUG, vector:-1}; ++ ++/* *INDENT-ON* */ ++ ++#define used_m kgdb_info.used_malloc ++/* ++ * This is little area we set aside to contain the stack we ++ * need to build to allow gdb to call functions. We use one ++ * per cpu to avoid locking issues. We will do all this work ++ * with interrupts off so that should take care of the protection ++ * issues. ++ */ ++#define LOOKASIDE_SIZE 200 /* should be more than enough */ ++#define MALLOC_MAX 200 /* Max malloc size */ ++struct { ++ unsigned int esp; ++ int array[LOOKASIDE_SIZE]; ++} fn_call_lookaside[MAX_NO_CPUS]; ++ ++static int trap_cpu; ++static unsigned int OLD_esp; ++ ++#define END_OF_LOOKASIDE &fn_call_lookaside[trap_cpu].array[LOOKASIDE_SIZE] ++#define IF_BIT 0x200 ++#define TF_BIT 0x100 ++ ++#define MALLOC_ROUND 8-1 ++ ++static char malloc_array[MALLOC_MAX]; ++IF_SMP(static void to_gdb(const char *mess)); ++void * ++malloc(int size) ++{ ++ ++ if (size <= (MALLOC_MAX - used_m)) { ++ int old_used = used_m; ++ used_m += ((size + MALLOC_ROUND) & (~MALLOC_ROUND)); ++ return &malloc_array[old_used]; ++ } else { ++ return NULL; ++ } ++} ++ ++/* ++ * Gdb calls functions by pushing agruments, including a return address ++ * on the stack and the adjusting EIP to point to the function. The ++ * whole assumption in GDB is that we are on a different stack than the ++ * one the "user" i.e. code that hit the break point, is on. This, of ++ * course is not true in the kernel. Thus various dodges are needed to ++ * do the call without directly messing with EIP (which we can not change ++ * as it is just a location and not a register. To adjust it would then ++ * require that we move every thing below EIP up or down as needed. This ++ * will not work as we may well have stack relative pointer on the stack ++ * (such as the pointer to regs, for example). ++ ++ * So here is what we do: ++ * We detect gdb attempting to store into the stack area and instead, store ++ * into the fn_call_lookaside.array at the same relative location as if it ++ * were the area ESP pointed at. We also trap ESP modifications ++ * and uses these to adjust fn_call_lookaside.esp. 
On entry ++ * fn_call_lookaside.esp will be set to point at the last entry in ++ * fn_call_lookaside.array. This allows us to check if it has changed, and ++ * if so, on exit, we add the registers we will use to do the move and a ++ * trap/ interrupt return exit sequence. We then adjust the eflags in the ++ * regs array (remember we now have a copy in the fn_call_lookaside.array) to ++ * kill the interrupt bit, AND we change EIP to point at our set up stub. ++ * As part of the register set up we preset the registers to point at the ++ * begining and end of the fn_call_lookaside.array, so all the stub needs to ++ * do is move words from the array to the stack until ESP= the desired value ++ * then do the rti. This will then transfer to the desired function with ++ * all the correct registers. Nifty huh? ++ */ ++extern asmlinkage void fn_call_stub(void); ++extern asmlinkage void fn_rtn_stub(void); ++/* *INDENT-OFF* */ ++__asm__("fn_rtn_stub:\n\t" ++ "movl %eax,%esp\n\t" ++ "fn_call_stub:\n\t" ++ "1:\n\t" ++ "addl $-4,%ebx\n\t" ++ "movl (%ebx), %eax\n\t" ++ "pushl %eax\n\t" ++ "cmpl %esp,%ecx\n\t" ++ "jne 1b\n\t" ++ "popl %eax\n\t" ++ "popl %ebx\n\t" ++ "popl %ecx\n\t" ++ "iret \n\t"); ++/* *INDENT-ON* */ ++#define gdb_i386vector kgdb_info.vector ++#define gdb_i386errcode kgdb_info.errcode ++#define waiting_cpus kgdb_info.cpus_waiting ++#define remote_debug kgdb_info.print_debug_info ++#define hold_cpu(cpu) kgdb_info.cpus_waiting[cpu].hold ++/* gdb locks */ ++ ++#ifdef CONFIG_SMP ++static int in_kgdb_called; ++static spinlock_t waitlocks[MAX_NO_CPUS] = ++ {[0 ... MAX_NO_CPUS - 1] = SPIN_LOCK_UNLOCKED }; ++/* ++ * The following array has the thread pointer of each of the "other" ++ * cpus. We make it global so it can be seen by gdb. ++ */ ++volatile int in_kgdb_entry_log[MAX_NO_CPUS]; ++volatile struct pt_regs *in_kgdb_here_log[MAX_NO_CPUS]; ++/* ++static spinlock_t continuelocks[MAX_NO_CPUS]; ++*/ ++spinlock_t kgdb_spinlock = SPIN_LOCK_UNLOCKED; ++/* waiters on our spinlock plus us */ ++static atomic_t spinlock_waiters = ATOMIC_INIT(1); ++static int spinlock_count = 0; ++static int spinlock_cpu = 0; ++/* ++ * Note we use nested spin locks to account for the case where a break ++ * point is encountered when calling a function by user direction from ++ * kgdb. Also there is the memory exception recursion to account for. ++ * Well, yes, but this lets other cpus thru too. Lets add a ++ * cpu id to the lock. ++ */ ++#define KGDB_SPIN_LOCK(x) if( spinlock_count == 0 || \ ++ spinlock_cpu != smp_processor_id()){\ ++ atomic_inc(&spinlock_waiters); \ ++ while (! 
spin_trylock(x)) {\ ++ in_kgdb(®s);\ ++ }\ ++ atomic_dec(&spinlock_waiters); \ ++ spinlock_count = 1; \ ++ spinlock_cpu = smp_processor_id(); \ ++ }else{ \ ++ spinlock_count++; \ ++ } ++#define KGDB_SPIN_UNLOCK(x) if( --spinlock_count == 0) spin_unlock(x) ++#else ++unsigned kgdb_spinlock = 0; ++#define KGDB_SPIN_LOCK(x) --*x ++#define KGDB_SPIN_UNLOCK(x) ++*x ++#endif ++ ++int ++hex(char ch) ++{ ++ if ((ch >= 'a') && (ch <= 'f')) ++ return (ch - 'a' + 10); ++ if ((ch >= '0') && (ch <= '9')) ++ return (ch - '0'); ++ if ((ch >= 'A') && (ch <= 'F')) ++ return (ch - 'A' + 10); ++ return (-1); ++} ++ ++/* scan for the sequence $# */ ++void ++getpacket(char *buffer) ++{ ++ unsigned char checksum; ++ unsigned char xmitcsum; ++ int i; ++ int count; ++ char ch; ++ ++ do { ++ /* wait around for the start character, ignore all other characters */ ++ while ((ch = (getDebugChar() & 0x7f)) != '$') ; ++ checksum = 0; ++ xmitcsum = -1; ++ ++ count = 0; ++ ++ /* now, read until a # or end of buffer is found */ ++ while (count < BUFMAX) { ++ ch = getDebugChar() & 0x7f; ++ if (ch == '#') ++ break; ++ checksum = checksum + ch; ++ buffer[count] = ch; ++ count = count + 1; ++ } ++ buffer[count] = 0; ++ ++ if (ch == '#') { ++ xmitcsum = hex(getDebugChar() & 0x7f) << 4; ++ xmitcsum += hex(getDebugChar() & 0x7f); ++ if ((remote_debug) && (checksum != xmitcsum)) { ++ printk ++ ("bad checksum. My count = 0x%x, sent=0x%x. buf=%s\n", ++ checksum, xmitcsum, buffer); ++ } ++ ++ if (checksum != xmitcsum) ++ putDebugChar('-'); /* failed checksum */ ++ else { ++ putDebugChar('+'); /* successful transfer */ ++ /* if a sequence char is present, reply the sequence ID */ ++ if (buffer[2] == ':') { ++ putDebugChar(buffer[0]); ++ putDebugChar(buffer[1]); ++ /* remove sequence chars from buffer */ ++ count = strlen(buffer); ++ for (i = 3; i <= count; i++) ++ buffer[i - 3] = buffer[i]; ++ } ++ } ++ } ++ } while (checksum != xmitcsum); ++ ++ if (remote_debug) ++ printk("R:%s\n", buffer); ++} ++ ++/* send the packet in buffer. */ ++ ++void ++putpacket(char *buffer) ++{ ++ unsigned char checksum; ++ int count; ++ char ch; ++ ++ /* $#. 
*/ ++ do { ++ if (remote_debug) ++ printk("T:%s\n", buffer); ++ putDebugChar('$'); ++ checksum = 0; ++ count = 0; ++ ++ while ((ch = buffer[count])) { ++ putDebugChar(ch); ++ checksum += ch; ++ count += 1; ++ } ++ ++ putDebugChar('#'); ++ putDebugChar(hexchars[checksum >> 4]); ++ putDebugChar(hexchars[checksum % 16]); ++ ++ } while ((getDebugChar() & 0x7f) != '+'); ++ ++} ++ ++static char remcomInBuffer[BUFMAX]; ++static char remcomOutBuffer[BUFMAX]; ++static short error; ++ ++void ++debug_error(char *format, char *parm) ++{ ++ if (remote_debug) ++ printk(format, parm); ++} ++ ++static void ++print_regs(struct pt_regs *regs) ++{ ++ printk("EAX=%08lx ", regs->eax); ++ printk("EBX=%08lx ", regs->ebx); ++ printk("ECX=%08lx ", regs->ecx); ++ printk("EDX=%08lx ", regs->edx); ++ printk("\n"); ++ printk("ESI=%08lx ", regs->esi); ++ printk("EDI=%08lx ", regs->edi); ++ printk("EBP=%08lx ", regs->ebp); ++ printk("ESP=%08lx ", (long) ®s->esp); ++ printk("\n"); ++ printk(" DS=%08x ", regs->xds); ++ printk(" ES=%08x ", regs->xes); ++ printk(" SS=%08x ", __KERNEL_DS); ++ printk(" FL=%08lx ", regs->eflags); ++ printk("\n"); ++ printk(" CS=%08x ", regs->xcs); ++ printk(" IP=%08lx ", regs->eip); ++#if 0 ++ printk(" FS=%08x ", regs->fs); ++ printk(" GS=%08x ", regs->gs); ++#endif ++ printk("\n"); ++ ++} /* print_regs */ ++ ++#define NEW_esp fn_call_lookaside[trap_cpu].esp ++ ++static void ++regs_to_gdb_regs(int *gdb_regs, struct pt_regs *regs) ++{ ++ gdb_regs[_EAX] = regs->eax; ++ gdb_regs[_EBX] = regs->ebx; ++ gdb_regs[_ECX] = regs->ecx; ++ gdb_regs[_EDX] = regs->edx; ++ gdb_regs[_ESI] = regs->esi; ++ gdb_regs[_EDI] = regs->edi; ++ gdb_regs[_EBP] = regs->ebp; ++ gdb_regs[_DS] = regs->xds; ++ gdb_regs[_ES] = regs->xes; ++ gdb_regs[_PS] = regs->eflags; ++ gdb_regs[_CS] = regs->xcs; ++ gdb_regs[_PC] = regs->eip; ++ /* Note, as we are a debugging the kernel, we will always ++ * trap in kernel code, this means no priviledge change, ++ * and so the pt_regs structure is not completely valid. In a non ++ * privilege change trap, only EFLAGS, CS and EIP are put on the stack, ++ * SS and ESP are not stacked, this means that the last 2 elements of ++ * pt_regs is not valid (they would normally refer to the user stack) ++ * also, using regs+1 is no good because you end up will a value that is ++ * 2 longs (8) too high. This used to cause stepping over functions ++ * to fail, so my fix is to use the address of regs->esp, which ++ * should point at the end of the stack frame. Note I have ignored ++ * completely exceptions that cause an error code to be stacked, such ++ * as double fault. Stuart Hughes, Zentropix. ++ * original code: gdb_regs[_ESP] = (int) (regs + 1) ; ++ ++ * this is now done on entry and moved to OLD_esp (as well as NEW_esp). 
++ */ ++ gdb_regs[_ESP] = NEW_esp; ++ gdb_regs[_SS] = __KERNEL_DS; ++ gdb_regs[_FS] = 0xFFFF; ++ gdb_regs[_GS] = 0xFFFF; ++} /* regs_to_gdb_regs */ ++ ++static void ++gdb_regs_to_regs(int *gdb_regs, struct pt_regs *regs) ++{ ++ regs->eax = gdb_regs[_EAX]; ++ regs->ebx = gdb_regs[_EBX]; ++ regs->ecx = gdb_regs[_ECX]; ++ regs->edx = gdb_regs[_EDX]; ++ regs->esi = gdb_regs[_ESI]; ++ regs->edi = gdb_regs[_EDI]; ++ regs->ebp = gdb_regs[_EBP]; ++ regs->xds = gdb_regs[_DS]; ++ regs->xes = gdb_regs[_ES]; ++ regs->eflags = gdb_regs[_PS]; ++ regs->xcs = gdb_regs[_CS]; ++ regs->eip = gdb_regs[_PC]; ++ NEW_esp = gdb_regs[_ESP]; /* keep the value */ ++#if 0 /* can't change these */ ++ regs->esp = gdb_regs[_ESP]; ++ regs->xss = gdb_regs[_SS]; ++ regs->fs = gdb_regs[_FS]; ++ regs->gs = gdb_regs[_GS]; ++#endif ++ ++} /* gdb_regs_to_regs */ ++ ++int thread_list = 0; ++ ++void ++get_gdb_regs(struct task_struct *p, struct pt_regs *regs, int *gdb_regs) ++{ ++ unsigned long stack_page; ++ int count = 0; ++ IF_SMP(int i); ++ if (!p || p == current) { ++ regs_to_gdb_regs(gdb_regs, regs); ++ return; ++ } ++#ifdef CONFIG_SMP ++ for (i = 0; i < MAX_NO_CPUS; i++) { ++ if (p == kgdb_info.cpus_waiting[i].task) { ++ regs_to_gdb_regs(gdb_regs, ++ kgdb_info.cpus_waiting[i].regs); ++ gdb_regs[_ESP] = ++ (int) &kgdb_info.cpus_waiting[i].regs->esp; ++ ++ return; ++ } ++ } ++#endif ++ memset(gdb_regs, 0, NUMREGBYTES); ++ gdb_regs[_ESP] = p->thread.esp; ++ gdb_regs[_PC] = p->thread.eip; ++ gdb_regs[_EBP] = *(int *) gdb_regs[_ESP]; ++ gdb_regs[_EDI] = *(int *) (gdb_regs[_ESP] + 4); ++ gdb_regs[_ESI] = *(int *) (gdb_regs[_ESP] + 8); ++ ++/* ++ * This code is to give a more informative notion of where a process ++ * is waiting. It is used only when the user asks for a thread info ++ * list. If he then switches to the thread, s/he will find the task ++ * is in schedule, but a back trace should show the same info we come ++ * up with. This code was shamelessly purloined from process.c. It was ++ * then enhanced to provide more registers than simply the program ++ * counter. ++ */ ++ ++ if (!thread_list) { ++ return; ++ } ++ ++ if (p->state == TASK_RUNNING) ++ return; ++ stack_page = (unsigned long) p->thread_info; ++ if (gdb_regs[_ESP] < stack_page || gdb_regs[_ESP] > ++ THREAD_SIZE - sizeof(long) + stack_page) ++ return; ++ /* include/asm-i386/system.h:switch_to() pushes ebp last. */ ++ do { ++ if (gdb_regs[_EBP] < stack_page || ++ gdb_regs[_EBP] > THREAD_SIZE - 2*sizeof(long) + stack_page) ++ return; ++ gdb_regs[_PC] = *(unsigned long *) (gdb_regs[_EBP] + 4); ++ gdb_regs[_ESP] = gdb_regs[_EBP] + 8; ++ gdb_regs[_EBP] = *(unsigned long *) gdb_regs[_EBP]; ++ if (!in_sched_functions(gdb_regs[_PC])) ++ return; ++ } while (count++ < 16); ++ return; ++} ++ ++/* Indicate to caller of mem2hex or hex2mem that there has been an ++ error. 
*/ ++static volatile int mem_err = 0; ++static volatile int mem_err_expected = 0; ++static volatile int mem_err_cnt = 0; ++static int garbage_loc = -1; ++ ++int ++get_char(char *addr) ++{ ++ return *addr; ++} ++ ++void ++set_char(char *addr, int val, int may_fault) ++{ ++ /* ++ * This code traps references to the area mapped to the kernel ++ * stack as given by the regs and, instead, stores to the ++ * fn_call_lookaside[cpu].array ++ */ ++ if (may_fault && ++ (unsigned int) addr < OLD_esp && ++ ((unsigned int) addr > (OLD_esp - (unsigned int) LOOKASIDE_SIZE))) { ++ addr = (char *) END_OF_LOOKASIDE - ((char *) OLD_esp - addr); ++ } ++ *addr = val; ++} ++ ++/* convert the memory pointed to by mem into hex, placing result in buf */ ++/* return a pointer to the last char put in buf (null) */ ++/* If MAY_FAULT is non-zero, then we should set mem_err in response to ++ a fault; if zero treat a fault like any other fault in the stub. */ ++char * ++mem2hex(char *mem, char *buf, int count, int may_fault) ++{ ++ int i; ++ unsigned char ch; ++ ++ if (may_fault) { ++ mem_err_expected = 1; ++ mem_err = 0; ++ } ++ for (i = 0; i < count; i++) { ++ /* printk("%lx = ", mem) ; */ ++ ++ ch = get_char(mem++); ++ ++ /* printk("%02x\n", ch & 0xFF) ; */ ++ if (may_fault && mem_err) { ++ if (remote_debug) ++ printk("Mem fault fetching from addr %lx\n", ++ (long) (mem - 1)); ++ *buf = 0; /* truncate buffer */ ++ return (buf); ++ } ++ *buf++ = hexchars[ch >> 4]; ++ *buf++ = hexchars[ch % 16]; ++ } ++ *buf = 0; ++ if (may_fault) ++ mem_err_expected = 0; ++ return (buf); ++} ++ ++/* convert the hex array pointed to by buf into binary to be placed in mem */ ++/* return a pointer to the character AFTER the last byte written */ ++/* NOTE: We use the may fault flag to also indicate if the write is to ++ * the registers (0) or "other" memory (!=0) ++ */ ++char * ++hex2mem(char *buf, char *mem, int count, int may_fault) ++{ ++ int i; ++ unsigned char ch; ++ ++ if (may_fault) { ++ mem_err_expected = 1; ++ mem_err = 0; ++ } ++ for (i = 0; i < count; i++) { ++ ch = hex(*buf++) << 4; ++ ch = ch + hex(*buf++); ++ set_char(mem++, ch, may_fault); ++ ++ if (may_fault && mem_err) { ++ if (remote_debug) ++ printk("Mem fault storing to addr %lx\n", ++ (long) (mem - 1)); ++ return (mem); ++ } ++ } ++ if (may_fault) ++ mem_err_expected = 0; ++ return (mem); ++} ++ ++/**********************************************/ ++/* WHILE WE FIND NICE HEX CHARS, BUILD AN INT */ ++/* RETURN NUMBER OF CHARS PROCESSED */ ++/**********************************************/ ++int ++hexToInt(char **ptr, int *intValue) ++{ ++ int numChars = 0; ++ int hexValue; ++ ++ *intValue = 0; ++ ++ while (**ptr) { ++ hexValue = hex(**ptr); ++ if (hexValue >= 0) { ++ *intValue = (*intValue << 4) | hexValue; ++ numChars++; ++ } else ++ break; ++ ++ (*ptr)++; ++ } ++ ++ return (numChars); ++} ++ ++#define stubhex(h) hex(h) ++#ifdef old_thread_list ++ ++static int ++stub_unpack_int(char *buff, int fieldlength) ++{ ++ int nibble; ++ int retval = 0; ++ ++ while (fieldlength) { ++ nibble = stubhex(*buff++); ++ retval |= nibble; ++ fieldlength--; ++ if (fieldlength) ++ retval = retval << 4; ++ } ++ return retval; ++} ++#endif ++static char * ++pack_hex_byte(char *pkt, int byte) ++{ ++ *pkt++ = hexchars[(byte >> 4) & 0xf]; ++ *pkt++ = hexchars[(byte & 0xf)]; ++ return pkt; ++} ++ ++#define BUF_THREAD_ID_SIZE 16 ++ ++static char * ++pack_threadid(char *pkt, threadref * id) ++{ ++ char *limit; ++ unsigned char *altid; ++ ++ altid = (unsigned char *) id; ++ limit = pkt + 
BUF_THREAD_ID_SIZE; ++ while (pkt < limit) ++ pkt = pack_hex_byte(pkt, *altid++); ++ return pkt; ++} ++ ++#ifdef old_thread_list ++static char * ++unpack_byte(char *buf, int *value) ++{ ++ *value = stub_unpack_int(buf, 2); ++ return buf + 2; ++} ++ ++static char * ++unpack_threadid(char *inbuf, threadref * id) ++{ ++ char *altref; ++ char *limit = inbuf + BUF_THREAD_ID_SIZE; ++ int x, y; ++ ++ altref = (char *) id; ++ ++ while (inbuf < limit) { ++ x = stubhex(*inbuf++); ++ y = stubhex(*inbuf++); ++ *altref++ = (x << 4) | y; ++ } ++ return inbuf; ++} ++#endif ++void ++int_to_threadref(threadref * id, int value) ++{ ++ unsigned char *scan; ++ ++ scan = (unsigned char *) id; ++ { ++ int i = 4; ++ while (i--) ++ *scan++ = 0; ++ } ++ *scan++ = (value >> 24) & 0xff; ++ *scan++ = (value >> 16) & 0xff; ++ *scan++ = (value >> 8) & 0xff; ++ *scan++ = (value & 0xff); ++} ++int ++int_to_hex_v(unsigned char * id, int value) ++{ ++ unsigned char *start = id; ++ int shift; ++ int ch; ++ ++ for (shift = 28; shift >= 0; shift -= 4) { ++ if ((ch = (value >> shift) & 0xf) || (id != start)) { ++ *id = hexchars[ch]; ++ id++; ++ } ++ } ++ if (id == start) ++ *id++ = '0'; ++ return id - start; ++} ++#ifdef old_thread_list ++ ++static int ++threadref_to_int(threadref * ref) ++{ ++ int i, value = 0; ++ unsigned char *scan; ++ ++ scan = (char *) ref; ++ scan += 4; ++ i = 4; ++ while (i-- > 0) ++ value = (value << 8) | ((*scan++) & 0xff); ++ return value; ++} ++#endif ++static int ++cmp_str(char *s1, char *s2, int count) ++{ ++ while (count--) { ++ if (*s1++ != *s2++) ++ return 0; ++ } ++ return 1; ++} ++ ++#if 1 /* this is a hold over from 2.4 where O(1) was "sometimes" */ ++extern struct task_struct *kgdb_get_idle(int cpu); ++#define idle_task(cpu) kgdb_get_idle(cpu) ++#else ++#define idle_task(cpu) init_tasks[cpu] ++#endif ++ ++extern int kgdb_pid_init_done; ++ ++struct task_struct * ++getthread(int pid) ++{ ++ struct task_struct *thread; ++ if (pid >= PID_MAX && pid <= (PID_MAX + MAX_NO_CPUS)) { ++ ++ return idle_task(pid - PID_MAX); ++ } else { ++ /* ++ * find_task_by_pid is relatively safe all the time ++ * Other pid functions require lock downs which imply ++ * that we may be interrupting them (as we get here ++ * in the middle of most any lock down). ++ * Still we don't want to call until the table exists! 
++ */ ++ if (kgdb_pid_init_done){ ++ thread = find_task_by_pid(pid); ++ if (thread) { ++ return thread; ++ } ++ } ++ } ++ return NULL; ++} ++/* *INDENT-OFF* */ ++struct hw_breakpoint { ++ unsigned enabled; ++ unsigned type; ++ unsigned len; ++ unsigned addr; ++} breakinfo[4] = { {enabled:0}, ++ {enabled:0}, ++ {enabled:0}, ++ {enabled:0}}; ++/* *INDENT-ON* */ ++unsigned hw_breakpoint_status; ++void ++correct_hw_break(void) ++{ ++ int breakno; ++ int correctit; ++ int breakbit; ++ unsigned dr7; ++ ++ asm volatile ("movl %%db7, %0\n":"=r" (dr7) ++ :); ++ /* *INDENT-OFF* */ ++ do { ++ unsigned addr0, addr1, addr2, addr3; ++ asm volatile ("movl %%db0, %0\n" ++ "movl %%db1, %1\n" ++ "movl %%db2, %2\n" ++ "movl %%db3, %3\n" ++ :"=r" (addr0), "=r"(addr1), ++ "=r"(addr2), "=r"(addr3) ++ :); ++ } while (0); ++ /* *INDENT-ON* */ ++ correctit = 0; ++ for (breakno = 0; breakno < 3; breakno++) { ++ breakbit = 2 << (breakno << 1); ++ if (!(dr7 & breakbit) && breakinfo[breakno].enabled) { ++ correctit = 1; ++ dr7 |= breakbit; ++ dr7 &= ~(0xf0000 << (breakno << 2)); ++ dr7 |= (((breakinfo[breakno].len << 2) | ++ breakinfo[breakno].type) << 16) << ++ (breakno << 2); ++ switch (breakno) { ++ case 0: ++ asm volatile ("movl %0, %%dr0\n"::"r" ++ (breakinfo[breakno].addr)); ++ break; ++ ++ case 1: ++ asm volatile ("movl %0, %%dr1\n"::"r" ++ (breakinfo[breakno].addr)); ++ break; ++ ++ case 2: ++ asm volatile ("movl %0, %%dr2\n"::"r" ++ (breakinfo[breakno].addr)); ++ break; ++ ++ case 3: ++ asm volatile ("movl %0, %%dr3\n"::"r" ++ (breakinfo[breakno].addr)); ++ break; ++ } ++ } else if ((dr7 & breakbit) && !breakinfo[breakno].enabled) { ++ correctit = 1; ++ dr7 &= ~breakbit; ++ dr7 &= ~(0xf0000 << (breakno << 2)); ++ } ++ } ++ if (correctit) { ++ asm volatile ("movl %0, %%db7\n"::"r" (dr7)); ++ } ++} ++ ++int ++remove_hw_break(unsigned breakno) ++{ ++ if (!breakinfo[breakno].enabled) { ++ return -1; ++ } ++ breakinfo[breakno].enabled = 0; ++ return 0; ++} ++ ++int ++set_hw_break(unsigned breakno, unsigned type, unsigned len, unsigned addr) ++{ ++ if (breakinfo[breakno].enabled) { ++ return -1; ++ } ++ breakinfo[breakno].enabled = 1; ++ breakinfo[breakno].type = type; ++ breakinfo[breakno].len = len; ++ breakinfo[breakno].addr = addr; ++ return 0; ++} ++ ++#ifdef CONFIG_SMP ++static int in_kgdb_console = 0; ++ ++int ++in_kgdb(struct pt_regs *regs) ++{ ++ unsigned flags; ++ int cpu = smp_processor_id(); ++ in_kgdb_called = 1; ++ if (!spin_is_locked(&kgdb_spinlock)) { ++ if (in_kgdb_here_log[cpu] || /* we are holding this cpu */ ++ in_kgdb_console) { /* or we are doing slow i/o */ ++ return 1; ++ } ++ return 0; ++ } ++ ++ /* As I see it the only reason not to let all cpus spin on ++ * the same spin_lock is to allow selected ones to proceed. ++ * This would be a good thing, so we leave it this way. ++ * Maybe someday.... Done ! ++ ++ * in_kgdb() is called from an NMI so we don't pretend ++ * to have any resources, like printk() for example. ++ */ ++ ++ kgdb_local_irq_save(flags); /* only local here, to avoid hanging */ ++ /* ++ * log arival of this cpu ++ * The NMI keeps on ticking. Protect against recurring more ++ * than once, and ignor the cpu that has the kgdb lock ++ */ ++ in_kgdb_entry_log[cpu]++; ++ in_kgdb_here_log[cpu] = regs; ++ if (cpu == spinlock_cpu || waiting_cpus[cpu].task) { ++ goto exit_in_kgdb; ++ } ++ /* ++ * For protection of the initilization of the spin locks by kgdb ++ * it locks the kgdb spinlock before it gets the wait locks set ++ * up. We wait here for the wait lock to be taken. 
If the ++ * kgdb lock goes away first?? Well, it could be a slow exit ++ * sequence where the wait lock is removed prior to the kgdb lock ++ * so if kgdb gets unlocked, we just exit. ++ */ ++ while (spin_is_locked(&kgdb_spinlock) && ++ !spin_is_locked(waitlocks + cpu)) ; ++ if (!spin_is_locked(&kgdb_spinlock)) { ++ goto exit_in_kgdb; ++ } ++ waiting_cpus[cpu].task = current; ++ waiting_cpus[cpu].pid = (current->pid) ? : (PID_MAX + cpu); ++ waiting_cpus[cpu].regs = regs; ++ ++ spin_unlock_wait(waitlocks + cpu); ++ /* ++ * log departure of this cpu ++ */ ++ waiting_cpus[cpu].task = 0; ++ waiting_cpus[cpu].pid = 0; ++ waiting_cpus[cpu].regs = 0; ++ correct_hw_break(); ++ exit_in_kgdb: ++ in_kgdb_here_log[cpu] = 0; ++ kgdb_local_irq_restore(flags); ++ return 1; ++ /* ++ spin_unlock(continuelocks + smp_processor_id()); ++ */ ++} ++ ++void ++smp__in_kgdb(struct pt_regs regs) ++{ ++ ack_APIC_irq(); ++ in_kgdb(®s); ++} ++#else ++int ++in_kgdb(struct pt_regs *regs) ++{ ++ return (kgdb_spinlock); ++} ++#endif ++ ++void ++printexceptioninfo(int exceptionNo, int errorcode, char *buffer) ++{ ++ unsigned dr6; ++ int i; ++ switch (exceptionNo) { ++ case 1: /* debug exception */ ++ break; ++ case 3: /* breakpoint */ ++ sprintf(buffer, "Software breakpoint"); ++ return; ++ default: ++ sprintf(buffer, "Details not available"); ++ return; ++ } ++ asm volatile ("movl %%db6, %0\n":"=r" (dr6) ++ :); ++ if (dr6 & 0x4000) { ++ sprintf(buffer, "Single step"); ++ return; ++ } ++ for (i = 0; i < 4; ++i) { ++ if (dr6 & (1 << i)) { ++ sprintf(buffer, "Hardware breakpoint %d", i); ++ return; ++ } ++ } ++ sprintf(buffer, "Unknown trap"); ++ return; ++} ++ ++/* ++ * This function does all command procesing for interfacing to gdb. ++ * ++ * NOTE: The INT nn instruction leaves the state of the interrupt ++ * enable flag UNCHANGED. That means that when this routine ++ * is entered via a breakpoint (INT 3) instruction from code ++ * that has interrupts enabled, then interrupts will STILL BE ++ * enabled when this routine is entered. The first thing that ++ * we do here is disable interrupts so as to prevent recursive ++ * entries and bothersome serial interrupts while we are ++ * trying to run the serial port in polled mode. ++ * ++ * For kernel version 2.1.xx the kgdb_cli() actually gets a spin lock so ++ * it is always necessary to do a restore_flags before returning ++ * so as to let go of that lock. ++ */ ++int ++kgdb_handle_exception(int exceptionVector, ++ int signo, int err_code, struct pt_regs *linux_regs) ++{ ++ struct task_struct *usethread = NULL; ++ struct task_struct *thread_list_start = 0, *thread = NULL; ++ int addr, length; ++ int breakno, breaktype; ++ char *ptr; ++ int newPC; ++ threadref thref; ++ int threadid; ++ int thread_min = PID_MAX + MAX_NO_CPUS; ++#ifdef old_thread_list ++ int maxthreads; ++#endif ++ int nothreads; ++ unsigned long flags; ++ int gdb_regs[NUMREGBYTES / 4]; ++ int dr6; ++ IF_SMP(int entry_state = 0); /* 0, ok, 1, no nmi, 2 sync failed */ ++#define NO_NMI 1 ++#define NO_SYNC 2 ++#define regs (*linux_regs) ++#define NUMREGS NUMREGBYTES/4 ++ /* ++ * If the entry is not from the kernel then return to the Linux ++ * trap handler and let it process the interrupt normally. 
++ */ ++ if ((linux_regs->eflags & VM_MASK) || (3 & linux_regs->xcs)) { ++ printk("ignoring non-kernel exception\n"); ++ print_regs(®s); ++ return (0); ++ } ++ ++ kgdb_local_irq_save(flags); ++ ++ /* Get kgdb spinlock */ ++ ++ KGDB_SPIN_LOCK(&kgdb_spinlock); ++ rdtscll(kgdb_info.entry_tsc); ++ /* ++ * We depend on this spinlock and the NMI watch dog to control the ++ * other cpus. They will arrive at "in_kgdb()" as a result of the ++ * NMI and will wait there for the following spin locks to be ++ * released. ++ */ ++#ifdef CONFIG_SMP ++ ++#if 0 ++ if (cpu_callout_map & ~MAX_CPU_MASK) { ++ printk("kgdb : too many cpus, possibly not mapped" ++ " in contiguous space, change MAX_NO_CPUS" ++ " in kgdb_stub and make new kernel.\n" ++ " cpu_callout_map is %lx\n", cpu_callout_map); ++ goto exit_just_unlock; ++ } ++#endif ++ if (spinlock_count == 1) { ++ int time = 0, end_time, dum = 0; ++ int i; ++ int cpu_logged_in[MAX_NO_CPUS] = {[0 ... MAX_NO_CPUS - 1] = (0) ++ }; ++ if (remote_debug) { ++ printk("kgdb : cpu %d entry, syncing others\n", ++ smp_processor_id()); ++ } ++ for (i = 0; i < MAX_NO_CPUS; i++) { ++ /* ++ * Use trylock as we may already hold the lock if ++ * we are holding the cpu. Net result is all ++ * locked. ++ */ ++ spin_trylock(&waitlocks[i]); ++ } ++ for (i = 0; i < MAX_NO_CPUS; i++) ++ cpu_logged_in[i] = 0; ++ /* ++ * Wait for their arrival. We know the watch dog is active if ++ * in_kgdb() has ever been called, as it is always called on a ++ * watchdog tick. ++ */ ++ rdtsc(dum, time); ++ end_time = time + 2; /* Note: we use the High order bits! */ ++ i = 1; ++ if (num_online_cpus() > 1) { ++ int me_in_kgdb = in_kgdb_entry_log[smp_processor_id()]; ++ smp_send_nmi_allbutself(); ++ while (i < num_online_cpus() && time != end_time) { ++ int j; ++ for (j = 0; j < MAX_NO_CPUS; j++) { ++ if (waiting_cpus[j].task && ++ !cpu_logged_in[j]) { ++ i++; ++ cpu_logged_in[j] = 1; ++ if (remote_debug) { ++ printk ++ ("kgdb : cpu %d arrived at kgdb\n", ++ j); ++ } ++ break; ++ } else if (!waiting_cpus[j].task && ++ !cpu_online(j)) { ++ waiting_cpus[j].task = NOCPU; ++ cpu_logged_in[j] = 1; ++ waiting_cpus[j].hold = 1; ++ break; ++ } ++ if (!waiting_cpus[j].task && ++ in_kgdb_here_log[j]) { ++ ++ int wait = 100000; ++ while (wait--) ; ++ if (!waiting_cpus[j].task && ++ in_kgdb_here_log[j]) { ++ printk ++ ("kgdb : cpu %d stall" ++ " in in_kgdb\n", ++ j); ++ i++; ++ cpu_logged_in[j] = 1; ++ waiting_cpus[j].task = ++ (struct task_struct ++ *) 1; ++ } ++ } ++ } ++ ++ if (in_kgdb_entry_log[smp_processor_id()] > ++ (me_in_kgdb + 10)) { ++ break; ++ } ++ ++ rdtsc(dum, time); ++ } ++ if (i < num_online_cpus()) { ++ printk ++ ("kgdb : time out, proceeding without sync\n"); ++#if 0 ++ printk("kgdb : Waiting_cpus: 0 = %d, 1 = %d\n", ++ waiting_cpus[0].task != 0, ++ waiting_cpus[1].task != 0); ++ printk("kgdb : Cpu_logged in: 0 = %d, 1 = %d\n", ++ cpu_logged_in[0], cpu_logged_in[1]); ++ printk ++ ("kgdb : in_kgdb_here_log in: 0 = %d, 1 = %d\n", ++ in_kgdb_here_log[0] != 0, ++ in_kgdb_here_log[1] != 0); ++#endif ++ entry_state = NO_SYNC; ++ } else { ++#if 0 ++ int ent = ++ in_kgdb_entry_log[smp_processor_id()] - ++ me_in_kgdb; ++ printk("kgdb : sync after %d entries\n", ent); ++#endif ++ } ++ } else { ++ if (remote_debug) { ++ printk ++ ("kgdb : %d cpus, but watchdog not active\n" ++ "proceeding without locking down other cpus\n", ++ num_online_cpus()); ++ entry_state = NO_NMI; ++ } ++ } ++ } ++#endif ++ ++ if (remote_debug) { ++ unsigned long *lp = (unsigned long *) &linux_regs; ++ ++ 
printk("handle_exception(exceptionVector=%d, " ++ "signo=%d, err_code=%d, linux_regs=%p)\n", ++ exceptionVector, signo, err_code, linux_regs); ++ if (debug_regs) { ++ print_regs(®s); ++ printk("Stk: %8lx %8lx %8lx %8lx" ++ " %8lx %8lx %8lx %8lx\n", ++ lp[0], lp[1], lp[2], lp[3], ++ lp[4], lp[5], lp[6], lp[7]); ++ printk(" %8lx %8lx %8lx %8lx" ++ " %8lx %8lx %8lx %8lx\n", ++ lp[8], lp[9], lp[10], lp[11], ++ lp[12], lp[13], lp[14], lp[15]); ++ printk(" %8lx %8lx %8lx %8lx " ++ "%8lx %8lx %8lx %8lx\n", ++ lp[16], lp[17], lp[18], lp[19], ++ lp[20], lp[21], lp[22], lp[23]); ++ printk(" %8lx %8lx %8lx %8lx " ++ "%8lx %8lx %8lx %8lx\n", ++ lp[24], lp[25], lp[26], lp[27], ++ lp[28], lp[29], lp[30], lp[31]); ++ } ++ } ++ ++ /* Disable hardware debugging while we are in kgdb */ ++ /* Get the debug register status register */ ++/* *INDENT-OFF* */ ++ __asm__("movl %0,%%db7" ++ : /* no output */ ++ :"r"(0)); ++ ++ asm volatile ("movl %%db6, %0\n" ++ :"=r" (hw_breakpoint_status) ++ :); ++ ++/* *INDENT-ON* */ ++ switch (exceptionVector) { ++ case 0: /* divide error */ ++ case 1: /* debug exception */ ++ case 2: /* NMI */ ++ case 3: /* breakpoint */ ++ case 4: /* overflow */ ++ case 5: /* bounds check */ ++ case 6: /* invalid opcode */ ++ case 7: /* device not available */ ++ case 8: /* double fault (errcode) */ ++ case 10: /* invalid TSS (errcode) */ ++ case 12: /* stack fault (errcode) */ ++ case 16: /* floating point error */ ++ case 17: /* alignment check (errcode) */ ++ default: /* any undocumented */ ++ break; ++ case 11: /* segment not present (errcode) */ ++ case 13: /* general protection (errcode) */ ++ case 14: /* page fault (special errcode) */ ++ case 19: /* cache flush denied */ ++ if (mem_err_expected) { ++ /* ++ * This fault occured because of the ++ * get_char or set_char routines. These ++ * two routines use either eax of edx to ++ * indirectly reference the location in ++ * memory that they are working with. ++ * For a page fault, when we return the ++ * instruction will be retried, so we ++ * have to make sure that these ++ * registers point to valid memory. ++ */ ++ mem_err = 1; /* set mem error flag */ ++ mem_err_expected = 0; ++ mem_err_cnt++; /* helps in debugging */ ++ /* make valid address */ ++ regs.eax = (long) &garbage_loc; ++ /* make valid address */ ++ regs.edx = (long) &garbage_loc; ++ if (remote_debug) ++ printk("Return after memory error: " ++ "mem_err_cnt=%d\n", mem_err_cnt); ++ if (debug_regs) ++ print_regs(®s); ++ goto exit_kgdb; ++ } ++ break; ++ } ++ if (remote_debug) ++ printk("kgdb : entered kgdb on cpu %d\n", smp_processor_id()); ++ ++ gdb_i386vector = exceptionVector; ++ gdb_i386errcode = err_code; ++ kgdb_info.called_from = __builtin_return_address(0); ++#ifdef CONFIG_SMP ++ /* ++ * OK, we can now communicate, lets tell gdb about the sync. ++ * but only if we had a problem. ++ */ ++ switch (entry_state) { ++ case NO_NMI: ++ to_gdb("NMI not active, other cpus not stopped\n"); ++ break; ++ case NO_SYNC: ++ to_gdb("Some cpus not stopped, see 'kgdb_info' for details\n"); ++ default:; ++ } ++ ++#endif ++/* ++ * Set up the gdb function call area. 
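The vector 11/13/14 case above tolerates faults raised by the stub's own memory accessors: mem_err_expected marks a probe in progress, the fault path records mem_err and points the registers at a harmless location so the retried access succeeds. The same "probe memory, note the fault, keep going" idea can be sketched in user space with a SIGSEGV handler and sigsetjmp; this only illustrates the pattern, it is not the kernel mechanism:

#include <setjmp.h>
#include <signal.h>
#include <stdio.h>

static sigjmp_buf probe_env;
static volatile int mem_err;

static void segv_handler(int sig)
{
        (void)sig;
        mem_err = 1;
        siglongjmp(probe_env, 1);
}

static int probe_byte(const volatile char *addr, char *out)
{
        mem_err = 0;
        if (sigsetjmp(probe_env, 1) == 0)
                *out = *addr;           /* may fault */
        return mem_err ? -1 : 0;
}

int main(void)
{
        struct sigaction sa = { .sa_handler = segv_handler };
        char ok = 'x', val;

        sigemptyset(&sa.sa_mask);
        sigaction(SIGSEGV, &sa, NULL);

        printf("good address: %d\n", probe_byte(&ok, &val));
        printf("bad address:  %d\n", probe_byte((char *)8, &val));
        return 0;
}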
++ */ ++ trap_cpu = smp_processor_id(); ++ OLD_esp = NEW_esp = (int) (&linux_regs->esp); ++ ++ IF_SMP(once_again:) ++ /* reply to host that an exception has occurred */ ++ remcomOutBuffer[0] = 'S'; ++ remcomOutBuffer[1] = hexchars[signo >> 4]; ++ remcomOutBuffer[2] = hexchars[signo % 16]; ++ remcomOutBuffer[3] = 0; ++ ++ putpacket(remcomOutBuffer); ++ ++ while (1 == 1) { ++ error = 0; ++ remcomOutBuffer[0] = 0; ++ getpacket(remcomInBuffer); ++ switch (remcomInBuffer[0]) { ++ case '?': ++ remcomOutBuffer[0] = 'S'; ++ remcomOutBuffer[1] = hexchars[signo >> 4]; ++ remcomOutBuffer[2] = hexchars[signo % 16]; ++ remcomOutBuffer[3] = 0; ++ break; ++ case 'd': ++ remote_debug = !(remote_debug); /* toggle debug flag */ ++ printk("Remote debug %s\n", ++ remote_debug ? "on" : "off"); ++ break; ++ case 'g': /* return the value of the CPU registers */ ++ get_gdb_regs(usethread, ®s, gdb_regs); ++ mem2hex((char *) gdb_regs, ++ remcomOutBuffer, NUMREGBYTES, 0); ++ break; ++ case 'G': /* set the value of the CPU registers - return OK */ ++ hex2mem(&remcomInBuffer[1], ++ (char *) gdb_regs, NUMREGBYTES, 0); ++ if (!usethread || usethread == current) { ++ gdb_regs_to_regs(gdb_regs, ®s); ++ strcpy(remcomOutBuffer, "OK"); ++ } else { ++ strcpy(remcomOutBuffer, "E00"); ++ } ++ break; ++ ++ case 'P':{ /* set the value of a single CPU register - ++ return OK */ ++ /* ++ * For some reason, gdb wants to talk about psudo ++ * registers (greater than 15). These may have ++ * meaning for ptrace, but for us it is safe to ++ * ignor them. We do this by dumping them into ++ * _GS which we also ignor, but do have memory for. ++ */ ++ int regno; ++ ++ ptr = &remcomInBuffer[1]; ++ regs_to_gdb_regs(gdb_regs, ®s); ++ if ((!usethread || usethread == current) && ++ hexToInt(&ptr, ®no) && ++ *ptr++ == '=' && (regno >= 0)) { ++ regno = ++ (regno >= NUMREGS ? _GS : regno); ++ hex2mem(ptr, (char *) &gdb_regs[regno], ++ 4, 0); ++ gdb_regs_to_regs(gdb_regs, ®s); ++ strcpy(remcomOutBuffer, "OK"); ++ break; ++ } ++ strcpy(remcomOutBuffer, "E01"); ++ break; ++ } ++ ++ /* mAA..AA,LLLL Read LLLL bytes at address AA..AA */ ++ case 'm': ++ /* TRY TO READ %x,%x. IF SUCCEED, SET PTR = 0 */ ++ ptr = &remcomInBuffer[1]; ++ if (hexToInt(&ptr, &addr) && ++ (*(ptr++) == ',') && (hexToInt(&ptr, &length))) { ++ ptr = 0; ++ /* ++ * hex doubles the byte count ++ */ ++ if (length > (BUFMAX / 2)) ++ length = BUFMAX / 2; ++ mem2hex((char *) addr, ++ remcomOutBuffer, length, 1); ++ if (mem_err) { ++ strcpy(remcomOutBuffer, "E03"); ++ debug_error("memory fault\n", NULL); ++ } ++ } ++ ++ if (ptr) { ++ strcpy(remcomOutBuffer, "E01"); ++ debug_error ++ ("malformed read memory command: %s\n", ++ remcomInBuffer); ++ } ++ break; ++ ++ /* MAA..AA,LLLL: ++ Write LLLL bytes at address AA.AA return OK */ ++ case 'M': ++ /* TRY TO READ '%x,%x:'. 
IF SUCCEED, SET PTR = 0 */ ++ ptr = &remcomInBuffer[1]; ++ if (hexToInt(&ptr, &addr) && ++ (*(ptr++) == ',') && ++ (hexToInt(&ptr, &length)) && (*(ptr++) == ':')) { ++ hex2mem(ptr, (char *) addr, length, 1); ++ ++ if (mem_err) { ++ strcpy(remcomOutBuffer, "E03"); ++ debug_error("memory fault\n", NULL); ++ } else { ++ strcpy(remcomOutBuffer, "OK"); ++ } ++ ++ ptr = 0; ++ } ++ if (ptr) { ++ strcpy(remcomOutBuffer, "E02"); ++ debug_error ++ ("malformed write memory command: %s\n", ++ remcomInBuffer); ++ } ++ break; ++ case 'S': ++ remcomInBuffer[0] = 's'; ++ case 'C': ++ /* Csig;AA..AA where ;AA..AA is optional ++ * continue with signal ++ * Since signals are meaning less to us, delete that ++ * part and then fall into the 'c' code. ++ */ ++ ptr = &remcomInBuffer[1]; ++ length = 2; ++ while (*ptr && *ptr != ';') { ++ length++; ++ ptr++; ++ } ++ if (*ptr) { ++ do { ++ ptr++; ++ *(ptr - length++) = *ptr; ++ } while (*ptr); ++ } else { ++ remcomInBuffer[1] = 0; ++ } ++ ++ /* cAA..AA Continue at address AA..AA(optional) */ ++ /* sAA..AA Step one instruction from AA..AA(optional) */ ++ /* D detach, reply OK and then continue */ ++ case 'c': ++ case 's': ++ case 'D': ++ ++ /* try to read optional parameter, ++ pc unchanged if no parm */ ++ ptr = &remcomInBuffer[1]; ++ if (hexToInt(&ptr, &addr)) { ++ if (remote_debug) ++ printk("Changing EIP to 0x%x\n", addr); ++ ++ regs.eip = addr; ++ } ++ ++ newPC = regs.eip; ++ ++ /* clear the trace bit */ ++ regs.eflags &= 0xfffffeff; ++ ++ /* set the trace bit if we're stepping */ ++ if (remcomInBuffer[0] == 's') ++ regs.eflags |= 0x100; ++ ++ /* detach is a friendly version of continue. Note that ++ debugging is still enabled (e.g hit control C) ++ */ ++ if (remcomInBuffer[0] == 'D') { ++ strcpy(remcomOutBuffer, "OK"); ++ putpacket(remcomOutBuffer); ++ } ++ ++ if (remote_debug) { ++ printk("Resuming execution\n"); ++ print_regs(®s); ++ } ++ asm volatile ("movl %%db6, %0\n":"=r" (dr6) ++ :); ++ if (!(dr6 & 0x4000)) { ++ for (breakno = 0; breakno < 4; ++breakno) { ++ if (dr6 & (1 << breakno) && ++ (breakinfo[breakno].type == 0)) { ++ /* Set restore flag */ ++ regs.eflags |= 0x10000; ++ break; ++ } ++ } ++ } ++ correct_hw_break(); ++ asm volatile ("movl %0, %%db6\n"::"r" (0)); ++ goto exit_kgdb; ++ ++ /* kill the program */ ++ case 'k': /* do nothing */ ++ break; ++ ++ /* query */ ++ case 'q': ++ nothreads = 0; ++ switch (remcomInBuffer[1]) { ++ case 'f': ++ threadid = 1; ++ thread_list = 2; ++ thread_list_start = (usethread ? : current); ++ case 's': ++ if (!cmp_str(&remcomInBuffer[2], ++ "ThreadInfo", 10)) ++ break; ++ ++ remcomOutBuffer[nothreads++] = 'm'; ++ for (; threadid < PID_MAX + MAX_NO_CPUS; ++ threadid++) { ++ thread = getthread(threadid); ++ if (thread) { ++ nothreads += int_to_hex_v( ++ &remcomOutBuffer[ ++ nothreads], ++ threadid); ++ if (thread_min > threadid) ++ thread_min = threadid; ++ remcomOutBuffer[ ++ nothreads] = ','; ++ nothreads++; ++ if (nothreads > BUFMAX - 10) ++ break; ++ } ++ } ++ if (remcomOutBuffer[nothreads - 1] == 'm') { ++ remcomOutBuffer[nothreads - 1] = 'l'; ++ } else { ++ nothreads--; ++ } ++ remcomOutBuffer[nothreads] = 0; ++ break; ++ ++#ifdef old_thread_list /* Old thread info request */ ++ case 'L': ++ /* List threads */ ++ thread_list = 2; ++ thread_list_start = (usethread ? 
: current); ++ unpack_byte(remcomInBuffer + 3, &maxthreads); ++ unpack_threadid(remcomInBuffer + 5, &thref); ++ do { ++ int buf_thread_limit = ++ (BUFMAX - 22) / BUF_THREAD_ID_SIZE; ++ if (maxthreads > buf_thread_limit) { ++ maxthreads = buf_thread_limit; ++ } ++ } while (0); ++ remcomOutBuffer[0] = 'q'; ++ remcomOutBuffer[1] = 'M'; ++ remcomOutBuffer[4] = '0'; ++ pack_threadid(remcomOutBuffer + 5, &thref); ++ ++ threadid = threadref_to_int(&thref); ++ for (nothreads = 0; ++ nothreads < maxthreads && ++ threadid < PID_MAX + MAX_NO_CPUS; ++ threadid++) { ++ thread = getthread(threadid); ++ if (thread) { ++ int_to_threadref(&thref, ++ threadid); ++ pack_threadid(remcomOutBuffer + ++ 21 + ++ nothreads * 16, ++ &thref); ++ nothreads++; ++ if (thread_min > threadid) ++ thread_min = threadid; ++ } ++ } ++ ++ if (threadid == PID_MAX + MAX_NO_CPUS) { ++ remcomOutBuffer[4] = '1'; ++ } ++ pack_hex_byte(remcomOutBuffer + 2, nothreads); ++ remcomOutBuffer[21 + nothreads * 16] = '\0'; ++ break; ++#endif ++ case 'C': ++ /* Current thread id */ ++ remcomOutBuffer[0] = 'Q'; ++ remcomOutBuffer[1] = 'C'; ++ threadid = current->pid; ++ if (!threadid) { ++ /* ++ * idle thread ++ */ ++ for (threadid = PID_MAX; ++ threadid < PID_MAX + MAX_NO_CPUS; ++ threadid++) { ++ if (current == ++ idle_task(threadid - ++ PID_MAX)) ++ break; ++ } ++ } ++ int_to_threadref(&thref, threadid); ++ pack_threadid(remcomOutBuffer + 2, &thref); ++ remcomOutBuffer[18] = '\0'; ++ break; ++ ++ case 'E': ++ /* Print exception info */ ++ printexceptioninfo(exceptionVector, ++ err_code, remcomOutBuffer); ++ break; ++ case 'T':{ ++ char * nptr; ++ /* Thread extra info */ ++ if (!cmp_str(&remcomInBuffer[2], ++ "hreadExtraInfo,", 15)) { ++ break; ++ } ++ ptr = &remcomInBuffer[17]; ++ hexToInt(&ptr, &threadid); ++ thread = getthread(threadid); ++ nptr = &thread->comm[0]; ++ length = 0; ++ ptr = &remcomOutBuffer[0]; ++ do { ++ length++; ++ ptr = pack_hex_byte(ptr, *nptr++); ++ } while (*nptr && length < 16); ++ /* ++ * would like that 16 to be the size of ++ * task_struct.comm but don't know the ++ * syntax.. ++ */ ++ *ptr = 0; ++ } ++ } ++ break; ++ ++ /* task related */ ++ case 'H': ++ switch (remcomInBuffer[1]) { ++ case 'g': ++ ptr = &remcomInBuffer[2]; ++ hexToInt(&ptr, &threadid); ++ thread = getthread(threadid); ++ if (!thread) { ++ remcomOutBuffer[0] = 'E'; ++ remcomOutBuffer[1] = '\0'; ++ break; ++ } ++ /* ++ * Just in case I forget what this is all about, ++ * the "thread info" command to gdb causes it ++ * to ask for a thread list. It then switches ++ * to each thread and asks for the registers. ++ * For this (and only this) usage, we want to ++ * fudge the registers of tasks not on the run ++ * list (i.e. waiting) to show the routine that ++ * called schedule. Also, gdb, is a minimalist ++ * in that if the current thread is the last ++ * it will not re-read the info when done. ++ * This means that in this case we must show ++ * the real registers. So here is how we do it: ++ * Each entry we keep track of the min ++ * thread in the list (the last that gdb will) ++ * get info for. We also keep track of the ++ * starting thread. ++ * "thread_list" is cleared when switching back ++ * to the min thread if it is was current, or ++ * if it was not current, thread_list is set ++ * to 1. When the switch to current comes, ++ * if thread_list is 1, clear it, else do ++ * nothing. 
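The ThreadExtraInfo reply above is simply the task's comm[] name encoded as ASCII hex pairs, at most 16 bytes of it. The encoding in isolation (hex_pair() is a made-up helper; the stub itself uses pack_hex_byte()):

#include <stdio.h>
#include <string.h>

static const char hexchars[] = "0123456789abcdef";

static char *hex_pair(char *out, unsigned char c)
{
        *out++ = hexchars[c >> 4];
        *out++ = hexchars[c & 0xf];
        return out;
}

int main(void)
{
        const char *comm = "swapper";
        char reply[64], *p = reply;
        size_t i;

        for (i = 0; i < strlen(comm) && i < 16; i++)   /* comm is 16 bytes max */
                p = hex_pair(p, (unsigned char)comm[i]);
        *p = '\0';

        printf("qThreadExtraInfo reply body: %s\n", reply);
        return 0;
}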
++ */ ++ usethread = thread; ++ if ((thread_list == 1) && ++ (thread == thread_list_start)) { ++ thread_list = 0; ++ } ++ if (thread_list && (threadid == thread_min)) { ++ if (thread == thread_list_start) { ++ thread_list = 0; ++ } else { ++ thread_list = 1; ++ } ++ } ++ /* follow through */ ++ case 'c': ++ remcomOutBuffer[0] = 'O'; ++ remcomOutBuffer[1] = 'K'; ++ remcomOutBuffer[2] = '\0'; ++ break; ++ } ++ break; ++ ++ /* Query thread status */ ++ case 'T': ++ ptr = &remcomInBuffer[1]; ++ hexToInt(&ptr, &threadid); ++ thread = getthread(threadid); ++ if (thread) { ++ remcomOutBuffer[0] = 'O'; ++ remcomOutBuffer[1] = 'K'; ++ remcomOutBuffer[2] = '\0'; ++ if (thread_min > threadid) ++ thread_min = threadid; ++ } else { ++ remcomOutBuffer[0] = 'E'; ++ remcomOutBuffer[1] = '\0'; ++ } ++ break; ++ ++ case 'Y': /* set up a hardware breakpoint */ ++ ptr = &remcomInBuffer[1]; ++ hexToInt(&ptr, &breakno); ++ ptr++; ++ hexToInt(&ptr, &breaktype); ++ ptr++; ++ hexToInt(&ptr, &length); ++ ptr++; ++ hexToInt(&ptr, &addr); ++ if (set_hw_break(breakno & 0x3, ++ breaktype & 0x3, ++ length & 0x3, addr) == 0) { ++ strcpy(remcomOutBuffer, "OK"); ++ } else { ++ strcpy(remcomOutBuffer, "ERROR"); ++ } ++ break; ++ ++ /* Remove hardware breakpoint */ ++ case 'y': ++ ptr = &remcomInBuffer[1]; ++ hexToInt(&ptr, &breakno); ++ if (remove_hw_break(breakno & 0x3) == 0) { ++ strcpy(remcomOutBuffer, "OK"); ++ } else { ++ strcpy(remcomOutBuffer, "ERROR"); ++ } ++ break; ++ ++ case 'r': /* reboot */ ++ strcpy(remcomOutBuffer, "OK"); ++ putpacket(remcomOutBuffer); ++ /*to_gdb("Rebooting\n"); */ ++ /* triplefault no return from here */ ++ { ++ static long no_idt[2]; ++ __asm__ __volatile__("lidt %0"::"m"(no_idt[0])); ++ BREAKPOINT; ++ } ++ ++ } /* switch */ ++ ++ /* reply to the request */ ++ putpacket(remcomOutBuffer); ++ } /* while(1==1) */ ++ /* ++ * reached by goto only. ++ */ ++ exit_kgdb: ++ /* ++ * Here is where we set up to trap a gdb function call. NEW_esp ++ * will be changed if we are trying to do this. We handle both ++ * adding and subtracting, thus allowing gdb to put grung on ++ * the stack which it removes later. ++ */ ++ if (NEW_esp != OLD_esp) { ++ int *ptr = END_OF_LOOKASIDE; ++ if (NEW_esp < OLD_esp) ++ ptr -= (OLD_esp - NEW_esp) / sizeof (int); ++ *--ptr = linux_regs->eflags; ++ *--ptr = linux_regs->xcs; ++ *--ptr = linux_regs->eip; ++ *--ptr = linux_regs->ecx; ++ *--ptr = linux_regs->ebx; ++ *--ptr = linux_regs->eax; ++ linux_regs->ecx = NEW_esp - (sizeof (int) * 6); ++ linux_regs->ebx = (unsigned int) END_OF_LOOKASIDE; ++ if (NEW_esp < OLD_esp) { ++ linux_regs->eip = (unsigned int) fn_call_stub; ++ } else { ++ linux_regs->eip = (unsigned int) fn_rtn_stub; ++ linux_regs->eax = NEW_esp; ++ } ++ linux_regs->eflags &= ~(IF_BIT | TF_BIT); ++ } ++#ifdef CONFIG_SMP ++ /* ++ * Release gdb wait locks ++ * Sanity check time. Must have at least one cpu to run. Also single ++ * step must not be done if the current cpu is on hold. ++ */ ++ if (spinlock_count == 1) { ++ int ss_hold = (regs.eflags & 0x100) && kgdb_info.hold_on_sstep; ++ int cpu_avail = 0; ++ int i; ++ ++ for (i = 0; i < MAX_NO_CPUS; i++) { ++ if (!cpu_online(i)) ++ break; ++ if (!hold_cpu(i)) { ++ cpu_avail = 1; ++ } ++ } ++ /* ++ * Early in the bring up there will be NO cpus on line... 
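The 'Y' packet handled above is an extension of the stock remote protocol: its payload is breakno, type, len and addr as comma-separated hex fields, which set_hw_break() then masks into range. A stand-alone parser sketch (parse_y() and the use of strtoul() are illustrative; the stub uses hexToInt()):

#include <stdio.h>
#include <stdlib.h>

static int parse_y(const char *pkt, unsigned *no, unsigned *type,
                   unsigned *len, unsigned long *addr)
{
        char *end;

        *no   = strtoul(pkt, &end, 16);  if (*end++ != ',') return -1;
        *type = strtoul(end, &end, 16);  if (*end++ != ',') return -1;
        *len  = strtoul(end, &end, 16);  if (*end++ != ',') return -1;
        *addr = strtoul(end, &end, 16);
        return 0;
}

int main(void)
{
        unsigned no, type, len;
        unsigned long addr;

        /* e.g. slot 1, write watchpoint, 4 bytes, at c015e9bc */
        if (parse_y("1,1,3,c015e9bc", &no, &type, &len, &addr) == 0)
                printf("slot %u type %u len %u addr %#lx\n",
                       no, type, len, addr);
        return 0;
}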
++ */ ++ if (!cpu_avail && !cpus_empty(cpu_online_map)) { ++ to_gdb("No cpus unblocked, see 'kgdb_info.hold_cpu'\n"); ++ goto once_again; ++ } ++ if (hold_cpu(smp_processor_id()) && (regs.eflags & 0x100)) { ++ to_gdb ++ ("Current cpu must be unblocked to single step\n"); ++ goto once_again; ++ } ++ if (!(ss_hold)) { ++ int i; ++ for (i = 0; i < MAX_NO_CPUS; i++) { ++ if (!hold_cpu(i)) { ++ spin_unlock(&waitlocks[i]); ++ } ++ } ++ } else { ++ spin_unlock(&waitlocks[smp_processor_id()]); ++ } ++ /* Release kgdb spinlock */ ++ KGDB_SPIN_UNLOCK(&kgdb_spinlock); ++ /* ++ * If this cpu is on hold, this is where we ++ * do it. Note, the NMI will pull us out of here, ++ * but will return as the above lock is not held. ++ * We will stay here till another cpu releases the lock for us. ++ */ ++ spin_unlock_wait(waitlocks + smp_processor_id()); ++ kgdb_local_irq_restore(flags); ++ return (0); ++ } ++#if 0 ++exit_just_unlock: ++#endif ++#endif ++ /* Release kgdb spinlock */ ++ KGDB_SPIN_UNLOCK(&kgdb_spinlock); ++ kgdb_local_irq_restore(flags); ++ return (0); ++} ++ ++/* this function is used to set up exception handlers for tracing and ++ * breakpoints. ++ * This function is not needed as the above line does all that is needed. ++ * We leave it for backward compatitability... ++ */ ++void ++set_debug_traps(void) ++{ ++ /* ++ * linux_debug_hook is defined in traps.c. We store a pointer ++ * to our own exception handler into it. ++ ++ * But really folks, every hear of labeled common, an old Fortran ++ * concept. Lots of folks can reference it and it is define if ++ * anyone does. Only one can initialize it at link time. We do ++ * this with the hook. See the statement above. No need for any ++ * executable code and it is ready as soon as the kernel is ++ * loaded. Very desirable in kernel debugging. ++ ++ linux_debug_hook = handle_exception ; ++ */ ++ ++ /* In case GDB is started before us, ack any packets (presumably ++ "$?#xx") sitting there. ++ putDebugChar ('+'); ++ ++ initialized = 1; ++ */ ++} ++ ++/* This function will generate a breakpoint exception. It is used at the ++ beginning of a program to sync up with a debugger and can be used ++ otherwise as a quick means to stop program execution and "break" into ++ the debugger. */ ++/* But really, just use the BREAKPOINT macro. We will handle the int stuff ++ */ ++ ++#ifdef later ++/* ++ * possibly we should not go thru the traps.c code at all? Someday. 
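As the comments above say, entering the stub from C is just a software breakpoint; BREAKPOINT on i386 conventionally expands to an inline int3, though the exact definition lives in the kgdb headers, so treat the macro below as an assumption. A minimal user-space illustration:

#include <stdio.h>

#define SW_BREAKPOINT() __asm__ __volatile__("int $3")

int main(void)
{
        printf("about to trap\n");
        SW_BREAKPOINT();        /* raises SIGTRAP when run outside a debugger */
        printf("resumed\n");
        return 0;
}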
++ */ ++void ++do_kgdb_int3(struct pt_regs *regs, long error_code) ++{ ++ kgdb_handle_exception(3, 5, error_code, regs); ++ return; ++} ++#endif ++#undef regs ++#ifdef CONFIG_TRAP_BAD_SYSCALL_EXITS ++asmlinkage void ++bad_sys_call_exit(int stuff) ++{ ++ struct pt_regs *regs = (struct pt_regs *) &stuff; ++ printk("Sys call %d return with %x preempt_count\n", ++ (int) regs->orig_eax, preempt_count()); ++} ++#endif ++#ifdef CONFIG_STACK_OVERFLOW_TEST ++#include ++asmlinkage void ++stack_overflow(void) ++{ ++#ifdef BREAKPOINT ++ BREAKPOINT; ++#else ++ printk("Kernel stack overflow, looping forever\n"); ++#endif ++ while (1) { ++ } ++} ++#endif ++ ++#if defined(CONFIG_SMP) || defined(CONFIG_KGDB_CONSOLE) ++char gdbconbuf[BUFMAX]; ++ ++static void ++kgdb_gdb_message(const char *s, unsigned count) ++{ ++ int i; ++ int wcount; ++ char *bufptr; ++ /* ++ * This takes care of NMI while spining out chars to gdb ++ */ ++ IF_SMP(in_kgdb_console = 1); ++ gdbconbuf[0] = 'O'; ++ bufptr = gdbconbuf + 1; ++ while (count > 0) { ++ if ((count << 1) > (BUFMAX - 2)) { ++ wcount = (BUFMAX - 2) >> 1; ++ } else { ++ wcount = count; ++ } ++ count -= wcount; ++ for (i = 0; i < wcount; i++) { ++ bufptr = pack_hex_byte(bufptr, s[i]); ++ } ++ *bufptr = '\0'; ++ s += wcount; ++ ++ putpacket(gdbconbuf); ++ ++ } ++ IF_SMP(in_kgdb_console = 0); ++} ++#endif ++#ifdef CONFIG_SMP ++static void ++to_gdb(const char *s) ++{ ++ int count = 0; ++ while (s[count] && (count++ < BUFMAX)) ; ++ kgdb_gdb_message(s, count); ++} ++#endif ++#ifdef CONFIG_KGDB_CONSOLE ++#include ++#include ++#include ++#include ++#include ++ ++void ++kgdb_console_write(struct console *co, const char *s, unsigned count) ++{ ++ ++ if (gdb_i386vector == -1) { ++ /* ++ * We have not yet talked to gdb. What to do... ++ * lets break, on continue we can do the write. ++ * But first tell him whats up. Uh, well no can do, ++ * as this IS the console. Oh well... ++ * We do need to wait or the messages will be lost. ++ * Other option would be to tell the above code to ++ * ignore this breakpoint and do an auto return, ++ * but that might confuse gdb. Also this happens ++ * early enough in boot up that we don't have the traps ++ * set up yet, so... ++ */ ++ breakpoint(); ++ } ++ kgdb_gdb_message(s, count); ++} ++ ++/* ++ * ------------------------------------------------------------ ++ * Serial KGDB driver ++ * ------------------------------------------------------------ ++ */ ++ ++static struct console kgdbcons = { ++ name:"kgdb", ++ write:kgdb_console_write, ++#ifdef CONFIG_KGDB_USER_CONSOLE ++ device:kgdb_console_device, ++#endif ++ flags:CON_PRINTBUFFER | CON_ENABLED, ++ index:-1, ++}; ++ ++/* ++ * The trick here is that this file gets linked before printk.o ++ * That means we get to peer at the console info in the command ++ * line before it does. If we are up, we register, otherwise, ++ * do nothing. By returning 0, we allow printk to look also. ++ */ ++static int kgdb_console_enabled; ++ ++int __init ++kgdb_console_init(char *str) ++{ ++ if ((strncmp(str, "kgdb", 4) == 0) || (strncmp(str, "gdb", 3) == 0)) { ++ register_console(&kgdbcons); ++ kgdb_console_enabled = 1; ++ } ++ return 0; /* let others look at the string */ ++} ++ ++__setup("console=", kgdb_console_init); ++ ++#ifdef CONFIG_KGDB_USER_CONSOLE ++static kdev_t kgdb_console_device(struct console *c); ++/* This stuff sort of works, but it knocks out telnet devices ++ * we are leaving it here in case we (or you) find time to figure it out ++ * better.. 
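kgdb_gdb_message() above forwards console output to gdb as 'O' packets: the payload is the message encoded as ASCII hex pairs, so it can never collide with protocol characters. A stand-alone sketch of that encoding (console_to_o_packet() is a made-up name; the stub uses pack_hex_byte() and putpacket()):

#include <stdio.h>
#include <string.h>

static const char hexdigits[] = "0123456789abcdef";

static void console_to_o_packet(const char *s, char *out, size_t outsz)
{
        size_t i, o = 0;

        if (outsz < 2)
                return;
        out[o++] = 'O';
        for (i = 0; s[i] && o + 2 < outsz; i++) {
                out[o++] = hexdigits[(unsigned char)s[i] >> 4];
                out[o++] = hexdigits[(unsigned char)s[i] & 0xf];
        }
        out[o] = '\0';
}

int main(void)
{
        char pkt[128];

        console_to_o_packet("hi\n", pkt, sizeof pkt);
        printf("%s\n", pkt);   /* prints "O68690a" */
        return 0;
}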
++ */ ++ ++/* ++ * We need a real char device as well for when the console is opened for user ++ * space activities. ++ */ ++ ++static int ++kgdb_consdev_open(struct inode *inode, struct file *file) ++{ ++ return 0; ++} ++ ++static ssize_t ++kgdb_consdev_write(struct file *file, const char *buf, ++ size_t count, loff_t * ppos) ++{ ++ int size, ret = 0; ++ static char kbuf[128]; ++ static DECLARE_MUTEX(sem); ++ ++ /* We are not reentrant... */ ++ if (down_interruptible(&sem)) ++ return -ERESTARTSYS; ++ ++ while (count > 0) { ++ /* need to copy the data from user space */ ++ size = count; ++ if (size > sizeof (kbuf)) ++ size = sizeof (kbuf); ++ if (copy_from_user(kbuf, buf, size)) { ++ ret = -EFAULT; ++ break;; ++ } ++ kgdb_console_write(&kgdbcons, kbuf, size); ++ count -= size; ++ ret += size; ++ buf += size; ++ } ++ ++ up(&sem); ++ ++ return ret; ++} ++ ++struct file_operations kgdb_consdev_fops = { ++ open:kgdb_consdev_open, ++ write:kgdb_consdev_write ++}; ++static kdev_t ++kgdb_console_device(struct console *c) ++{ ++ return MKDEV(TTYAUX_MAJOR, 1); ++} ++ ++/* ++ * This routine gets called from the serial stub in the i386/lib ++ * This is so it is done late in bring up (just before the console open). ++ */ ++void ++kgdb_console_finit(void) ++{ ++ if (kgdb_console_enabled) { ++ char *cptr = cdevname(MKDEV(TTYAUX_MAJOR, 1)); ++ char *cp = cptr; ++ while (*cptr && *cptr != '(') ++ cptr++; ++ *cptr = 0; ++ unregister_chrdev(TTYAUX_MAJOR, cp); ++ register_chrdev(TTYAUX_MAJOR, "kgdb", &kgdb_consdev_fops); ++ } ++} ++#endif ++#endif ++#ifdef CONFIG_KGDB_TS ++#include /* time stamp code */ ++#include /* in_interrupt */ ++#ifdef CONFIG_KGDB_TS_64 ++#define DATA_POINTS 64 ++#endif ++#ifdef CONFIG_KGDB_TS_128 ++#define DATA_POINTS 128 ++#endif ++#ifdef CONFIG_KGDB_TS_256 ++#define DATA_POINTS 256 ++#endif ++#ifdef CONFIG_KGDB_TS_512 ++#define DATA_POINTS 512 ++#endif ++#ifdef CONFIG_KGDB_TS_1024 ++#define DATA_POINTS 1024 ++#endif ++#ifndef DATA_POINTS ++#define DATA_POINTS 128 /* must be a power of two */ ++#endif ++#define INDEX_MASK (DATA_POINTS - 1) ++#if (INDEX_MASK & DATA_POINTS) ++#error "CONFIG_KGDB_TS_COUNT must be a power of 2" ++#endif ++struct kgdb_and_then_struct { ++#ifdef CONFIG_SMP ++ int on_cpu; ++#endif ++ struct task_struct *task; ++ long long at_time; ++ int from_ln; ++ char *in_src; ++ void *from; ++ int *with_shpf; ++ int data0; ++ int data1; ++}; ++struct kgdb_and_then_struct2 { ++#ifdef CONFIG_SMP ++ int on_cpu; ++#endif ++ struct task_struct *task; ++ long long at_time; ++ int from_ln; ++ char *in_src; ++ void *from; ++ int *with_shpf; ++ struct task_struct *t1; ++ struct task_struct *t2; ++}; ++struct kgdb_and_then_struct kgdb_data[DATA_POINTS]; ++ ++struct kgdb_and_then_struct *kgdb_and_then = &kgdb_data[0]; ++int kgdb_and_then_count; ++ ++void ++kgdb_tstamp(int line, char *source, int data0, int data1) ++{ ++ static spinlock_t ts_spin = SPIN_LOCK_UNLOCKED; ++ int flags; ++ kgdb_local_irq_save(flags); ++ spin_lock(&ts_spin); ++ rdtscll(kgdb_and_then->at_time); ++#ifdef CONFIG_SMP ++ kgdb_and_then->on_cpu = smp_processor_id(); ++#endif ++ kgdb_and_then->task = current; ++ kgdb_and_then->from_ln = line; ++ kgdb_and_then->in_src = source; ++ kgdb_and_then->from = __builtin_return_address(0); ++ kgdb_and_then->with_shpf = (int *) (((flags & IF_BIT) >> 9) | ++ (preempt_count() << 8)); ++ kgdb_and_then->data0 = data0; ++ kgdb_and_then->data1 = data1; ++ kgdb_and_then = &kgdb_data[++kgdb_and_then_count & INDEX_MASK]; ++ spin_unlock(&ts_spin); ++ kgdb_local_irq_restore(flags); 
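kgdb_tstamp() above records events in a ring whose size must be a power of two, so that masking the running count with INDEX_MASK wraps the index with no branch. The same trick in miniature (all names here are illustrative):

#include <stdio.h>

#define RING_SIZE  8                    /* must be a power of two */
#define RING_MASK  (RING_SIZE - 1)

#if (RING_SIZE & RING_MASK)
#error "RING_SIZE must be a power of 2"
#endif

static int ring[RING_SIZE];
static unsigned ring_count;

static void ring_add(int v)
{
        ring[ring_count++ & RING_MASK] = v;   /* oldest entry is overwritten */
}

int main(void)
{
        int i;

        for (i = 0; i < 11; i++)
                ring_add(i);
        /* entries 3..10 survive; 0..2 were overwritten */
        for (i = 0; i < RING_SIZE; i++)
                printf("%d ", ring[i]);
        printf("\n");
        return 0;
}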
++#ifdef CONFIG_PREEMPT ++ ++#endif ++ return; ++} ++#endif ++typedef int gdb_debug_hook(int exceptionVector, ++ int signo, int err_code, struct pt_regs *linux_regs); ++gdb_debug_hook *linux_debug_hook = &kgdb_handle_exception; /* histerical reasons... */ +diff -puN arch/i386/kernel/Makefile~kgdb-ga arch/i386/kernel/Makefile +--- 25/arch/i386/kernel/Makefile~kgdb-ga 2004-10-21 14:54:15.259603680 -0700 ++++ 25-akpm/arch/i386/kernel/Makefile 2004-10-21 14:54:15.308596232 -0700 +@@ -14,6 +14,7 @@ obj-y += timers/ + obj-$(CONFIG_ACPI_BOOT) += acpi/ + obj-$(CONFIG_X86_BIOS_REBOOT) += reboot.o + obj-$(CONFIG_MCA) += mca.o ++obj-$(CONFIG_KGDB) += kgdb_stub.o + obj-$(CONFIG_X86_MSR) += msr.o + obj-$(CONFIG_X86_CPUID) += cpuid.o + obj-$(CONFIG_MICROCODE) += microcode.o +diff -puN arch/i386/kernel/nmi.c~kgdb-ga arch/i386/kernel/nmi.c +--- 25/arch/i386/kernel/nmi.c~kgdb-ga 2004-10-21 14:54:15.261603376 -0700 ++++ 25-akpm/arch/i386/kernel/nmi.c 2004-10-21 14:54:15.308596232 -0700 +@@ -34,7 +34,17 @@ + + #include "mach_traps.h" + ++#ifdef CONFIG_KGDB ++#include ++#ifdef CONFIG_SMP ++unsigned int nmi_watchdog = NMI_IO_APIC; ++#else ++unsigned int nmi_watchdog = NMI_LOCAL_APIC; ++#endif ++#else + unsigned int nmi_watchdog = NMI_NONE; ++#endif ++ + extern int unknown_nmi_panic; + static unsigned int nmi_hz = HZ; + static unsigned int nmi_perfctr_msr; /* the MSR to reset in NMI handler */ +@@ -466,6 +476,9 @@ void touch_nmi_watchdog (void) + for (i = 0; i < NR_CPUS; i++) + alert_counter[i] = 0; + } ++#ifdef CONFIG_KGDB ++int tune_watchdog = 5*HZ; ++#endif + + extern void die_nmi(struct pt_regs *, const char *msg); + +@@ -481,12 +494,24 @@ void nmi_watchdog_tick (struct pt_regs * + */ + sum = irq_stat[cpu].apic_timer_irqs; + ++#ifdef CONFIG_KGDB ++ if (!in_kgdb(regs) && last_irq_sums[cpu] == sum) { ++ ++#else + if (last_irq_sums[cpu] == sum) { ++#endif + /* + * Ayiee, looks like this CPU is stuck ... + * wait a few IRQs (5 seconds) before doing the oops ... + */ + alert_counter[cpu]++; ++#ifdef CONFIG_KGDB ++ if (alert_counter[cpu] == tune_watchdog) { ++ kgdb_handle_exception(2, SIGPWR, 0, regs); ++ last_irq_sums[cpu] = sum; ++ alert_counter[cpu] = 0; ++ } ++#endif + if (alert_counter[cpu] == 30*nmi_hz) + die_nmi(regs, "NMI Watchdog detected LOCKUP"); + } else { +diff -puN arch/i386/kernel/smp.c~kgdb-ga arch/i386/kernel/smp.c +--- 25/arch/i386/kernel/smp.c~kgdb-ga 2004-10-21 14:54:15.262603224 -0700 ++++ 25-akpm/arch/i386/kernel/smp.c 2004-10-21 14:54:15.309596080 -0700 +@@ -466,7 +466,17 @@ void flush_tlb_all(void) + { + on_each_cpu(do_flush_tlb_all, NULL, 1, 1); + } +- ++#ifdef CONFIG_KGDB ++/* ++ * By using the NMI code instead of a vector we just sneak thru the ++ * word generator coming out with just what we want. AND it does ++ * not matter if clustered_apic_mode is set or not. ++ */ ++void smp_send_nmi_allbutself(void) ++{ ++ send_IPI_allbutself(APIC_DM_NMI); ++} ++#endif + /* + * this function sends a 'reschedule' IPI to another CPU. 
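The nmi.c hunk above lets the NMI watchdog hand a wedged CPU to kgdb: each watchdog NMI compares the CPU's local APIC timer interrupt count against the value seen on the previous NMI, and after tune_watchdog ticks with no progress it enters kgdb instead of letting the counter run on toward the oops. The counting logic in miniature (all names here are illustrative):

#include <stdio.h>

#define TUNE_WATCHDOG 5        /* stand-in for 5*HZ ticks */

static unsigned last_sum, alert_counter;

/* returns 1 when the cpu should be handed to the debugger */
static int watchdog_tick(unsigned timer_irq_sum)
{
        if (timer_irq_sum == last_sum) {
                if (++alert_counter == TUNE_WATCHDOG) {
                        alert_counter = 0;
                        return 1;       /* looks stuck: enter kgdb */
                }
        } else {
                last_sum = timer_irq_sum;
                alert_counter = 0;
        }
        return 0;
}

int main(void)
{
        int tick;
        unsigned sum = 100;    /* timer interrupts stop advancing here */

        for (tick = 0; tick < 7; tick++)
                printf("tick %d: enter kgdb = %d\n",
                       tick, watchdog_tick(sum));
        return 0;
}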
+ * it goes straight through and wastes no time serializing +diff -puN arch/i386/kernel/traps.c~kgdb-ga arch/i386/kernel/traps.c +--- 25/arch/i386/kernel/traps.c~kgdb-ga 2004-10-21 14:54:15.264602920 -0700 ++++ 25-akpm/arch/i386/kernel/traps.c 2004-10-21 14:54:15.311595776 -0700 +@@ -105,6 +105,39 @@ int register_die_notifier(struct notifie + return err; + } + ++#ifdef CONFIG_KGDB ++extern void sysenter_past_esp(void); ++#include ++#include ++void set_intr_gate(unsigned int n, void *addr); ++static void set_intr_usr_gate(unsigned int n, void *addr); ++/* ++ * Should be able to call this breakpoint() very early in ++ * bring up. Just hard code the call where needed. ++ * The breakpoint() code is here because set_?_gate() functions ++ * are local (static) to trap.c. They need be done only once, ++ * but it does not hurt to do them over. ++ */ ++void breakpoint(void) ++{ ++ set_intr_usr_gate(3,&int3); /* disable ints on trap */ ++ set_intr_gate(1,&debug); ++ set_intr_gate(14,&page_fault); ++ ++ BREAKPOINT; ++} ++#define CHK_REMOTE_DEBUG(trapnr,signr,error_code,regs,after) \ ++ { \ ++ if (!user_mode(regs) ) \ ++ { \ ++ kgdb_handle_exception(trapnr, signr, error_code, regs); \ ++ after; \ ++ } else if ((trapnr == 3) && (regs->eflags &0x200)) local_irq_enable(); \ ++ } ++#else ++#define CHK_REMOTE_DEBUG(trapnr,signr,error_code,regs,after) ++#endif ++ + static inline int valid_stack_ptr(struct thread_info *tinfo, void *p) + { + return p > (void *)tinfo && +@@ -332,6 +365,15 @@ void die(const char * str, struct pt_reg + #endif + if (nl) + printk("\n"); ++#ifdef CONFIG_KGDB ++ /* This is about the only place we want to go to kgdb even if in ++ * user mode. But we must go in via a trap so within kgdb we will ++ * always be in kernel mode. ++ */ ++ if (user_mode(regs)) ++ BREAKPOINT; ++#endif ++ CHK_REMOTE_DEBUG(0,SIGTRAP,err,regs,) + notify_die(DIE_OOPS, (char *)str, regs, err, 255, SIGSEGV); + show_registers(regs); + } else +@@ -406,6 +448,7 @@ static inline void do_trap(int trapnr, i + #define DO_ERROR(trapnr, signr, str, name) \ + asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ + { \ ++ CHK_REMOTE_DEBUG(trapnr,signr,error_code,regs,) \ + if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ + == NOTIFY_STOP) \ + return; \ +@@ -429,6 +472,7 @@ asmlinkage void do_##name(struct pt_regs + #define DO_VM86_ERROR(trapnr, signr, str, name) \ + asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ + { \ ++ CHK_REMOTE_DEBUG(trapnr, signr, error_code,regs, return) \ + if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ + == NOTIFY_STOP) \ + return; \ +@@ -512,7 +556,8 @@ gp_in_vm86: + + gp_in_kernel: + if (!fixup_exception(regs)) { + die: ++ CHK_REMOTE_DEBUG(13,SIGSEGV,error_code,regs,) + if (notify_die(DIE_GPF, "general protection fault", regs, + error_code, 13, SIGSEGV) == NOTIFY_STOP) + return; +@@ -721,8 +766,18 @@ asmlinkage void do_debug(struct pt_regs + * allowing programs to debug themselves without the ptrace() + * interface. + */ ++#ifdef CONFIG_KGDB ++ /* ++ * I think this is the only "real" case of a TF in the kernel ++ * that really belongs to user space. Others are ++ * "Ours all ours!" 
++ */ ++ if (((regs->xcs & 3) == 0) && ((void *)regs->eip == sysenter_past_esp)) ++ goto clear_TF_reenable; ++#else + if ((regs->xcs & 3) == 0) + goto clear_TF_reenable; ++#endif + if ((tsk->ptrace & (PT_DTRACE|PT_PTRACED)) == PT_DTRACE) + goto clear_TF; + } +@@ -734,6 +789,17 @@ asmlinkage void do_debug(struct pt_regs + info.si_errno = 0; + info.si_code = TRAP_BRKPT; + ++#ifdef CONFIG_KGDB ++ /* ++ * If this is a kernel mode trap, we need to reset db7 to allow us ++ * to continue sanely ALSO skip the signal delivery ++ */ ++ if ((regs->xcs & 3) == 0) ++ goto clear_dr7; ++ ++ /* if not kernel, allow ints but only if they were on */ ++ if ( regs->eflags & 0x200) local_irq_enable(); ++#endif + /* If this is a kernel mode trap, save the user PC on entry to + * the kernel, that's what the debugger can make sense of. + */ +@@ -748,6 +814,7 @@ clear_dr7: + __asm__("movl %0,%%db7" + : /* no output */ + : "r" (0)); ++ CHK_REMOTE_DEBUG(1,SIGTRAP,error_code,regs,) + return; + + debug_vm86: +@@ -1004,6 +1071,12 @@ static void __init set_task_gate(unsigne + { + _set_gate(idt_table+n,5,0,0,(gdt_entry<<3)); + } ++#ifdef CONFIG_KGDB ++void set_intr_usr_gate(unsigned int n, void *addr) ++{ ++ _set_gate(idt_table+n,14,3,addr,__KERNEL_CS); ++} ++#endif + + + void __init trap_init(void) +@@ -1021,7 +1094,11 @@ void __init trap_init(void) + set_trap_gate(0,÷_error); + set_intr_gate(1,&debug); + set_intr_gate(2,&nmi); ++#ifndef CONFIG_KGDB + set_system_intr_gate(3, &int3); /* int3-5 can be called from all */ ++#else ++ set_intr_usr_gate(3,&int3); /* int3-5 can be called from all */ ++#endif + set_system_gate(4,&overflow); + set_system_gate(5,&bounds); + set_trap_gate(6,&invalid_op); +diff -puN /dev/null arch/i386/lib/kgdb_serial.c +--- /dev/null 2003-09-15 06:40:47.000000000 -0700 ++++ 25-akpm/arch/i386/lib/kgdb_serial.c 2004-10-21 14:54:15.313595472 -0700 +@@ -0,0 +1,485 @@ ++/* ++ * Serial interface GDB stub ++ * ++ * Written (hacked together) by David Grothe (dave@gcom.com) ++ * Modified to allow invokation early in boot see also ++ * kgdb.h for instructions by George Anzinger(george@mvista.com) ++ * ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#ifdef CONFIG_KGDB_USER_CONSOLE ++extern void kgdb_console_finit(void); ++#endif ++#define PRNT_off ++#define TEST_EXISTANCE ++#ifdef PRNT ++#define dbprintk(s) printk s ++#else ++#define dbprintk(s) ++#endif ++#define TEST_INTERRUPT_off ++#ifdef TEST_INTERRUPT ++#define intprintk(s) printk s ++#else ++#define intprintk(s) ++#endif ++ ++#define IRQ_T(info) ((info->flags & ASYNC_SHARE_IRQ) ? 
SA_SHIRQ : SA_INTERRUPT) ++ ++#define GDB_BUF_SIZE 512 /* power of 2, please */ ++ ++static char gdb_buf[GDB_BUF_SIZE]; ++static int gdb_buf_in_inx; ++static atomic_t gdb_buf_in_cnt; ++static int gdb_buf_out_inx; ++ ++struct async_struct *gdb_async_info; ++static int gdb_async_irq; ++ ++#define outb_px(a,b) outb_p(b,a) ++ ++static void program_uart(struct async_struct *info); ++static void write_char(struct async_struct *info, int chr); ++/* ++ * Get a byte from the hardware data buffer and return it ++ */ ++static int ++read_data_bfr(struct async_struct *info) ++{ ++ char it = inb_p(info->port + UART_LSR); ++ ++ if (it & UART_LSR_DR) ++ return (inb_p(info->port + UART_RX)); ++ /* ++ * If we have a framing error assume somebody messed with ++ * our uart. Reprogram it and send '-' both ways... ++ */ ++ if (it & 0xc) { ++ program_uart(info); ++ write_char(info, '-'); ++ return ('-'); ++ } ++ return (-1); ++ ++} /* read_data_bfr */ ++ ++/* ++ * Get a char if available, return -1 if nothing available. ++ * Empty the receive buffer first, then look at the interface hardware. ++ ++ * Locking here is a bit of a problem. We MUST not lock out communication ++ * if we are trying to talk to gdb about a kgdb entry. ON the other hand ++ * we can loose chars in the console pass thru if we don't lock. It is also ++ * possible that we could hold the lock or be waiting for it when kgdb ++ * NEEDS to talk. Since kgdb locks down the world, it does not need locks. ++ * We do, of course have possible issues with interrupting a uart operation, ++ * but we will just depend on the uart status to help keep that straight. ++ ++ */ ++static spinlock_t uart_interrupt_lock = SPIN_LOCK_UNLOCKED; ++#ifdef CONFIG_SMP ++extern spinlock_t kgdb_spinlock; ++#endif ++ ++static int ++read_char(struct async_struct *info) ++{ ++ int chr; ++ unsigned long flags; ++ local_irq_save(flags); ++#ifdef CONFIG_SMP ++ if (!spin_is_locked(&kgdb_spinlock)) { ++ spin_lock(&uart_interrupt_lock); ++ } ++#endif ++ if (atomic_read(&gdb_buf_in_cnt) != 0) { /* intr routine has q'd chars */ ++ chr = gdb_buf[gdb_buf_out_inx++]; ++ gdb_buf_out_inx &= (GDB_BUF_SIZE - 1); ++ atomic_dec(&gdb_buf_in_cnt); ++ } else { ++ chr = read_data_bfr(info); ++ } ++#ifdef CONFIG_SMP ++ if (!spin_is_locked(&kgdb_spinlock)) { ++ spin_unlock(&uart_interrupt_lock); ++ } ++#endif ++ local_irq_restore(flags); ++ return (chr); ++} ++ ++/* ++ * Wait until the interface can accept a char, then write it. ++ */ ++static void ++write_char(struct async_struct *info, int chr) ++{ ++ while (!(inb_p(info->port + UART_LSR) & UART_LSR_THRE)) ; ++ ++ outb_p(chr, info->port + UART_TX); ++ ++} /* write_char */ ++ ++/* ++ * Mostly we don't need a spinlock, but since the console goes ++ * thru here with interrutps on, well, we need to catch those ++ * chars. ++ */ ++/* ++ * This is the receiver interrupt routine for the GDB stub. ++ * It will receive a limited number of characters of input ++ * from the gdb host machine and save them up in a buffer. ++ * ++ * When the gdb stub routine getDebugChar() is called it ++ * draws characters out of the buffer until it is empty and ++ * then reads directly from the serial port. ++ * ++ * We do not attempt to write chars from the interrupt routine ++ * since the stubs do all of that via putDebugChar() which ++ * writes one byte after waiting for the interface to become ++ * ready. ++ * ++ * The debug stubs like to run with interrupts disabled since, ++ * after all, they run as a consequence of a breakpoint in ++ * the kernel. 
++ * ++ * Perhaps someone who knows more about the tty driver than I ++ * care to learn can make this work for any low level serial ++ * driver. ++ */ ++static irqreturn_t ++gdb_interrupt(int irq, void *dev_id, struct pt_regs *regs) ++{ ++ struct async_struct *info; ++ unsigned long flags; ++ ++ info = gdb_async_info; ++ if (!info || !info->tty || irq != gdb_async_irq) ++ return IRQ_NONE; ++ ++ local_irq_save(flags); ++ spin_lock(&uart_interrupt_lock); ++ do { ++ int chr = read_data_bfr(info); ++ intprintk(("Debug char on int: %x hex\n", chr)); ++ if (chr < 0) ++ continue; ++ ++ if (chr == 3) { /* Ctrl-C means remote interrupt */ ++ BREAKPOINT; ++ continue; ++ } ++ ++ if (atomic_read(&gdb_buf_in_cnt) >= GDB_BUF_SIZE) { ++ /* buffer overflow tosses early char */ ++ read_char(info); ++ } ++ gdb_buf[gdb_buf_in_inx++] = chr; ++ gdb_buf_in_inx &= (GDB_BUF_SIZE - 1); ++ } while (inb_p(info->port + UART_IIR) & UART_IIR_RDI); ++ spin_unlock(&uart_interrupt_lock); ++ local_irq_restore(flags); ++ return IRQ_HANDLED; ++} /* gdb_interrupt */ ++ ++/* ++ * Just a NULL routine for testing. ++ */ ++void ++gdb_null(void) ++{ ++} /* gdb_null */ ++ ++/* These structure are filled in with values defined in asm/kgdb_local.h ++ */ ++static struct serial_state state = SB_STATE; ++static struct async_struct local_info = SB_INFO; ++static int ok_to_enable_ints = 0; ++static void kgdb_enable_ints_now(void); ++ ++extern char *kgdb_version; ++/* ++ * Hook an IRQ for KGDB. ++ * ++ * This routine is called from putDebugChar, below. ++ */ ++static int ints_disabled = 1; ++int ++gdb_hook_interrupt(struct async_struct *info, int verb) ++{ ++ struct serial_state *state = info->state; ++ unsigned long flags; ++ int port; ++#ifdef TEST_EXISTANCE ++ int scratch, scratch2; ++#endif ++ ++ /* The above fails if memory managment is not set up yet. ++ * Rather than fail the set up, just keep track of the fact ++ * and pick up the interrupt thing later. ++ */ ++ gdb_async_info = info; ++ port = gdb_async_info->port; ++ gdb_async_irq = state->irq; ++ if (verb) { ++ printk("kgdb %s : port =%x, IRQ=%d, divisor =%d\n", ++ kgdb_version, ++ port, ++ gdb_async_irq, gdb_async_info->state->custom_divisor); ++ } ++ local_irq_save(flags); ++#ifdef TEST_EXISTANCE ++ /* Existance test */ ++ /* Should not need all this, but just in case.... */ ++ ++ scratch = inb_p(port + UART_IER); ++ outb_px(port + UART_IER, 0); ++ outb_px(0xff, 0x080); ++ scratch2 = inb_p(port + UART_IER); ++ outb_px(port + UART_IER, scratch); ++ if (scratch2) { ++ printk ++ ("gdb_hook_interrupt: Could not clear IER, not a UART!\n"); ++ local_irq_restore(flags); ++ return 1; /* We failed; there's nothing here */ ++ } ++ scratch2 = inb_p(port + UART_LCR); ++ outb_px(port + UART_LCR, 0xBF); /* set up for StarTech test */ ++ outb_px(port + UART_EFR, 0); /* EFR is the same as FCR */ ++ outb_px(port + UART_LCR, 0); ++ outb_px(port + UART_FCR, UART_FCR_ENABLE_FIFO); ++ scratch = inb_p(port + UART_IIR) >> 6; ++ if (scratch == 1) { ++ printk("gdb_hook_interrupt: Undefined UART type!" ++ " Not a UART! \n"); ++ local_irq_restore(flags); ++ return 1; ++ } else { ++ dbprintk(("gdb_hook_interrupt: UART type " ++ "is %d where 0=16450, 2=16550 3=16550A\n", scratch)); ++ } ++ scratch = inb_p(port + UART_MCR); ++ outb_px(port + UART_MCR, UART_MCR_LOOP | scratch); ++ outb_px(port + UART_MCR, UART_MCR_LOOP | 0x0A); ++ scratch2 = inb_p(port + UART_MSR) & 0xF0; ++ outb_px(port + UART_MCR, scratch); ++ if (scratch2 != 0x90) { ++ printk("gdb_hook_interrupt: " ++ "Loop back test failed! 
Not a UART!\n"); ++ local_irq_restore(flags); ++ return scratch2 + 1000; /* force 0 to fail */ ++ } ++#endif /* test existance */ ++ program_uart(info); ++ local_irq_restore(flags); ++ ++ return (0); ++ ++} /* gdb_hook_interrupt */ ++ ++static void ++program_uart(struct async_struct *info) ++{ ++ int port = info->port; ++ ++ (void) inb_p(port + UART_RX); ++ outb_px(port + UART_IER, 0); ++ ++ (void) inb_p(port + UART_RX); /* serial driver comments say */ ++ (void) inb_p(port + UART_IIR); /* this clears the interrupt regs */ ++ (void) inb_p(port + UART_MSR); ++ outb_px(port + UART_LCR, UART_LCR_WLEN8 | UART_LCR_DLAB); ++ outb_px(port + UART_DLL, info->state->custom_divisor & 0xff); /* LS */ ++ outb_px(port + UART_DLM, info->state->custom_divisor >> 8); /* MS */ ++ outb_px(port + UART_MCR, info->MCR); ++ ++ outb_px(port + UART_FCR, UART_FCR_ENABLE_FIFO | UART_FCR_TRIGGER_1 | UART_FCR_CLEAR_XMIT | UART_FCR_CLEAR_RCVR); /* set fcr */ ++ outb_px(port + UART_LCR, UART_LCR_WLEN8); /* reset DLAB */ ++ outb_px(port + UART_FCR, UART_FCR_ENABLE_FIFO | UART_FCR_TRIGGER_1); /* set fcr */ ++ if (!ints_disabled) { ++ intprintk(("KGDB: Sending %d to port %x offset %d\n", ++ gdb_async_info->IER, ++ (int) gdb_async_info->port, UART_IER)); ++ outb_px(gdb_async_info->port + UART_IER, gdb_async_info->IER); ++ } ++ return; ++} ++ ++/* ++ * getDebugChar ++ * ++ * This is a GDB stub routine. It waits for a character from the ++ * serial interface and then returns it. If there is no serial ++ * interface connection then it returns a bogus value which will ++ * almost certainly cause the system to hang. In the ++ */ ++int kgdb_in_isr = 0; ++int kgdb_in_lsr = 0; ++extern spinlock_t kgdb_spinlock; ++ ++/* Caller takes needed protections */ ++ ++int ++getDebugChar(void) ++{ ++ volatile int chr, dum, time, end_time; ++ ++ dbprintk(("getDebugChar(port %x): ", gdb_async_info->port)); ++ ++ if (gdb_async_info == NULL) { ++ gdb_hook_interrupt(&local_info, 0); ++ } ++ /* ++ * This trick says if we wait a very long time and get ++ * no char, return the -1 and let the upper level deal ++ * with it. ++ */ ++ rdtsc(dum, time); ++ end_time = time + 2; ++ while (((chr = read_char(gdb_async_info)) == -1) && ++ (end_time - time) > 0) { ++ rdtsc(dum, time); ++ }; ++ /* ++ * This covers our butts if some other code messes with ++ * our uart, hay, it happens :o) ++ */ ++ if (chr == -1) ++ program_uart(gdb_async_info); ++ ++ dbprintk(("%c\n", chr > ' ' && chr < 0x7F ? chr : ' ')); ++ return (chr); ++ ++} /* getDebugChar */ ++ ++static int count = 3; ++static spinlock_t one_at_atime = SPIN_LOCK_UNLOCKED; ++ ++static int __init ++kgdb_enable_ints(void) ++{ ++ if (gdb_async_info == NULL) { ++ gdb_hook_interrupt(&local_info, 1); ++ } ++ ok_to_enable_ints = 1; ++ kgdb_enable_ints_now(); ++#ifdef CONFIG_KGDB_USER_CONSOLE ++ kgdb_console_finit(); ++#endif ++ return 0; ++} ++ ++#ifdef CONFIG_SERIAL_8250 ++void shutdown_for_kgdb(struct async_struct *gdb_async_info); ++#endif ++ ++#ifdef CONFIG_DISCONTIGMEM ++static inline int kgdb_mem_init_done(void) ++{ ++ return highmem_start_page != NULL; ++} ++#else ++static inline int kgdb_mem_init_done(void) ++{ ++ return max_mapnr != 0; ++} ++#endif ++ ++static void ++kgdb_enable_ints_now(void) ++{ ++ if (!spin_trylock(&one_at_atime)) ++ return; ++ if (!ints_disabled) ++ goto exit; ++ if (kgdb_mem_init_done() && ++ ints_disabled) { /* don't try till mem init */ ++#ifdef CONFIG_SERIAL_8250 ++ /* ++ * The ifdef here allows the system to be configured ++ * without the serial driver. 
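program_uart() above loads info->state->custom_divisor into the DLL/DLM pair while DLAB is set. For a standard 16550-compatible port the divisor is just the 115200 base rate divided by the desired baud rate, so a divisor of 3 gives 38400 baud; a quick arithmetic sketch, no hardware access:

#include <stdio.h>

#define UART_BASE_BAUD 115200

static unsigned divisor_for(unsigned baud)
{
        return UART_BASE_BAUD / baud;
}

int main(void)
{
        unsigned rates[] = { 9600, 38400, 115200 };
        size_t i;

        for (i = 0; i < sizeof rates / sizeof rates[0]; i++)
                printf("%6u baud -> divisor %u\n", rates[i], divisor_for(rates[i]));
        return 0;
}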
++ * Don't make it a module, however, it will steal the port ++ */ ++ shutdown_for_kgdb(gdb_async_info); ++#endif ++ ints_disabled = request_irq(gdb_async_info->state->irq, ++ gdb_interrupt, ++ IRQ_T(gdb_async_info), ++ "KGDB-stub", NULL); ++ intprintk(("KGDB: request_irq returned %d\n", ints_disabled)); ++ } ++ if (!ints_disabled) { ++ intprintk(("KGDB: Sending %d to port %x offset %d\n", ++ gdb_async_info->IER, ++ (int) gdb_async_info->port, UART_IER)); ++ outb_px(gdb_async_info->port + UART_IER, gdb_async_info->IER); ++ } ++ exit: ++ spin_unlock(&one_at_atime); ++} ++ ++/* ++ * putDebugChar ++ * ++ * This is a GDB stub routine. It waits until the interface is ready ++ * to transmit a char and then sends it. If there is no serial ++ * interface connection then it simply returns to its caller, having ++ * pretended to send the char. Caller takes needed protections. ++ */ ++void ++putDebugChar(int chr) ++{ ++ dbprintk(("putDebugChar(port %x): chr=%02x '%c', ints_on=%d\n", ++ gdb_async_info->port, ++ chr, ++ chr > ' ' && chr < 0x7F ? chr : ' ', ints_disabled ? 0 : 1)); ++ ++ if (gdb_async_info == NULL) { ++ gdb_hook_interrupt(&local_info, 0); ++ } ++ ++ write_char(gdb_async_info, chr); /* this routine will wait */ ++ count = (chr == '#') ? 0 : count + 1; ++ if ((count == 2)) { /* try to enable after */ ++ if (ints_disabled & ok_to_enable_ints) ++ kgdb_enable_ints_now(); /* try to enable after */ ++ ++ /* We do this a lot because, well we really want to get these ++ * interrupts. The serial driver will clear these bits when it ++ * initializes the chip. Every thing else it does is ok, ++ * but this. ++ */ ++ if (!ints_disabled) { ++ outb_px(gdb_async_info->port + UART_IER, ++ gdb_async_info->IER); ++ } ++ } ++ ++} /* putDebugChar */ ++ ++module_init(kgdb_enable_ints); +diff -puN arch/i386/lib/Makefile~kgdb-ga arch/i386/lib/Makefile +--- 25/arch/i386/lib/Makefile~kgdb-ga 2004-10-21 14:54:15.265602768 -0700 ++++ 25-akpm/arch/i386/lib/Makefile 2004-10-21 14:54:15.313595472 -0700 +@@ -8,3 +8,4 @@ lib-y = checksum.o delay.o usercopy.o ge + + lib-$(CONFIG_X86_USE_3DNOW) += mmx.o + lib-$(CONFIG_HAVE_DEC_LOCK) += dec_and_lock.o ++lib-$(CONFIG_KGDB) += kgdb_serial.o +diff -puN arch/i386/Makefile~kgdb-ga arch/i386/Makefile +--- 25/arch/i386/Makefile~kgdb-ga 2004-10-21 14:54:15.266602616 -0700 ++++ 25-akpm/arch/i386/Makefile 2004-10-21 14:54:15.314595320 -0700 +@@ -99,6 +99,9 @@ core-$(CONFIG_X86_ES7000) := arch/i386/m + # default subarch .h files + mflags-y += -Iinclude/asm-i386/mach-default + ++mflags-$(CONFIG_KGDB) += -gdwarf-2 ++mflags-$(CONFIG_KGDB_MORE) += $(shell echo $(CONFIG_KGDB_OPTIONS) | sed -e 's/"//g') ++ + head-y := arch/i386/kernel/head.o arch/i386/kernel/init_task.o + + libs-y += arch/i386/lib/ +diff -puN arch/i386/mm/fault.c~kgdb-ga arch/i386/mm/fault.c +--- 25/arch/i386/mm/fault.c~kgdb-ga 2004-10-21 14:54:15.268602312 -0700 ++++ 25-akpm/arch/i386/mm/fault.c 2004-10-21 14:54:15.314595320 -0700 +@@ -430,6 +430,12 @@ no_context: + * Oops. The kernel tried to access some bad page. We'll have to + * terminate things with extreme prejudice. 
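putDebugChar() above restarts its character count when it sees '#' because '#' terminates every gdb remote-protocol packet and is followed by exactly two checksum digits; only after those does the stub try to enable the receive interrupt. The framing itself is simple (frame_packet() is a made-up helper, shown only to illustrate the "$payload#checksum" format):

#include <stdio.h>
#include <string.h>

/* Frame a payload as "$<payload>#<2-digit hex checksum>", where the
 * checksum is the byte sum of the payload modulo 256. */
static void frame_packet(const char *payload, char *out, size_t outsz)
{
        unsigned char sum = 0;
        size_t i;

        for (i = 0; payload[i]; i++)
                sum += (unsigned char)payload[i];
        snprintf(out, outsz, "$%s#%02x", payload, (unsigned)sum);
}

int main(void)
{
        char pkt[128];

        frame_packet("OK", pkt, sizeof pkt);
        printf("%s\n", pkt);            /* "$OK#9a" */
        return 0;
}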
+ */ ++#ifdef CONFIG_KGDB ++ if (!user_mode(regs)){ ++ kgdb_handle_exception(14,SIGBUS, error_code, regs); ++ return; ++ } ++#endif + + bust_spinlocks(1); + +diff -puN arch/x86_64/boot/compressed/head.S~kgdb-ga arch/x86_64/boot/compressed/head.S +--- 25/arch/x86_64/boot/compressed/head.S~kgdb-ga 2004-10-21 14:54:15.269602160 -0700 ++++ 25-akpm/arch/x86_64/boot/compressed/head.S 2004-10-21 14:54:15.315595168 -0700 +@@ -26,6 +26,7 @@ + .code32 + .text + ++#define IN_BOOTLOADER + #include + #include + +diff -puN arch/x86_64/boot/compressed/misc.c~kgdb-ga arch/x86_64/boot/compressed/misc.c +--- 25/arch/x86_64/boot/compressed/misc.c~kgdb-ga 2004-10-21 14:54:15.270602008 -0700 ++++ 25-akpm/arch/x86_64/boot/compressed/misc.c 2004-10-21 14:54:15.315595168 -0700 +@@ -9,6 +9,7 @@ + * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996 + */ + ++#define IN_BOOTLOADER + #include "miscsetup.h" + #include + +diff -puN /dev/null Documentation/i386/kgdb/andthen +--- /dev/null 2003-09-15 06:40:47.000000000 -0700 ++++ 25-akpm/Documentation/i386/kgdb/andthen 2004-10-21 14:54:15.316595016 -0700 +@@ -0,0 +1,100 @@ ++ ++define set_andthen ++ set var $thp=0 ++ set var $thp=(struct kgdb_and_then_struct *)&kgdb_data[0] ++ set var $at_size = (sizeof kgdb_data)/(sizeof *$thp) ++ set var $at_oc=kgdb_and_then_count ++ set var $at_cc=$at_oc ++end ++ ++define andthen_next ++ set var $at_cc=$arg0 ++end ++ ++define andthen ++ andthen_set_edge ++ if ($at_cc >= $at_oc) ++ printf "Outside window. Window size is %d\n",($at_oc-$at_low) ++ else ++ printf "%d: ",$at_cc ++ output *($thp+($at_cc++ % $at_size )) ++ printf "\n" ++ end ++end ++define andthen_set_edge ++ set var $at_oc=kgdb_and_then_count ++ set var $at_low = $at_oc - $at_size ++ if ($at_low < 0 ) ++ set var $at_low = 0 ++ end ++ if (( $at_cc > $at_oc) || ($at_cc < $at_low)) ++ printf "Count outside of window, setting count to " ++ if ($at_cc >= $at_oc) ++ set var $at_cc = $at_oc ++ else ++ set var $at_cc = $at_low ++ end ++ printf "%d\n",$at_cc ++ end ++end ++ ++define beforethat ++ andthen_set_edge ++ if ($at_cc <= $at_low) ++ printf "Outside window. Window size is %d\n",($at_oc-$at_low) ++ else ++ printf "%d: ",$at_cc-1 ++ output *($thp+(--$at_cc % $at_size )) ++ printf "\n" ++ end ++end ++ ++document andthen_next ++ andthen_next ++ . sets the number of the event to display next. If this event ++ . is not in the event pool, either andthen or beforethat will ++ . correct it to the nearest event pool edge. The event pool ++ . ends at the last event recorded and begins ++ . prior to that. If beforethat is used next, it will display ++ . event -1. ++. ++ andthen commands are: set_andthen, andthen_next, andthen and beforethat ++end ++ ++ ++document andthen ++ andthen ++. displays the next event in the list. sets up to display ++. the oldest saved event first. ++. (optional) count of the event to display. ++. note the number of events saved is specified at configure time. ++. if events are saved between calls to andthen the index will change ++. but the displayed event will be the next one (unless the event buffer ++. is overrun). ++. ++. andthen commands are: set_andthen, andthen_next, andthen and beforethat ++end ++ ++document set_andthen ++ set_andthen ++. sets up to use the and commands. ++. if you have defined your own struct, use the above and ++. then enter the following: ++. p $thp=(struct kgdb_and_then_structX *)&kgdb_data[0] ++. where is the name of your structure. ++. ++. 
andthen commands are: set_andthen, andthen_next, andthen and beforethat ++end ++ ++document beforethat ++ beforethat ++. displays the next prior event in the list. sets up to ++. display the last occuring event first. ++. ++. note the number of events saved is specified at configure time. ++. if events are saved between calls to beforethat the index will change ++. but the displayed event will be the next one (unless the event buffer ++. is overrun). ++. ++. andthen commands are: set_andthen, andthen_next, andthen and beforethat ++end +diff -puN /dev/null Documentation/i386/kgdb/debug-nmi.txt +--- /dev/null 2003-09-15 06:40:47.000000000 -0700 ++++ 25-akpm/Documentation/i386/kgdb/debug-nmi.txt 2004-10-21 14:54:15.316595016 -0700 +@@ -0,0 +1,37 @@ ++Subject: Debugging with NMI ++Date: Mon, 12 Jul 1999 11:28:31 -0500 ++From: David Grothe ++Organization: Gcom, Inc ++To: David Grothe ++ ++Kernel hackers: ++ ++Maybe this is old hat, but it is new to me -- ++ ++On an ISA bus machine, if you short out the A1 and B1 pins of an ISA ++slot you will generate an NMI to the CPU. This interrupts even a ++machine that is hung in a loop with interrupts disabled. Used in ++conjunction with kgdb < ++ftp://ftp.gcom.com/pub/linux/src/kgdb-2.3.35/kgdb-2.3.35.tgz > you can ++gain debugger control of a machine that is hung in the kernel! Even ++without kgdb the kernel will print a stack trace so you can find out ++where it was hung. ++ ++The A1/B1 pins are directly opposite one another and the farthest pins ++towards the bracket end of the ISA bus socket. You can stick a paper ++clip or multi-meter probe between them to short them out. ++ ++I had a spare ISA bus to PC104 bus adapter around. The PC104 end of the ++board consists of two rows of wire wrap pins. So I wired a push button ++between the A1/B1 pins and now have an ISA board that I can stick into ++any ISA bus slot for debugger entry. ++ ++Microsoft has a circuit diagram of a PCI card at ++http://www.microsoft.com/hwdev/DEBUGGING/DMPSW.HTM. If you want to ++build one you will have to mail them and ask for the PAL equations. ++Nobody makes one comercially. ++ ++[THIS TIP COMES WITH NO WARRANTY WHATSOEVER. It works for me, but if ++your machine catches fire, it is your problem, not mine.] ++ ++-- Dave (the kgdb guy) +diff -puN /dev/null Documentation/i386/kgdb/gdb-globals.txt +--- /dev/null 2003-09-15 06:40:47.000000000 -0700 ++++ 25-akpm/Documentation/i386/kgdb/gdb-globals.txt 2004-10-21 14:54:15.317594864 -0700 +@@ -0,0 +1,71 @@ ++Sender: akale@veritas.com ++Date: Fri, 23 Jun 2000 19:26:35 +0530 ++From: "Amit S. Kale" ++Organization: Veritas Software (India) ++To: Dave Grothe , linux-kernel@vger.rutgers.edu ++CC: David Milburn , ++ "Edouard G. Parmelan" , ++ ezannoni@cygnus.com, Keith Owens ++Subject: Re: Module debugging using kgdb ++ ++Dave Grothe wrote: ++> ++> Amit: ++> ++> There is a 2.4.0 version of kgdb on our ftp site: ++> ftp://ftp.gcom.com/pub/linux/src/kgdb. I mirrored your version of gdb ++> and loadmodule.sh there. ++> ++> Have a look at the README file and see if I go it right. If not, send ++> me some corrections and I will update it. ++> ++> Does your version of gdb solve the global variable problem? ++ ++Yes. ++Thanks to Elena Zanoni, gdb (developement version) can now calculate ++correctly addresses of dynamically loaded object files. I have not been ++following gdb developement for sometime and am not sure when symbol ++address calculation fix is going to appear in a gdb stable version. 
++ ++Elena, any idea when the fix will make it to a prebuilt gdb from a ++redhat release? ++ ++For the time being I have built a gdb developement version. It can be ++used for module debugging with loadmodule.sh script. ++ ++The problem with calculating of module addresses with previous versions ++of gdb was as follows: ++gdb did not use base address of a section while calculating address of ++a symbol in the section in an object file loaded via 'add-symbol-file'. ++It used address of .text segment instead. Due to this addresses of ++symbols in .data, .bss etc. (e.g. global variables) were calculated incorrectly. ++ ++Above mentioned fix allow gdb to use base address of a segment while ++calculating address of a symbol in it. It adds a parameter '-s' to ++'add-symbol-file' command for specifying base address of a segment. ++ ++loadmodule.sh script works as follows. ++ ++1. Copy a module file to target machine. ++2. Load the module on the target machine using insmod with -m parameter. ++insmod produces a module load map which contains base addresses of all ++sections in the module and addresses of symbols in the module file. ++3. Find all sections and their base addresses in the module from ++the module map. ++4. Generate a script that loads the module file. The script uses ++'add-symbol-file' and specifies address of text segment followed by ++addresses of all segments in the module. ++ ++Here is an example gdb script produced by loadmodule.sh script. ++ ++add-symbol-file foo 0xd082c060 -s .text.lock 0xd08cbfb5 ++-s .fixup 0xd08cfbdf -s .rodata 0xd08cfde0 -s __ex_table 0xd08e3b38 ++-s .data 0xd08e3d00 -s .bss 0xd08ec8c0 -s __ksymtab 0xd08ee838 ++ ++With this command gdb can calculate addresses of symbols in ANY segment ++in a module file. ++ ++Regards. ++-- ++Amit Kale ++Veritas Software ( http://www.veritas.com ) +diff -puN /dev/null Documentation/i386/kgdb/gdbinit +--- /dev/null 2003-09-15 06:40:47.000000000 -0700 ++++ 25-akpm/Documentation/i386/kgdb/gdbinit 2004-10-21 14:54:15.317594864 -0700 +@@ -0,0 +1,14 @@ ++shell echo -e "\003" >/dev/ttyS0 ++set remotebaud 38400 ++target remote /dev/ttyS0 ++define si ++stepi ++printf "EAX=%08x EBX=%08x ECX=%08x EDX=%08x\n", $eax, $ebx, $ecx, $edx ++printf "ESI=%08x EDI=%08x EBP=%08x ESP=%08x\n", $esi, $edi, $ebp, $esp ++x/i $eip ++end ++define ni ++nexti ++printf "EAX=%08x EBX=%08x ECX=%08x EDX=%08x\n", $eax, $ebx, $ecx, $edx ++printf "ESI=%08x EDI=%08x EBP=%08x ESP=%08x\n", $esi, $edi, $ebp, $esp ++x/i $eip +diff -puN /dev/null Documentation/i386/kgdb/gdbinit.hw +--- /dev/null 2003-09-15 06:40:47.000000000 -0700 ++++ 25-akpm/Documentation/i386/kgdb/gdbinit.hw 2004-10-21 14:54:15.318594712 -0700 +@@ -0,0 +1,117 @@ ++ ++#Using ia-32 hardware breakpoints. ++# ++#4 hardware breakpoints are available in ia-32 processors. These breakpoints ++#do not need code modification. They are set using debug registers. ++# ++#Each hardware breakpoint can be of one of the ++#three types: execution, write, access. ++#1. An Execution breakpoint is triggered when code at the breakpoint address is ++#executed. ++#2. A write breakpoint ( aka watchpoints ) is triggered when memory location ++#at the breakpoint address is written. ++#3. An access breakpoint is triggered when memory location at the breakpoint ++#address is either read or written. ++# ++#As hardware breakpoints are available in limited number, use software ++#breakpoints ( br command in gdb ) instead of execution hardware breakpoints. 
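++#
++#A quick usage sketch (the address below is only an example): with the
++#macros defined later in this file, "hwwbrk 0 3 c015e9bc" sets a hardware
++#watchpoint on 4-byte writes at c015e9bc using breakpoint slot 0, and
++#"hwrmbrk 0" removes it again.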
++# ++#Length of an access or a write breakpoint defines length of the datatype to ++#be watched. Length is 1 for char, 2 short , 3 int. ++# ++#For placing execution, write and access breakpoints, use commands ++#hwebrk, hwwbrk, hwabrk ++#To remove a breakpoint use hwrmbrk command. ++# ++#These commands take following types of arguments. For arguments associated ++#with each command, use help command. ++#1. breakpointno: 0 to 3 ++#2. length: 1 to 3 ++#3. address: Memory location in hex ( without 0x ) e.g c015e9bc ++# ++#Use the command exinfo to find which hardware breakpoint occured. ++ ++#hwebrk breakpointno address ++define hwebrk ++ maintenance packet Y$arg0,0,0,$arg1 ++end ++document hwebrk ++ hwebrk
<breakpointno> <address>
++ Places a hardware execution breakpoint
++ <breakpointno> = 0 - 3
++ <address> = Hex digits without leading "0x".
++end
++
++#hwwbrk breakpointno length address
++define hwwbrk
++ maintenance packet Y$arg0,1,$arg1,$arg2
++end
++document hwwbrk
++ hwwbrk <breakpointno> <length> <address>
++ Places a hardware write breakpoint
++ <breakpointno> = 0 - 3
++ <length> = 1 (1 byte), 2 (2 byte), 3 (4 byte)
++ <address> = Hex digits without leading "0x".
++end
++
++#hwabrk breakpointno length address
++define hwabrk
++ maintenance packet Y$arg0,1,$arg1,$arg2
++end
++document hwabrk
++ hwabrk <breakpointno> <length> <address>
++ Places a hardware access breakpoint
++ <breakpointno> = 0 - 3
++ <length> = 1 (1 byte), 2 (2 byte), 3 (4 byte)
++ <address>
= Hex digits without leading "0x". ++end ++ ++#hwrmbrk breakpointno ++define hwrmbrk ++ maintenance packet y$arg0 ++end ++document hwrmbrk ++ hwrmbrk ++ = 0 - 3 ++ Removes a hardware breakpoint ++end ++ ++define reboot ++ maintenance packet r ++end ++#exinfo ++define exinfo ++ maintenance packet qE ++end ++document exinfo ++ exinfo ++ Gives information about a breakpoint. ++end ++define get_th ++ p $th=(struct thread_info *)((int)$esp & ~8191) ++end ++document get_th ++ get_tu ++ Gets and prints the current thread_info pointer, Defines th to be it. ++end ++define get_cu ++ p $cu=((struct thread_info *)((int)$esp & ~8191))->task ++end ++document get_cu ++ get_cu ++ Gets and print the "current" value. Defines $cu to be it. ++end ++define int_off ++ set var $flags=$eflags ++ set $eflags=$eflags&~0x200 ++ end ++define int_on ++ set var $eflags|=$flags&0x200 ++ end ++document int_off ++ saves the current interrupt state and clears the processor interrupt ++ flag. Use int_on to restore the saved flag. ++end ++document int_on ++ Restores the interrupt flag saved by int_off. ++end +diff -puN /dev/null Documentation/i386/kgdb/gdbinit-modules +--- /dev/null Thu Apr 11 07:25:15 2002 ++++ 25-akpm/Documentation/i386/kgdb/gdbinit-modules Fri Jan 13 17:54:25 2006 +@@ -0,0 +1,149 @@ ++# ++# Usefull GDB user-command to debug Linux Kernel Modules with gdbstub. ++# ++# This don't work for Linux-2.0 or older. ++# ++# Author Edouard G. Parmelan ++# ++# ++# Fri Apr 30 20:33:29 CEST 1999 ++# First public release. ++# ++# Major cleanup after experiment Linux-2.0 kernel without success. ++# Symbols of a module are not in the correct order, I can't explain ++# why :( ++# ++# Fri Mar 19 15:41:40 CET 1999 ++# Initial version. ++# ++# Thu Jan 6 16:29:03 CST 2000 ++# A little fixing by Dave Grothe ++# ++# Mon Jun 19 09:33:13 CDT 2000 ++# Alignment changes from Edouard Parmelan ++# ++# The basic idea is to find where insmod load the module and inform ++# GDB to load the symbol table of the module with the GDB command ++# ``add-symbol-file
''. ++# ++# The Linux kernel holds the list of all loaded modules in module_list, ++# this list end with &kernel_module (exactly with module->next == NULL, ++# but the last module is not a real module). ++# ++# Insmod allocates the struct module before the object file. Since ++# Linux-2.1, this structure contain his size. The real address of ++# the object file is then (char*)module + module->size_of_struct. ++# ++# You can use three user functions ``mod-list'', ``mod-print-symbols'' ++# and ``add-module-symbols''. ++# ++# mod-list list all loaded modules with the format: ++# ++# ++# As soon as you have found the address of your module, you can ++# print its exported symbols (mod-print-symbols) or inform GDB to add ++# symbols from your module file (mod-add-symbols). ++# ++# The argument that you give to mod-print-symbols or mod-add-symbols ++# is the from the mod-list command. ++# ++# When using the mod-add-symbols command you must also give the full ++# pathname of the modules object code file. ++# ++# The command mod-add-lis is an example of how to make this easier. ++# You can edit this macro to contain the path name of your own ++# favorite module and then use it as a shorthand to load it. You ++# still need the module-address, however. ++# ++# The internal function ``mod-validate'' set the GDB variable $mod ++# as a ``struct module*'' if the kernel known the module otherwise ++# $mod is set to NULL. This ensure to not add symbols for a wrong ++# address. ++# ++# ++# Sat Feb 12 20:05:47 CET 2005 ++# ++# Adapted to the 2.6.* module data structure. ++# (Getting miffed at gdb for not having "offsetof" in the process :-/ ) ++# ++# Autogenerate add-symbol-file statements from the module list instead ++# of relying on a no-longer-working loadmodule.sh program. ++# ++# Matthias Urlichs ++# ++# ++# Have a nice hacking day ! ++# ++# ++define mod-list ++ set $lmod = modules->next ++ # This is a circular data structure ++ while $lmod != &modules ++ set $mod = (struct module *)(((char *)$lmod) - ((int)&(((struct module *)0) -> list))) ++ printf "%p\t%s\n", $mod, $mod->name ++ set $lmod = $lmod->next ++ end ++end ++document mod-list ++mod-list ++List all modules in the form: ++Use the as the argument for the other ++mod-commands: mod-print-symbols, mod-add-symbols. ++end ++ ++define mod-list-syms ++ set $lmod = modules->next ++ # This is a circular data structure ++ while $lmod != &modules ++ set $mod = (struct module *)(((char *)$lmod) - ((int)&(((struct module *)0) -> list))) ++ printf "add-symbol-file %s.ko %p\n", $mod->name, $mod->module_core ++ set $lmod = $lmod->next ++ end ++end ++document mod-list-syms ++mod-list-syms ++List all modules in the form: add-symbol-file ++for adding modules' symbol tables without loadmodule.sh. ++end ++ ++define mod-validate ++ set $lmod = modules->next ++ set $mod = (struct module *)(((char *)$lmod) - ((int)&(((struct module *)0) -> list))) ++ while ($lmod != &modules) && ($mod != $arg0) ++ set $lmod = $lmod->next ++ set $mod = (struct module *)(((char *)$lmod) - ((int)&(((struct module *)0) -> list))) ++ end ++ if $lmod == &modules ++ set $mod = 0 ++ printf "%p is not a module\n", $arg0 ++ end ++end ++document mod-validate ++mod-validate ++Internal user-command used to validate the module parameter. ++If is a real loaded module, set $mod to it, otherwise set $mod ++to 0. 
++end ++ ++define mod-print-symbols ++ mod-validate $arg0 ++ if $mod != 0 ++ set $i = 0 ++ while $i < $mod->num_syms ++ set $sym = $mod->syms[$i] ++ printf "%p\t%s\n", $sym->value, $sym->name ++ set $i = $i + 1 ++ end ++ set $i = 0 ++ while $i < $mod->num_gpl_syms ++ set $sym = $mod->gpl_syms[$i] ++ printf "%p\t%s\n", $sym->value, $sym->name ++ set $i = $i + 1 ++ end ++ end ++end ++document mod-print-symbols ++mod-print-symbols ++Print all exported symbols of the module. See mod-list ++end ++ +diff -puN /dev/null Documentation/i386/kgdb/kgdb.txt +--- /dev/null 2003-09-15 06:40:47.000000000 -0700 ++++ 25-akpm/Documentation/i386/kgdb/kgdb.txt 2004-10-21 14:54:15.324593800 -0700 +@@ -0,0 +1,775 @@ ++Last edit: <20030806.1637.12> ++This file has information specific to the i386 kgdb option. Other ++platforms with the kgdb option may behave in a similar fashion. ++ ++New features: ++============ ++20030806.1557.37 ++This version was made against the 2.6.0-test2 kernel. We have made the ++following changes: ++ ++- The getthread() code in the stub calls find_task_by_pid(). It fails ++ if we are early in the bring up such that the pid arrays have yet to ++ be allocated. We have added a line to kernel/pid.c to make ++ "kgdb_pid_init_done" true once the arrays are allocated. This way the ++ getthread() code knows not to call. This is only used by the thread ++ debugging stuff and threads will not yet exist at this point in the ++ boot. ++ ++- For some reason, gdb was not asking for a new thread list when the ++ "info thread" command was given. We changed to the newer version of ++ the thread info command and gdb now seems to ask when needed. Result, ++ we now get all threads in the thread list. ++ ++- We now respond to the ThreadExtraInfo request from gdb with the thread ++ name from task_struct .comm. This then appears in the thread list. ++ Thoughts on additional options for this are welcome. Things such as ++ "has BKL" and "Preempted" come to mind. I think we could have a flag ++ word that could enable different bits of info here. ++ ++- We now honor, sort of, the C and S commands. These are continue and ++ single set after delivering a signal. We ignore the signal and do the ++ requested action. This only happens when we told gdb that a signal ++ was the reason for entry, which is only done on memory faults. The ++ result is that you can now continue into the Oops. ++ ++- We changed the -g to -gdwarf-2. This seems to be the same as -ggdb, ++ but it is more exact on what language to use. ++ ++- We added two dwarf2 include files and a bit of code at the end of ++ entry.S. This does not yet work, so it is disabled. Still we want to ++ keep track of the code and "maybe" someone out there can fix it. ++ ++- Randy Dunlap sent some fix ups for this file which are now merged. ++ ++- Hugh Dickins sent a fix to a bit of code in traps.c that prevents a ++ compiler warning if CONFIG_KGDB is off (now who would do that :). ++ ++- Andrew Morton sent a fix for the serial driver which is now merged. ++ ++- Andrew also sent a change to the stub around the cpu managment code ++ which is also merged. ++ ++- Andrew also sent a patch to make "f" as well as "g" work as SysRq ++ commands to enter kgdb, merged. ++ ++- If CONFIG_KGDB and CONFIG_DEBUG_SPINLOCKS are both set we added a ++ "who" field to the spinlock data struct. This is filled with ++ "current" when ever the spinlock suceeds. Useful if you want to know ++ who has the lock. 
++ ++_ And last, but not least, we fixed the "get_cu" macro to properly get ++ the current value of "current". ++ ++New features: ++============ ++20030505.1827.27 ++We are starting to align with the sourceforge version, at least in ++commands. To this end, the boot command string to start kgdb at ++boot time has been changed from "kgdb" to "gdb". ++ ++Andrew Morton sent a couple of patches which are now included as follows: ++1.) We now return a flag to the interrupt handler. ++2.) We no longer use smp_num_cpus (a conflict with the lock meter). ++3.) And from William Lee Irwin III code to make ++ sure high-mem is set up before we attempt to register our interrupt ++ handler. ++We now include asm/kgdb.h from config.h so you will most likely never ++have to include it. It also 'NULLS' the kgdb macros you might have in ++your code when CONFIG_KGDB is not defined. This allows you to just ++turn off CONFIG_KGDB to turn off all the kgdb_ts() calls and such. ++This include is conditioned on the machine being an x86 so as to not ++mess with other archs. ++ ++20020801.1129.03 ++This is currently the version for the 2.4.18 (and beyond?) kernel. ++ ++We have several new "features" beginning with this version: ++ ++1.) Kgdb now syncs the "other" CPUs with a cross-CPU NMI. No more ++ waiting and it will pull that guy out of an IRQ off spin lock :) ++ ++2.) We doctored up the code that tells where a task is waiting and ++ included it so that the "info thread" command will show a bit more ++ than "schedule()". Try it... ++ ++3.) Added the ability to call a function from gdb. All the standard gdb ++ issues apply, i.e. if you hit a breakpoint in the function, you are ++ not allowed to call another (gdb limitation, not kgdb). To help ++ this capability we added a memory allocation function. Gdb does not ++ return this memory (it is used for strings that you pass to that function ++ you are calling from gdb) so we fixed up a way to allow you to ++ manually return the memory (see below). ++ ++4.) Kgdb time stamps (kgdb_ts()) are enhanced to expand what was the ++ interrupt flag to now also include the preemption count and the ++ "in_interrupt" info. The flag is now called "with_pif" to indicate ++ the order, preempt_count, in_interrupt, flag. The preempt_count is ++ shifted left by 4 bits so you can read the count in hex by dropping ++ the low order digit. In_interrupt is in bit 1, and the flag is in ++ bit 0. ++ ++5.) The command: "p kgdb_info" is now expanded and prints something ++ like: ++(gdb) p kgdb_info ++$2 = {used_malloc = 0, called_from = 0xc0107506, entry_tsc = 67468627259, ++ errcode = 0, vector = 3, print_debug_info = 0, hold_on_sstep = 1, ++ cpus_waiting = {{task = 0xc027a000, pid = 32768, hold = 0, ++ regs = 0xc027bf84}, {task = 0x0, pid = 0, hold = 0, regs = 0x0}}} ++ ++ Things to note here: a.) used_malloc is the amount of memory that ++ has been malloc'ed to do calls from gdb. You can reclaim this ++ memory like this: "p kgdb_info.used_malloc=0" Cool, huh? b.) ++ cpus_waiting is now "sized" by the number of CPUs you enter at ++ configure time in the kgdb configure section. This is NOT used ++ anywhere else in the system, but it is "nice" here. c.) The task's ++ "pid" is now in the structure. This is the pid you will need to use ++ to decode to the thread id to get gdb to look at that thread. ++ Remember that the "info thread" command prints a list of threads ++ wherein it numbers each thread with its reference number followed ++ by the thread's pid. 
Note that the per-CPU idle threads actually ++ have pids of 0 (yes, there is more than one pid 0 in an SMP system). ++ To avoid confusion, kgdb numbers these threads with numbers beyond ++ the MAX_PID. That is why you see 32768 and above. ++ ++6.) A subtle change, we now provide the complete register set for tasks ++ that are active on the other CPUs. This allows better trace back on ++ those tasks. ++ ++ And, let's mention what we could not fix. Back-trace from all but the ++ thread that we trapped will, most likely, have a bogus entry in it. ++ The problem is that gdb does not recognize the entry code for ++ functions that use "current" near (at all?) the entry. The compiler ++ is putting the "current" decode as the first two instructions of the ++ function where gdb expects to find %ebp changing code. Back trace ++ also has trouble with interrupt frames. I am talking with Daniel ++ Jacobowitz about some way to fix this, but don't hold your breath. ++ ++20011220.0050.35 ++Major enhancement with this version is the ability to hold one or more ++CPUs in an SMP system while allowing the others to continue. Also, by ++default only the current CPU is enabled on single-step commands (please ++note that gdb issues single-step commands at times other than when you ++use the si command). ++ ++Another change is to collect some useful information in ++a global structure called "kgdb_info". You should be able to just: ++ ++p kgdb_info ++ ++although I have seen cases where the first time this is done gdb just ++prints the first member but prints the whole structure if you then enter ++CR (carriage return or enter). This also works: ++ ++p *&kgdb_info ++ ++Here is a sample: ++(gdb) p kgdb_info ++$4 = {called_from = 0xc010732c, entry_tsc = 32804123790856, errcode = 0, ++ vector = 3, print_debug_info = 0} ++ ++"Called_from" is the return address from the current entry into kgdb. ++Sometimes it is useful to know why you are in kgdb, for example, was ++it an NMI or a real breakpoint? The simple way to interrogate this ++return address is: ++ ++l *0xc010732c ++ ++which will print the surrounding few lines of source code. ++ ++"Entry_tsc" is the CPU TSC on entry to kgdb (useful to compare to the ++kgdb_ts entries). ++ ++"errcode" and "vector" are other entry parameters which may be helpful on ++some traps. ++ ++"print_debug_info" is the internal debugging kgdb print enable flag. Yes, ++you can modify it. ++ ++In SMP systems kgdb_info also includes the "cpus_waiting" structure and ++"hold_on_step": ++ ++(gdb) p kgdb_info ++$7 = {called_from = 0xc0112739, entry_tsc = 1034936624074, errcode = 0, ++ vector = 2, print_debug_info = 0, hold_on_sstep = 1, cpus_waiting = {{ ++ task = 0x0, hold = 0, regs = 0x0}, {task = 0xc71b8000, hold = 0, ++ regs = 0xc71b9f70}, {task = 0x0, hold = 0, regs = 0x0}, {task = 0x0, ++ hold = 0, regs = 0x0}, {task = 0x0, hold = 0, regs = 0x0}, {task = 0x0, ++ hold = 0, regs = 0x0}, {task = 0x0, hold = 0, regs = 0x0}, {task = 0x0, ++ hold = 0, regs = 0x0}}} ++ ++"Cpus_waiting" has an entry for each CPU other than the current one that ++has been stopped. Each entry contains the task_struct address for that ++CPU, the address of the regs for that task and a hold flag. All these ++have the proper typing so that, for example: ++ ++p *kgdb_info.cpus_waiting[1].regs ++ ++will print the registers for CPU 1. ++ ++"Hold_on_sstep" is a new feature with this version and comes up set or ++true. What this means is that whenever kgdb is asked to single-step all ++other CPUs are held (i.e. 
not allowed to execute). The flag applies to ++all but the current CPU and, again, can be changed: ++ ++p kgdb_info.hold_on_sstep=0 ++ ++restores the old behavior of letting all CPUs run during single-stepping. ++ ++Likewise, each CPU has a "hold" flag, which if set, locks that CPU out ++of execution. Note that this has some risk in cases where the CPUs need ++to communicate with each other. If kgdb finds no CPU available on exit, ++it will push a message thru gdb and stay in kgdb. Note that it is legal ++to hold the current CPU as long as at least one CPU can execute. ++ ++20010621.1117.09 ++This version implements an event queue. Events are signaled by calling ++a function in the kgdb stub and may be examined from gdb. See EVENTS ++below for details. This version also tightens up the interrupt and SMP ++handling to not allow interrupts on the way to kgdb from a breakpoint ++trap. It is fine to allow these interrupts for user code, but not ++system debugging. ++ ++Version ++======= ++ ++This version of the kgdb package was developed and tested on ++kernel version 2.4.16. It will not install on any earlier kernels. ++It is possible that it will continue to work on later versions ++of 2.4 and then versions of 2.5 (I hope). ++ ++ ++Debugging Setup ++=============== ++ ++Designate one machine as the "development" machine. This is the ++machine on which you run your compiles and which has your source ++code for the kernel. Designate a second machine as the "target" ++machine. This is the machine that will run your experimental ++kernel. ++ ++The two machines will be connected together via a serial line out ++one or the other of the COM ports of the PC. You will need the ++appropriate modem eliminator (null modem) cable(s) for this. ++ ++Decide on which tty port you want the machines to communicate, then ++connect them up back-to-back using the null modem cable. COM1 is ++/dev/ttyS0 and COM2 is /dev/ttyS1. You should test this connection ++with the two machines prior to trying to debug a kernel. Once you ++have it working, on the TARGET machine, enter: ++ ++setserial /dev/ttyS0 (or what ever tty you are using) ++ ++and record the port address and the IRQ number. ++ ++On the DEVELOPMENT machine you need to apply the patch for the kgdb ++hooks. You have probably already done that if you are reading this ++file. ++ ++On your DEVELOPMENT machine, go to your kernel source directory and do ++"make Xconfig" where X is one of "x", "menu", or "". If you are ++configuring in the standard serial driver, it must not be a module. ++Either yes or no is ok, but making the serial driver a module means it ++will initialize after kgdb has set up the UART interrupt code and may ++cause a failure of the control-C option discussed below. The configure ++question for the serial driver is under the "Character devices" heading ++and is: ++ ++"Standard/generic (8250/16550 and compatible UARTs) serial support" ++ ++Go down to the kernel debugging menu item and open it up. Enable the ++kernel kgdb stub code by selecting that item. You can also choose to ++turn on the "-ggdb -O1" compile options. The -ggdb causes the compiler ++to put more debug info (like local symbols) in the object file. On the ++i386 -g and -ggdb are the same so this option just reduces to "O1". The ++-O1 reduces the optimization level. This may be helpful in some cases, ++be aware, however, that this may also mask the problem you are looking ++for. ++ ++The baud rate. Default is 115200. 
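++(If you keep the default, the matching line in the development machine's
++.gdbinit, described below, would be "set remotebaud 115200".)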
What ever you choose be sure that ++the host machine is set to the same speed. I recommend the default. ++ ++The port. This is the I/O address of the serial UART that you should ++have gotten using setserial as described above. The standard COM1 port ++(3f8) using IRQ 4 is default. COM2 is 2f8 which by convention uses IRQ ++3. ++ ++The port IRQ (see above). ++ ++Stack overflow test. This option makes a minor change in the trap, ++system call and interrupt code to detect stack overflow and transfer ++control to kgdb if it happens. (Some platforms have this in the ++baseline code, but the i386 does not.) ++ ++You can also configure the system to recognize the boot option ++"console=kgdb" which if given will cause all console output during ++booting to be put thru gdb as well as other consoles. This option ++requires that gdb and kgdb be connected prior to sending console output ++so, if they are not, a breakpoint is executed to force the connection. ++This will happen before any kernel output (it is going thru gdb, right), ++and will stall the boot until the connection is made. ++ ++You can also configure in a patch to SysRq to enable the kGdb SysRq. ++This request generates a breakpoint. Since the serial port IRQ line is ++set up after any serial drivers, it is possible that this command will ++work when the control-C will not. ++ ++Save and exit the Xconfig program. Then do "make clean" , "make dep" ++and "make bzImage" (or whatever target you want to make). This gets the ++kernel compiled with the "-g" option set -- necessary for debugging. ++ ++You have just built the kernel on your DEVELOPMENT machine that you ++intend to run on your TARGET machine. ++ ++To install this new kernel, use the following installation procedure. ++Remember, you are on the DEVELOPMENT machine patching the kernel source ++for the kernel that you intend to run on the TARGET machine. ++ ++Copy this kernel to your target machine using your usual procedures. I ++usually arrange to copy development: ++/usr/src/linux/arch/i386/boot/bzImage to /vmlinuz on the TARGET machine ++via a LAN based NFS access. That is, I run the cp command on the target ++and copy from the development machine via the LAN. Run Lilo (see "man ++lilo" for details on how to set this up) on the new kernel on the target ++machine so that it will boot! Then boot the kernel on the target ++machine. ++ ++On the DEVELOPMENT machine, create a file called .gdbinit in the ++directory /usr/src/linux. An example .gdbinit file looks like this: ++ ++shell echo -e "\003" >/dev/ttyS0 ++set remotebaud 38400 (or what ever speed you have chosen) ++target remote /dev/ttyS0 ++ ++ ++Change the "echo" and "target" definition so that it specifies the tty ++port that you intend to use. Change the "remotebaud" definition to ++match the data rate that you are going to use for the com line. ++ ++You are now ready to try it out. ++ ++Boot your target machine with "kgdb" in the boot command i.e. something ++like: ++ ++lilo> test kgdb ++ ++or if you also want console output thru gdb: ++ ++lilo> test kgdb console=kgdb ++ ++You should see the lilo message saying it has loaded the kernel and then ++all output stops. The kgdb stub is trying to connect with gdb. Start ++gdb something like this: ++ ++ ++On your DEVELOPMENT machine, cd /usr/src/linux and enter "gdb vmlinux". ++When gdb gets the symbols loaded it will read your .gdbinit file and, if ++everything is working correctly, you should see gdb print out a few ++lines indicating that a breakpoint has been taken. 
It will actually ++show a line of code in the target kernel inside the kgdb activation ++code. ++ ++The gdb interaction should look something like this: ++ ++ linux-dev:/usr/src/linux# gdb vmlinux ++ GDB is free software and you are welcome to distribute copies of it ++ under certain conditions; type "show copying" to see the conditions. ++ There is absolutely no warranty for GDB; type "show warranty" for details. ++ GDB 4.15.1 (i486-slackware-linux), ++ Copyright 1995 Free Software Foundation, Inc... ++ breakpoint () at i386-stub.c:750 ++ 750 } ++ (gdb) ++ ++You can now use whatever gdb commands you like to set breakpoints. ++Enter "continue" to start your target machine executing again. At this ++point the target system will run at full speed until it encounters ++your breakpoint or gets a segment violation in the kernel, or whatever. ++ ++If you have the kgdb console enabled when you continue, gdb will print ++out all the console messages. ++ ++The above example caused a breakpoint relatively early in the boot ++process. For the i386 kgdb it is possible to code a break instruction ++as the first C-language point in init/main.c, i.e. as the first instruction ++in start_kernel(). This could be done as follows: ++ ++#include ++ breakpoint(); ++ ++This breakpoint() is really a function that sets up the breakpoint and ++single-step hardware trap cells and then executes a breakpoint. Any ++early hard coded breakpoint will need to use this function. Once the ++trap cells are set up they need not be set again, but doing it again ++does not hurt anything, so you don't need to be concerned about which ++breakpoint is hit first. Once the trap cells are set up (and the kernel ++sets them up in due course even if breakpoint() is never called) the ++macro: ++ ++BREAKPOINT; ++ ++will generate an inline breakpoint. This may be more useful as it stops ++the processor at the instruction instead of in a function a step removed ++from the location of interest. In either case must be ++included to define both breakpoint() and BREAKPOINT. ++ ++Triggering kgdbstub at other times ++================================== ++ ++Often you don't need to enter the debugger until much later in the boot ++or even after the machine has been running for some time. Once the ++kernel is booted and interrupts are on, you can force the system to ++enter the debugger by sending a control-C to the debug port. This is ++what the first line of the recommended .gdbinit file does. This allows ++you to start gdb any time after the system is up as well as when the ++system is already at a breakpoint. (In the case where the system is ++already at a breakpoint the control-C is not needed, however, it will ++be ignored by the target so no harm is done. Also note the the echo ++command assumes that the port speed is already set. This will be true ++once gdb has connected, but it is best to set the port speed before you ++run gdb.) ++ ++Another simple way to do this is to put the following file in you ~/bin ++directory: ++ ++#!/bin/bash ++echo -e "\003" > /dev/ttyS0 ++ ++Here, the ttyS0 should be replaced with what ever port you are using. ++The "\003" is control-C. Once you are connected with gdb, you can enter ++control-C at the command prompt. ++ ++An alternative way to get control to the debugger is to enable the kGdb ++SysRq command. Then you would enter Alt-SysRq-g (all three keys at the ++same time, but push them down in the order given). To refresh your ++memory of the available SysRq commands try Alt-SysRq-=. 
Actually any ++undefined command could replace the "=", but I like to KNOW that what I ++am pushing will never be defined. ++ ++Debugging hints ++=============== ++ ++You can break into the target machine at any time from the development ++machine by typing ^C (see above paragraph). If the target machine has ++interrupts enabled this will stop it in the kernel and enter the ++debugger. ++ ++There is unfortunately no way of breaking into the kernel if it is ++in a loop with interrupts disabled, so if this happens to you then ++you need to place exploratory breakpoints or printk's into the kernel ++to find out where it is looping. The exploratory breakpoints can be ++entered either thru gdb or hard coded into the source. This is very ++handy if you do something like: ++ ++if () BREAKPOINT; ++ ++ ++There is a copy of an e-mail in the Documentation/i386/kgdb/ directory ++(debug-nmi.txt) which describes how to create an NMI on an ISA bus ++machine using a paper clip. I have a sophisticated version of this made ++by wiring a push button switch into a PC104/ISA bus adapter card. The ++adapter card nicely furnishes wire wrap pins for all the ISA bus ++signals. ++ ++When you are done debugging the kernel on the target machine it is a ++good idea to leave it in a running state. This makes reboots faster, ++bypassing the fsck. So do a gdb "continue" as the last gdb command if ++this is possible. To terminate gdb itself on the development machine ++and leave the target machine running, first clear all breakpoints and ++continue, then type ^Z to suspend gdb and then kill it with "kill %1" or ++something similar. ++ ++If gdbstub Does Not Work ++======================== ++ ++If it doesn't work, you will have to troubleshoot it. Do the easy ++things first like double checking your cabling and data rates. You ++might try some non-kernel based programs to see if the back-to-back ++connection works properly. Just something simple like cat /etc/hosts ++>/dev/ttyS0 on one machine and cat /dev/ttyS0 on the other will tell you ++if you can send data from one machine to the other. Make sure it works ++in both directions. There is no point in tearing out your hair in the ++kernel if the line doesn't work. ++ ++All of the real action takes place in the file ++/usr/src/linux/arch/i386/kernel/kgdb_stub.c. That is the code on the target ++machine that interacts with gdb on the development machine. In gdb you can ++turn on a debug switch with the following command: ++ ++ set remotedebug ++ ++This will print out the protocol messages that gdb is exchanging with ++the target machine. ++ ++Another place to look is /usr/src/arch/i386/lib/kgdb_serial.c. This is ++the code that talks to the serial port on the target side. There might ++be a problem there. In particular there is a section of this code that ++tests the UART which will tell you what UART you have if you define ++"PRNT" (just remove "_off" from the #define PRNT_off). To view this ++report you will need to boot the system without any beakpoints. This ++allows the kernel to run to the point where it calls kgdb to set up ++interrupts. At this time kgdb will test the UART and print out the type ++it finds. (You need to wait so that the printks are actually being ++printed. Early in the boot they are cached, waiting for the console to ++be enabled. Also, if kgdb is entered thru a breakpoint it is possible ++to cause a dead lock by calling printk when the console is locked. The ++stub thus avoids doing printks from breakpoints, especially in the ++serial code.) 
At this time, if the UART fails to do the expected thing, ++kgdb will print out (using printk) information on what failed. (These ++messages will be buried in all the other boot up messages. Look for ++lines that start with "gdb_hook_interrupt:". You may want to use dmesg ++once the system is up to view the log. If this fails or if you still ++don't connect, review your answers for the port address. Use: ++ ++setserial /dev/ttyS0 ++ ++to get the current port and IRQ information. This command will also ++tell you what the system found for the UART type. The stub recognizes ++the following UART types: ++ ++16450, 16550, and 16550A ++ ++If you are really desperate you can use printk debugging in the ++kgdbstub code in the target kernel until you get it working. In particular, ++there is a global variable in /usr/src/linux/arch/i386/kernel/kgdb_stub.c ++named "remote_debug". Compile your kernel with this set to 1, rather ++than 0 and the debug stub will print out lots of stuff as it does ++what it does. Likewise there are debug printks in the kgdb_serial.c ++code that can be turned on with simple changes in the macro defines. ++ ++ ++Debugging Loadable Modules ++========================== ++ ++This technique comes courtesy of Edouard Parmelan ++ ++ ++When you run gdb, enter the command ++ ++source gdbinit-modules ++ ++This will read in a file of gdb macros that was installed in your ++kernel source directory when kgdb was installed. This file implements ++the following commands: ++ ++mod-list ++ Lists the loaded modules in the form ++ ++mod-print-symbols ++ Prints all the symbols in the indicated module. ++ ++mod-add-symbols ++ Loads the symbols from the object file and associates them ++ with the indicated module. ++ ++After you have loaded the module that you want to debug, use the command ++mod-list to find the of your module. Then use that ++address in the mod-add-symbols command to load your module's symbols. ++From that point onward you can debug your module as if it were a part ++of the kernel. ++ ++The file gdbinit-modules also contains a command named mod-add-lis as ++an example of how to construct a command of your own to load your ++favorite module. The idea is to "can" the pathname of the module ++in the command so you don't have to type so much. ++ ++Threads ++======= ++ ++Each process in a target machine is seen as a gdb thread. gdb thread ++related commands (info threads, thread n) can be used. ++ ++ia-32 hardware breakpoints ++========================== ++ ++kgdb stub contains support for hardware breakpoints using debugging features ++of ia-32(x86) processors. These breakpoints do not need code modification. ++They use debugging registers. 4 hardware breakpoints are available in ia-32 ++processors. ++ ++Each hardware breakpoint can be of one of the following three types. ++ ++1. Execution breakpoint - An Execution breakpoint is triggered when code ++ at the breakpoint address is executed. ++ ++ As limited number of hardware breakpoints are available, it is ++ advisable to use software breakpoints ( break command ) instead ++ of execution hardware breakpoints, unless modification of code ++ is to be avoided. ++ ++2. Write breakpoint - A write breakpoint is triggered when memory ++ location at the breakpoint address is written. ++ ++ A write or can be placed for data of variable length. Length of ++ a write breakpoint indicates length of the datatype to be ++ watched. Length is 1 for 1 byte data , 2 for 2 byte data, 3 for ++ 4 byte data. ++ ++3. 
Access breakpoint - An access breakpoint is triggered when memory ++ location at the breakpoint address is either read or written. ++ ++ Access breakpoints also have lengths similar to write breakpoints. ++ ++IO breakpoints in ia-32 are not supported. ++ ++Since gdb stub at present does not use the protocol used by gdb for hardware ++breakpoints, hardware breakpoints are accessed through gdb macros. gdb macros ++for hardware breakpoints are described below. ++ ++hwebrk - Places an execution breakpoint ++ hwebrk breakpointno address ++hwwbrk - Places a write breakpoint ++ hwwbrk breakpointno length address ++hwabrk - Places an access breakpoint ++ hwabrk breakpointno length address ++hwrmbrk - Removes a breakpoint ++ hwrmbrk breakpointno ++exinfo - Tells whether a software or hardware breakpoint has occurred. ++ Prints number of the hardware breakpoint if a hardware breakpoint has ++ occurred. ++ ++Arguments required by these commands are as follows ++breakpointno - 0 to 3 ++length - 1 to 3 ++address - Memory location in hex digits ( without 0x ) e.g c015e9bc ++ ++SMP support ++========== ++ ++When a breakpoint occurs or user issues a break ( Ctrl + C ) to gdb ++client, all the processors are forced to enter the debugger. Current ++thread corresponds to the thread running on the processor where ++breakpoint occurred. Threads running on other processor(s) appear ++similar to other non-running threads in the 'info threads' output. ++Within the kgdb stub there is a structure "waiting_cpus" in which kgdb ++records the values of "current" and "regs" for each CPU other than the ++one that hit the breakpoint. "current" is a pointer to the task ++structure for the task that CPU is running, while "regs" points to the ++saved registers for the task. This structure can be examined with the ++gdb "p" command. ++ ++ia-32 hardware debugging registers on all processors are set to same ++values. Hence any hardware breakpoints may occur on any processor. ++ ++gdb troubleshooting ++=================== ++ ++1. gdb hangs ++Kill it. restart gdb. Connect to target machine. ++ ++2. gdb cannot connect to target machine (after killing a gdb and ++restarting another) If the target machine was not inside debugger when ++you killed gdb, gdb cannot connect because the target machine won't ++respond. In this case echo "Ctrl+C"(ASCII 3) to the serial line. ++e.g. echo -e "\003" > /dev/ttyS1 ++This forces that target machine into the debugger, after which you ++can connect. ++ ++3. gdb cannot connect even after echoing Ctrl+C into serial line ++Try changing serial line settings min to 1 and time to 0 ++e.g. stty min 1 time 0 < /dev/ttyS1 ++Try echoing again ++ ++Check serial line speed and set it to correct value if required ++e.g. stty ispeed 115200 ospeed 115200 < /dev/ttyS1 ++ ++EVENTS ++====== ++ ++Ever want to know the order of things happening? Which CPU did what and ++when? How did the spinlock get the way it is? Then events are for ++you. Events are defined by calls to an event collection interface and ++saved for later examination. In this case, kgdb events are saved by a ++very fast bit of code in kgdb which is fully SMP and interrupt protected ++and they are examined by using gdb to display them. Kgdb keeps only ++the last N events, where N must be a power of two and is defined at ++configure time. ++ ++ ++Events are signaled to kgdb by calling: ++ ++kgdb_ts(data0,data1) ++ ++For each call kgdb records each call in an array along with other info. 
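++
++For example, a minimal sketch of instrumenting a suspect code path (the
++function and its arguments here are invented purely for illustration)
++might look like:
++
++	static int frob_thing(int id, int new_state)
++	{
++		kgdb_ts(id, new_state);		/* log entry: which thing, what state */
++		/* ... the code you are actually chasing ... */
++		kgdb_ts(id, -new_state);	/* log the exit path too */
++		return 0;
++	}
++
++No header needs to be included explicitly: <asm/kgdb.h> is pulled in from
++config.h on the i386, and the calls compile away when the time stamp
++option is not configured.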
++Here is the array definition: ++ ++struct kgdb_and_then_struct { ++#ifdef CONFIG_SMP ++ int on_cpu; ++#endif ++ long long at_time; ++ int from_ln; ++ char * in_src; ++ void *from; ++ int with_if; ++ int data0; ++ int data1; ++}; ++ ++For SMP machines the CPU is recorded, for all machines the TSC is ++recorded (gets a time stamp) as well as the line number and source file ++the call was made from. The address of the (from), the "if" (interrupt ++flag) and the two data items are also recorded. The macro kgdb_ts casts ++the types to int, so you can put any 32-bit values here. There is a ++configure option to select the number of events you want to keep. A ++nice number might be 128, but you can keep up to 1024 if you want. The ++number must be a power of two. An "andthen" macro library is provided ++for gdb to help you look at these events. It is also possible to define ++a different structure for the event storage and cast the data to this ++structure. For example the following structure is defined in kgdb: ++ ++struct kgdb_and_then_struct2 { ++#ifdef CONFIG_SMP ++ int on_cpu; ++#endif ++ long long at_time; ++ int from_ln; ++ char * in_src; ++ void *from; ++ int with_if; ++ struct task_struct *t1; ++ struct task_struct *t2; ++}; ++ ++If you use this for display, the data elements will be displayed as ++pointers to task_struct entries. You may want to define your own ++structure to use in casting. You should only change the last two items ++and you must keep the structure size the same. Kgdb will handle these ++as 32-bit ints, but within that constraint you can define a structure to ++cast to any 32-bit quantity. This need only be available to gdb and is ++only used for casting in the display code. ++ ++Final Items ++=========== ++ ++I picked up this code from Amit S. Kale and enhanced it. ++ ++If you make some really cool modification to this stuff, or if you ++fix a bug, please let me know. ++ ++George Anzinger ++ ++ ++Amit S. Kale ++ ++ ++(First kgdb by David Grothe ) ++ ++(modified by Tigran Aivazian ) ++ Putting gdbstub into the kernel config menu. ++ ++(modified by Scott Foehner ) ++ Hooks for entering gdbstub at boot time. ++ ++(modified by Amit S. Kale ) ++ Threads, ia-32 hw debugging, mp support, console support, ++ nmi watchdog handling. ++ ++(modified by George Anzinger ) ++ Extended threads to include the idle threads. ++ Enhancements to allow breakpoint() at first C code. ++ Use of module_init() and __setup() to automate the configure. ++ Enhanced the cpu "collection" code to work in early bring-up. ++ Added ability to call functions from gdb ++ Print info thread stuff without going back to schedule() ++ Now collect the "other" cpus with an IPI/ NMI. +diff -puN /dev/null Documentation/i386/kgdb/loadmodule.sh +--- /dev/null 2003-09-15 06:40:47.000000000 -0700 ++++ 25-akpm/Documentation/i386/kgdb/loadmodule.sh 2004-10-21 14:54:15.325593648 -0700 +@@ -0,0 +1,78 @@ ++#/bin/sh ++# This script loads a module on a target machine and generates a gdb script. ++# source generated gdb script to load the module file at appropriate addresses ++# in gdb. ++# ++# Usage: ++# Loading the module on target machine and generating gdb script) ++# [foo]$ loadmodule.sh ++# ++# Loading the module file into gdb ++# (gdb) source ++# ++# Modify following variables according to your setup. ++# TESTMACHINE - Name of the target machine ++# GDBSCRIPTS - The directory where a gdb script will be generated ++# ++# Author: Amit S. Kale (akale@veritas.com). 
++# ++# If you run into problems, please check files pointed to by following ++# variables. ++# ERRFILE - /tmp/.errs contains stderr output of insmod ++# MAPFILE - /tmp/.map contains stdout output of insmod ++# GDBSCRIPT - $GDBSCRIPTS/load gdb script. ++ ++TESTMACHINE=foo ++GDBSCRIPTS=/home/bar ++ ++if [ $# -lt 1 ] ; then { ++ echo Usage: $0 modulefile ++ exit ++} ; fi ++ ++MODULEFILE=$1 ++MODULEFILEBASENAME=`basename $1` ++ ++if [ $MODULEFILE = $MODULEFILEBASENAME ] ; then { ++ MODULEFILE=`pwd`/$MODULEFILE ++} fi ++ ++ERRFILE=/tmp/$MODULEFILEBASENAME.errs ++MAPFILE=/tmp/$MODULEFILEBASENAME.map ++GDBSCRIPT=$GDBSCRIPTS/load$MODULEFILEBASENAME ++ ++function findaddr() { ++ local ADDR=0x$(echo "$SEGMENTS" | \ ++ grep "$1" | sed 's/^[^ ]*[ ]*[^ ]*[ ]*//' | \ ++ sed 's/[ ]*[^ ]*$//') ++ echo $ADDR ++} ++ ++function checkerrs() { ++ if [ "`cat $ERRFILE`" != "" ] ; then { ++ cat $ERRFILE ++ exit ++ } fi ++} ++ ++#load the module ++echo Copying $MODULEFILE to $TESTMACHINE ++rcp $MODULEFILE root@${TESTMACHINE}: ++ ++echo Loading module $MODULEFILE ++rsh -l root $TESTMACHINE /sbin/insmod -m ./`basename $MODULEFILE` \ ++ > $MAPFILE 2> $ERRFILE ++checkerrs ++ ++SEGMENTS=`head -n 11 $MAPFILE | tail -n 10` ++TEXTADDR=$(findaddr "\\.text[^.]") ++LOADSTRING="add-symbol-file $MODULEFILE $TEXTADDR" ++SEGADDRS=`echo "$SEGMENTS" | awk '//{ ++ if ($1 != ".text" && $1 != ".this" && ++ $1 != ".kstrtab" && $1 != ".kmodtab") { ++ print " -s " $1 " 0x" $3 " " ++ } ++}'` ++LOADSTRING="$LOADSTRING $SEGADDRS" ++echo Generating script $GDBSCRIPT ++echo $LOADSTRING > $GDBSCRIPT +diff -puN drivers/char/keyboard.c~kgdb-ga drivers/char/keyboard.c +--- 25/drivers/char/keyboard.c~kgdb-ga 2004-10-21 14:54:15.273601552 -0700 ++++ 25-akpm/drivers/char/keyboard.c 2004-10-21 14:54:15.326593496 -0700 +@@ -1081,6 +1081,9 @@ void kbd_keycode(unsigned int keycode, i + } + if (sysrq_down && down && !rep) { + handle_sysrq(kbd_sysrq_xlate[keycode], regs, tty); ++#ifdef CONFIG_KGDB_SYSRQ ++ sysrq_down = 0; /* in case we miss the "up" event */ ++#endif + return; + } + #endif +diff -puN drivers/char/sysrq.c~kgdb-ga drivers/char/sysrq.c +--- 25/drivers/char/sysrq.c~kgdb-ga 2004-10-21 14:54:15.275601248 -0700 ++++ 25-akpm/drivers/char/sysrq.c 2004-10-21 14:54:15.326593496 -0700 +@@ -35,6 +35,25 @@ + #include + + #include ++#ifdef CONFIG_KGDB_SYSRQ ++ ++#define GDB_OP &kgdb_op ++static void kgdb_sysrq(int key, struct pt_regs *pt_regs, struct tty_struct *tty) ++{ ++ printk("kgdb sysrq\n"); ++ breakpoint(); ++} ++ ++static struct sysrq_key_op kgdb_op = { ++ .handler = kgdb_sysrq, ++ .help_msg = "kGdb|Fgdb", ++ .action_msg = "Debug breakpoint\n", ++}; ++ ++#else ++#define GDB_OP NULL ++#endif ++ + + extern void reset_vc(unsigned int); + +@@ -238,8 +257,8 @@ static struct sysrq_key_op *sysrq_key_ta + /* c */ NULL, + /* d */ NULL, + /* e */ &sysrq_term_op, +-/* f */ NULL, +-/* g */ NULL, ++/* f */ GDB_OP, ++/* g */ GDB_OP, + /* h */ NULL, + /* i */ &sysrq_kill_op, + /* j */ NULL, +diff -puN drivers/serial/8250.c~kgdb-ga drivers/serial/8250.c +--- 25/drivers/serial/8250.c~kgdb-ga 2004-10-21 14:54:15.276601096 -0700 ++++ 25-akpm/drivers/serial/8250.c 2004-10-21 14:54:15.328593192 -0700 +@@ -983,7 +983,7 @@ receive_chars(struct uart_8250_port *up, + if (unlikely(tty->flip.count >= TTY_FLIPBUF_SIZE)) { + tty->flip.work.func((void *)tty); + if (tty->flip.count >= TTY_FLIPBUF_SIZE) +- return; // if TTY_DONT_FLIP is set ++ return; /* if TTY_DONT_FLIP is set */ + } + ch = serial_inp(up, UART_RX); + *tty->flip.char_buf_ptr = ch; +@@ -1348,12 +1348,21 @@ 
static void serial8250_break_ctl(struct + spin_unlock_irqrestore(&up->port.lock, flags); + } + ++#ifdef CONFIG_KGDB ++static int kgdb_irq = -1; ++#endif ++ + static int serial8250_startup(struct uart_port *port) + { + struct uart_8250_port *up = (struct uart_8250_port *)port; + unsigned long flags; + int retval; + ++#ifdef CONFIG_KGDB ++ if (up->port.irq == kgdb_irq) ++ return -EBUSY; ++#endif ++ + up->capabilities = uart_config[up->port.type].flags; + up->mcr = 0; + +@@ -1990,6 +1999,10 @@ serial8250_register_ports(struct uart_dr + for (i = 0; i < UART_NR; i++) { + struct uart_8250_port *up = &serial8250_ports[i]; + ++#ifdef CONFIG_KGDB ++ if (up->port.irq == kgdb_irq) ++ up->port.kgdb = 1; ++#endif + up->port.line = i; + up->port.ops = &serial8250_pops; + up->port.dev = dev; +@@ -2376,6 +2389,31 @@ void serial8250_unregister_port(int line + } + EXPORT_SYMBOL(serial8250_unregister_port); + ++#ifdef CONFIG_KGDB ++/* ++ * Find all the ports using the given irq and shut them down. ++ * Result should be that the irq will be released. ++ */ ++void shutdown_for_kgdb(struct async_struct * info) ++{ ++ int irq = info->state->irq; ++ struct uart_8250_port *up; ++ int ttyS; ++ ++ kgdb_irq = irq; /* save for later init */ ++ for (ttyS = 0; ttyS < UART_NR; ttyS++){ ++ up = &serial8250_ports[ttyS]; ++ if (up->port.irq == irq && (irq_lists + irq)->head) { ++#ifdef CONFIG_DEBUG_SPINLOCK /* ugly business... */ ++ if(up->port.lock.magic != SPINLOCK_MAGIC) ++ spin_lock_init(&up->port.lock); ++#endif ++ serial8250_shutdown(&up->port); ++ } ++ } ++} ++#endif /* CONFIG_KGDB */ ++ + static int __init serial8250_init(void) + { + int ret, i; +diff -puN drivers/serial/serial_core.c~kgdb-ga drivers/serial/serial_core.c +--- 25/drivers/serial/serial_core.c~kgdb-ga 2004-10-21 14:54:15.278600792 -0700 ++++ 25-akpm/drivers/serial/serial_core.c 2004-10-21 14:54:15.330592888 -0700 +@@ -1976,6 +1976,11 @@ uart_configure_port(struct uart_driver * + { + unsigned int flags; + ++#ifdef CONFIG_KGDB ++ if (port->kgdb) ++ return; ++#endif ++ + /* + * If there isn't a port here, don't do anything further. + */ +diff -puN include/asm-i386/bugs.h~kgdb-ga include/asm-i386/bugs.h +--- 25/include/asm-i386/bugs.h~kgdb-ga 2004-10-21 14:54:15.279600640 -0700 ++++ 25-akpm/include/asm-i386/bugs.h 2004-10-21 14:54:15.331592736 -0700 +@@ -1,11 +1,11 @@ + /* + * include/asm-i386/bugs.h + * +- * Copyright (C) 1994 Linus Torvalds ++ * Copyright (C) 1994 Linus Torvalds + * + * Cyrix stuff, June 1998 by: + * - Rafael R. Reilova (moved everything from head.S), +- * ++ * + * - Channing Corn (tests & fixes), + * - Andrew D. Balsa (code cleanup). + * +@@ -25,7 +25,20 @@ + #include + #include + #include +- ++#ifdef CONFIG_KGDB ++/* ++ * Provied the command line "gdb" initial break ++ */ ++int __init kgdb_initial_break(char * str) ++{ ++ if (*str == '\0'){ ++ breakpoint(); ++ return 1; ++ } ++ return 0; ++} ++__setup("gdb",kgdb_initial_break); ++#endif + static int __init no_halt(char *s) + { + boot_cpu_data.hlt_works_ok = 0; +@@ -140,7 +153,7 @@ static void __init check_popad(void) + : "ecx", "edi" ); + /* If this fails, it means that any user program may lock the CPU hard. Too bad. 
*/ + if (res != 12345678) printk( "Buggy.\n" ); +- else printk( "OK.\n" ); ++ else printk( "OK.\n" ); + #endif + } + +diff -puN /dev/null include/asm-i386/kgdb.h +--- /dev/null 2003-09-15 06:40:47.000000000 -0700 ++++ 25-akpm/include/asm-i386/kgdb.h 2004-10-21 14:54:15.331592736 -0700 +@@ -0,0 +1,59 @@ ++#ifndef __KGDB ++#define __KGDB ++ ++/* ++ * This file should not include ANY others. This makes it usable ++ * most anywhere without the fear of include order or inclusion. ++ * Make it so! ++ * ++ * This file may be included all the time. It is only active if ++ * CONFIG_KGDB is defined, otherwise it stubs out all the macros ++ * and entry points. ++ */ ++#if defined(CONFIG_KGDB) && !defined(__ASSEMBLY__) ++ ++extern void breakpoint(void); ++#define INIT_KGDB_INTS kgdb_enable_ints() ++ ++#ifndef BREAKPOINT ++#define BREAKPOINT asm(" int $3") ++#endif ++/* ++ * GDB debug stub (or any debug stub) can point the 'linux_debug_hook' ++ * pointer to its routine and it will be entered as the first thing ++ * when a trap occurs. ++ * ++ * Return values are, at present, undefined. ++ * ++ * The debug hook routine does not necessarily return to its caller. ++ * It has the register image and thus may choose to resume execution ++ * anywhere it pleases. ++ */ ++struct pt_regs; ++ ++extern int kgdb_handle_exception(int trapno, ++ int signo, int err_code, struct pt_regs *regs); ++extern int in_kgdb(struct pt_regs *regs); ++ ++#ifdef CONFIG_KGDB_TS ++void kgdb_tstamp(int line, char *source, int data0, int data1); ++/* ++ * This is the time stamp function. The macro adds the source info and ++ * does a cast on the data to allow most any 32-bit value. ++ */ ++ ++#define kgdb_ts(data0,data1) kgdb_tstamp(__LINE__,__FILE__,(int)data0,(int)data1) ++#else ++#define kgdb_ts(data0,data1) ++#endif ++#else /* CONFIG_KGDB && ! __ASSEMBLY__ ,stubs follow... 
*/ ++#ifndef BREAKPOINT ++#define BREAKPOINT ++#endif ++#define kgdb_ts(data0,data1) ++#define in_kgdb ++#define kgdb_handle_exception ++#define breakpoint ++#define INIT_KGDB_INTS ++#endif ++#endif /* __KGDB */ +diff -puN /dev/null include/asm-i386/kgdb_local.h +--- /dev/null 2003-09-15 06:40:47.000000000 -0700 ++++ 25-akpm/include/asm-i386/kgdb_local.h 2004-10-21 14:54:15.332592584 -0700 +@@ -0,0 +1,102 @@ ++#ifndef __KGDB_LOCAL ++#define ___KGDB_LOCAL ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#define PORT 0x3f8 ++#ifdef CONFIG_KGDB_PORT ++#undef PORT ++#define PORT CONFIG_KGDB_PORT ++#endif ++#define IRQ 4 ++#ifdef CONFIG_KGDB_IRQ ++#undef IRQ ++#define IRQ CONFIG_KGDB_IRQ ++#endif ++#define SB_CLOCK 1843200 ++#define SB_BASE (SB_CLOCK/16) ++#define SB_BAUD9600 SB_BASE/9600 ++#define SB_BAUD192 SB_BASE/19200 ++#define SB_BAUD384 SB_BASE/38400 ++#define SB_BAUD576 SB_BASE/57600 ++#define SB_BAUD1152 SB_BASE/115200 ++#ifdef CONFIG_KGDB_9600BAUD ++#define SB_BAUD SB_BAUD9600 ++#endif ++#ifdef CONFIG_KGDB_19200BAUD ++#define SB_BAUD SB_BAUD192 ++#endif ++#ifdef CONFIG_KGDB_38400BAUD ++#define SB_BAUD SB_BAUD384 ++#endif ++#ifdef CONFIG_KGDB_57600BAUD ++#define SB_BAUD SB_BAUD576 ++#endif ++#ifdef CONFIG_KGDB_115200BAUD ++#define SB_BAUD SB_BAUD1152 ++#endif ++#ifndef SB_BAUD ++#define SB_BAUD SB_BAUD1152 /* Start with this if not given */ ++#endif ++ ++#ifndef CONFIG_X86_TSC ++#undef rdtsc ++#define rdtsc(a,b) if (a++ > 10000){a = 0; b++;} ++#undef rdtscll ++#define rdtscll(s) s++ ++#endif ++ ++#ifdef _raw_read_unlock /* must use a name that is "define"ed, not an inline */ ++#undef spin_lock ++#undef spin_trylock ++#undef spin_unlock ++#define spin_lock _raw_spin_lock ++#define spin_trylock _raw_spin_trylock ++#define spin_unlock _raw_spin_unlock ++#else ++#endif ++#undef spin_unlock_wait ++#define spin_unlock_wait(x) do { cpu_relax(); barrier();} \ ++ while(spin_is_locked(x)) ++ ++#define SB_IER 1 ++#define SB_MCR UART_MCR_OUT2 | UART_MCR_DTR | UART_MCR_RTS ++ ++#define FLAGS 0 ++#define SB_STATE { \ ++ magic: SSTATE_MAGIC, \ ++ baud_base: SB_BASE, \ ++ port: PORT, \ ++ irq: IRQ, \ ++ flags: FLAGS, \ ++ custom_divisor:SB_BAUD} ++#define SB_INFO { \ ++ magic: SERIAL_MAGIC, \ ++ port: PORT,0,FLAGS, \ ++ state: &state, \ ++ tty: (struct tty_struct *)&state, \ ++ IER: SB_IER, \ ++ MCR: SB_MCR} ++extern void putDebugChar(int); ++/* RTAI support needs us to really stop/start interrupts */ ++ ++#define kgdb_sti() __asm__ __volatile__("sti": : :"memory") ++#define kgdb_cli() __asm__ __volatile__("cli": : :"memory") ++#define kgdb_local_save_flags(x) __asm__ __volatile__(\ ++ "pushfl ; popl %0":"=g" (x): /* no input */) ++#define kgdb_local_irq_restore(x) __asm__ __volatile__(\ ++ "pushl %0 ; popfl": \ ++ /* no output */ :"g" (x):"memory", "cc") ++#define kgdb_local_irq_save(x) kgdb_local_save_flags(x); kgdb_cli() ++ ++#ifdef CONFIG_SERIAL ++extern void shutdown_for_kgdb(struct async_struct *info); ++#endif ++#define INIT_KDEBUG putDebugChar("+"); ++#endif /* __KGDB_LOCAL */ +diff -puN include/linux/config.h~kgdb-ga include/linux/config.h +--- 25/include/linux/config.h~kgdb-ga 2004-10-21 14:54:15.281600336 -0700 ++++ 25-akpm/include/linux/config.h 2004-10-21 14:54:15.332592584 -0700 +@@ -2,6 +2,9 @@ + #define _LINUX_CONFIG_H + + #include ++#if defined(__i386__) && !defined(IN_BOOTLOADER) ++#include ++#endif + #if !defined (__KERNEL__) && !defined(__KERNGLUE__) + #error including kernel header in userspace; use the glibc headers instead! 
+ #endif +diff -puN /dev/null include/linux/dwarf2.h +--- /dev/null 2003-09-15 06:40:47.000000000 -0700 ++++ 25-akpm/include/linux/dwarf2.h 2004-10-21 14:54:15.336591976 -0700 +@@ -0,0 +1,738 @@ ++/* Declarations and definitions of codes relating to the DWARF2 symbolic ++ debugging information format. ++ Copyright (C) 1992, 1993, 1995, 1996, 1997, 1999, 2000, 2001, 2002 ++ Free Software Foundation, Inc. ++ ++ Written by Gary Funck (gary@intrepid.com) The Ada Joint Program ++ Office (AJPO), Florida State Unviversity and Silicon Graphics Inc. ++ provided support for this effort -- June 21, 1995. ++ ++ Derived from the DWARF 1 implementation written by Ron Guilmette ++ (rfg@netcom.com), November 1990. ++ ++ This file is part of GCC. ++ ++ GCC is free software; you can redistribute it and/or modify it under ++ the terms of the GNU General Public License as published by the Free ++ Software Foundation; either version 2, or (at your option) any later ++ version. ++ ++ GCC is distributed in the hope that it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public ++ License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with GCC; see the file COPYING. If not, write to the Free ++ Software Foundation, 59 Temple Place - Suite 330, Boston, MA ++ 02111-1307, USA. */ ++ ++/* This file is derived from the DWARF specification (a public document) ++ Revision 2.0.0 (July 27, 1993) developed by the UNIX International ++ Programming Languages Special Interest Group (UI/PLSIG) and distributed ++ by UNIX International. Copies of this specification are available from ++ UNIX International, 20 Waterview Boulevard, Parsippany, NJ, 07054. ++ ++ This file also now contains definitions from the DWARF 3 specification. */ ++ ++/* This file is shared between GCC and GDB, and should not contain ++ prototypes. */ ++ ++#ifndef _ELF_DWARF2_H ++#define _ELF_DWARF2_H ++ ++/* Structure found in the .debug_line section. */ ++#ifndef __ASSEMBLY__ ++typedef struct ++{ ++ unsigned char li_length [4]; ++ unsigned char li_version [2]; ++ unsigned char li_prologue_length [4]; ++ unsigned char li_min_insn_length [1]; ++ unsigned char li_default_is_stmt [1]; ++ unsigned char li_line_base [1]; ++ unsigned char li_line_range [1]; ++ unsigned char li_opcode_base [1]; ++} ++DWARF2_External_LineInfo; ++ ++typedef struct ++{ ++ unsigned long li_length; ++ unsigned short li_version; ++ unsigned int li_prologue_length; ++ unsigned char li_min_insn_length; ++ unsigned char li_default_is_stmt; ++ int li_line_base; ++ unsigned char li_line_range; ++ unsigned char li_opcode_base; ++} ++DWARF2_Internal_LineInfo; ++ ++/* Structure found in .debug_pubnames section. */ ++typedef struct ++{ ++ unsigned char pn_length [4]; ++ unsigned char pn_version [2]; ++ unsigned char pn_offset [4]; ++ unsigned char pn_size [4]; ++} ++DWARF2_External_PubNames; ++ ++typedef struct ++{ ++ unsigned long pn_length; ++ unsigned short pn_version; ++ unsigned long pn_offset; ++ unsigned long pn_size; ++} ++DWARF2_Internal_PubNames; ++ ++/* Structure found in .debug_info section. 
*/ ++typedef struct ++{ ++ unsigned char cu_length [4]; ++ unsigned char cu_version [2]; ++ unsigned char cu_abbrev_offset [4]; ++ unsigned char cu_pointer_size [1]; ++} ++DWARF2_External_CompUnit; ++ ++typedef struct ++{ ++ unsigned long cu_length; ++ unsigned short cu_version; ++ unsigned long cu_abbrev_offset; ++ unsigned char cu_pointer_size; ++} ++DWARF2_Internal_CompUnit; ++ ++typedef struct ++{ ++ unsigned char ar_length [4]; ++ unsigned char ar_version [2]; ++ unsigned char ar_info_offset [4]; ++ unsigned char ar_pointer_size [1]; ++ unsigned char ar_segment_size [1]; ++} ++DWARF2_External_ARange; ++ ++typedef struct ++{ ++ unsigned long ar_length; ++ unsigned short ar_version; ++ unsigned long ar_info_offset; ++ unsigned char ar_pointer_size; ++ unsigned char ar_segment_size; ++} ++DWARF2_Internal_ARange; ++ ++#define ENUM(name) enum name { ++#define IF_NOT_ASM(a) a ++#define COMMA , ++#else ++#define ENUM(name) ++#define IF_NOT_ASM(a) ++#define COMMA ++ ++#endif ++ ++/* Tag names and codes. */ ++ENUM(dwarf_tag) ++ ++ DW_TAG_padding = 0x00 COMMA ++ DW_TAG_array_type = 0x01 COMMA ++ DW_TAG_class_type = 0x02 COMMA ++ DW_TAG_entry_point = 0x03 COMMA ++ DW_TAG_enumeration_type = 0x04 COMMA ++ DW_TAG_formal_parameter = 0x05 COMMA ++ DW_TAG_imported_declaration = 0x08 COMMA ++ DW_TAG_label = 0x0a COMMA ++ DW_TAG_lexical_block = 0x0b COMMA ++ DW_TAG_member = 0x0d COMMA ++ DW_TAG_pointer_type = 0x0f COMMA ++ DW_TAG_reference_type = 0x10 COMMA ++ DW_TAG_compile_unit = 0x11 COMMA ++ DW_TAG_string_type = 0x12 COMMA ++ DW_TAG_structure_type = 0x13 COMMA ++ DW_TAG_subroutine_type = 0x15 COMMA ++ DW_TAG_typedef = 0x16 COMMA ++ DW_TAG_union_type = 0x17 COMMA ++ DW_TAG_unspecified_parameters = 0x18 COMMA ++ DW_TAG_variant = 0x19 COMMA ++ DW_TAG_common_block = 0x1a COMMA ++ DW_TAG_common_inclusion = 0x1b COMMA ++ DW_TAG_inheritance = 0x1c COMMA ++ DW_TAG_inlined_subroutine = 0x1d COMMA ++ DW_TAG_module = 0x1e COMMA ++ DW_TAG_ptr_to_member_type = 0x1f COMMA ++ DW_TAG_set_type = 0x20 COMMA ++ DW_TAG_subrange_type = 0x21 COMMA ++ DW_TAG_with_stmt = 0x22 COMMA ++ DW_TAG_access_declaration = 0x23 COMMA ++ DW_TAG_base_type = 0x24 COMMA ++ DW_TAG_catch_block = 0x25 COMMA ++ DW_TAG_const_type = 0x26 COMMA ++ DW_TAG_constant = 0x27 COMMA ++ DW_TAG_enumerator = 0x28 COMMA ++ DW_TAG_file_type = 0x29 COMMA ++ DW_TAG_friend = 0x2a COMMA ++ DW_TAG_namelist = 0x2b COMMA ++ DW_TAG_namelist_item = 0x2c COMMA ++ DW_TAG_packed_type = 0x2d COMMA ++ DW_TAG_subprogram = 0x2e COMMA ++ DW_TAG_template_type_param = 0x2f COMMA ++ DW_TAG_template_value_param = 0x30 COMMA ++ DW_TAG_thrown_type = 0x31 COMMA ++ DW_TAG_try_block = 0x32 COMMA ++ DW_TAG_variant_part = 0x33 COMMA ++ DW_TAG_variable = 0x34 COMMA ++ DW_TAG_volatile_type = 0x35 COMMA ++ /* DWARF 3. */ ++ DW_TAG_dwarf_procedure = 0x36 COMMA ++ DW_TAG_restrict_type = 0x37 COMMA ++ DW_TAG_interface_type = 0x38 COMMA ++ DW_TAG_namespace = 0x39 COMMA ++ DW_TAG_imported_module = 0x3a COMMA ++ DW_TAG_unspecified_type = 0x3b COMMA ++ DW_TAG_partial_unit = 0x3c COMMA ++ DW_TAG_imported_unit = 0x3d COMMA ++ /* SGI/MIPS Extensions. */ ++ DW_TAG_MIPS_loop = 0x4081 COMMA ++ /* GNU extensions. */ ++ DW_TAG_format_label = 0x4101 COMMA /* For FORTRAN 77 and Fortran 90. */ ++ DW_TAG_function_template = 0x4102 COMMA /* For C++. */ ++ DW_TAG_class_template = 0x4103 COMMA /* For C++. */ ++ DW_TAG_GNU_BINCL = 0x4104 COMMA ++ DW_TAG_GNU_EINCL = 0x4105 COMMA ++ /* Extensions for UPC. See: http://upc.gwu.edu/~upc. 
*/ ++ DW_TAG_upc_shared_type = 0x8765 COMMA ++ DW_TAG_upc_strict_type = 0x8766 COMMA ++ DW_TAG_upc_relaxed_type = 0x8767 ++IF_NOT_ASM(};) ++ ++#define DW_TAG_lo_user 0x4080 ++#define DW_TAG_hi_user 0xffff ++ ++/* Flag that tells whether entry has a child or not. */ ++#define DW_children_no 0 ++#define DW_children_yes 1 ++ ++/* Form names and codes. */ ++ENUM(dwarf_form) ++ ++ DW_FORM_addr = 0x01 COMMA ++ DW_FORM_block2 = 0x03 COMMA ++ DW_FORM_block4 = 0x04 COMMA ++ DW_FORM_data2 = 0x05 COMMA ++ DW_FORM_data4 = 0x06 COMMA ++ DW_FORM_data8 = 0x07 COMMA ++ DW_FORM_string = 0x08 COMMA ++ DW_FORM_block = 0x09 COMMA ++ DW_FORM_block1 = 0x0a COMMA ++ DW_FORM_data1 = 0x0b COMMA ++ DW_FORM_flag = 0x0c COMMA ++ DW_FORM_sdata = 0x0d COMMA ++ DW_FORM_strp = 0x0e COMMA ++ DW_FORM_udata = 0x0f COMMA ++ DW_FORM_ref_addr = 0x10 COMMA ++ DW_FORM_ref1 = 0x11 COMMA ++ DW_FORM_ref2 = 0x12 COMMA ++ DW_FORM_ref4 = 0x13 COMMA ++ DW_FORM_ref8 = 0x14 COMMA ++ DW_FORM_ref_udata = 0x15 COMMA ++ DW_FORM_indirect = 0x16 ++IF_NOT_ASM(};) ++ ++/* Attribute names and codes. */ ++ ++ENUM(dwarf_attribute) ++ ++ DW_AT_sibling = 0x01 COMMA ++ DW_AT_location = 0x02 COMMA ++ DW_AT_name = 0x03 COMMA ++ DW_AT_ordering = 0x09 COMMA ++ DW_AT_subscr_data = 0x0a COMMA ++ DW_AT_byte_size = 0x0b COMMA ++ DW_AT_bit_offset = 0x0c COMMA ++ DW_AT_bit_size = 0x0d COMMA ++ DW_AT_element_list = 0x0f COMMA ++ DW_AT_stmt_list = 0x10 COMMA ++ DW_AT_low_pc = 0x11 COMMA ++ DW_AT_high_pc = 0x12 COMMA ++ DW_AT_language = 0x13 COMMA ++ DW_AT_member = 0x14 COMMA ++ DW_AT_discr = 0x15 COMMA ++ DW_AT_discr_value = 0x16 COMMA ++ DW_AT_visibility = 0x17 COMMA ++ DW_AT_import = 0x18 COMMA ++ DW_AT_string_length = 0x19 COMMA ++ DW_AT_common_reference = 0x1a COMMA ++ DW_AT_comp_dir = 0x1b COMMA ++ DW_AT_const_value = 0x1c COMMA ++ DW_AT_containing_type = 0x1d COMMA ++ DW_AT_default_value = 0x1e COMMA ++ DW_AT_inline = 0x20 COMMA ++ DW_AT_is_optional = 0x21 COMMA ++ DW_AT_lower_bound = 0x22 COMMA ++ DW_AT_producer = 0x25 COMMA ++ DW_AT_prototyped = 0x27 COMMA ++ DW_AT_return_addr = 0x2a COMMA ++ DW_AT_start_scope = 0x2c COMMA ++ DW_AT_stride_size = 0x2e COMMA ++ DW_AT_upper_bound = 0x2f COMMA ++ DW_AT_abstract_origin = 0x31 COMMA ++ DW_AT_accessibility = 0x32 COMMA ++ DW_AT_address_class = 0x33 COMMA ++ DW_AT_artificial = 0x34 COMMA ++ DW_AT_base_types = 0x35 COMMA ++ DW_AT_calling_convention = 0x36 COMMA ++ DW_AT_count = 0x37 COMMA ++ DW_AT_data_member_location = 0x38 COMMA ++ DW_AT_decl_column = 0x39 COMMA ++ DW_AT_decl_file = 0x3a COMMA ++ DW_AT_decl_line = 0x3b COMMA ++ DW_AT_declaration = 0x3c COMMA ++ DW_AT_discr_list = 0x3d COMMA ++ DW_AT_encoding = 0x3e COMMA ++ DW_AT_external = 0x3f COMMA ++ DW_AT_frame_base = 0x40 COMMA ++ DW_AT_friend = 0x41 COMMA ++ DW_AT_identifier_case = 0x42 COMMA ++ DW_AT_macro_info = 0x43 COMMA ++ DW_AT_namelist_items = 0x44 COMMA ++ DW_AT_priority = 0x45 COMMA ++ DW_AT_segment = 0x46 COMMA ++ DW_AT_specification = 0x47 COMMA ++ DW_AT_static_link = 0x48 COMMA ++ DW_AT_type = 0x49 COMMA ++ DW_AT_use_location = 0x4a COMMA ++ DW_AT_variable_parameter = 0x4b COMMA ++ DW_AT_virtuality = 0x4c COMMA ++ DW_AT_vtable_elem_location = 0x4d COMMA ++ /* DWARF 3 values. 
*/ ++ DW_AT_allocated = 0x4e COMMA ++ DW_AT_associated = 0x4f COMMA ++ DW_AT_data_location = 0x50 COMMA ++ DW_AT_stride = 0x51 COMMA ++ DW_AT_entry_pc = 0x52 COMMA ++ DW_AT_use_UTF8 = 0x53 COMMA ++ DW_AT_extension = 0x54 COMMA ++ DW_AT_ranges = 0x55 COMMA ++ DW_AT_trampoline = 0x56 COMMA ++ DW_AT_call_column = 0x57 COMMA ++ DW_AT_call_file = 0x58 COMMA ++ DW_AT_call_line = 0x59 COMMA ++ /* SGI/MIPS extensions. */ ++ DW_AT_MIPS_fde = 0x2001 COMMA ++ DW_AT_MIPS_loop_begin = 0x2002 COMMA ++ DW_AT_MIPS_tail_loop_begin = 0x2003 COMMA ++ DW_AT_MIPS_epilog_begin = 0x2004 COMMA ++ DW_AT_MIPS_loop_unroll_factor = 0x2005 COMMA ++ DW_AT_MIPS_software_pipeline_depth = 0x2006 COMMA ++ DW_AT_MIPS_linkage_name = 0x2007 COMMA ++ DW_AT_MIPS_stride = 0x2008 COMMA ++ DW_AT_MIPS_abstract_name = 0x2009 COMMA ++ DW_AT_MIPS_clone_origin = 0x200a COMMA ++ DW_AT_MIPS_has_inlines = 0x200b COMMA ++ /* GNU extensions. */ ++ DW_AT_sf_names = 0x2101 COMMA ++ DW_AT_src_info = 0x2102 COMMA ++ DW_AT_mac_info = 0x2103 COMMA ++ DW_AT_src_coords = 0x2104 COMMA ++ DW_AT_body_begin = 0x2105 COMMA ++ DW_AT_body_end = 0x2106 COMMA ++ DW_AT_GNU_vector = 0x2107 COMMA ++ /* VMS extensions. */ ++ DW_AT_VMS_rtnbeg_pd_address = 0x2201 COMMA ++ /* UPC extension. */ ++ DW_AT_upc_threads_scaled = 0x3210 ++IF_NOT_ASM(};) ++ ++#define DW_AT_lo_user 0x2000 /* Implementation-defined range start. */ ++#define DW_AT_hi_user 0x3ff0 /* Implementation-defined range end. */ ++ ++/* Location atom names and codes. */ ++ENUM(dwarf_location_atom) ++ ++ DW_OP_addr = 0x03 COMMA ++ DW_OP_deref = 0x06 COMMA ++ DW_OP_const1u = 0x08 COMMA ++ DW_OP_const1s = 0x09 COMMA ++ DW_OP_const2u = 0x0a COMMA ++ DW_OP_const2s = 0x0b COMMA ++ DW_OP_const4u = 0x0c COMMA ++ DW_OP_const4s = 0x0d COMMA ++ DW_OP_const8u = 0x0e COMMA ++ DW_OP_const8s = 0x0f COMMA ++ DW_OP_constu = 0x10 COMMA ++ DW_OP_consts = 0x11 COMMA ++ DW_OP_dup = 0x12 COMMA ++ DW_OP_drop = 0x13 COMMA ++ DW_OP_over = 0x14 COMMA ++ DW_OP_pick = 0x15 COMMA ++ DW_OP_swap = 0x16 COMMA ++ DW_OP_rot = 0x17 COMMA ++ DW_OP_xderef = 0x18 COMMA ++ DW_OP_abs = 0x19 COMMA ++ DW_OP_and = 0x1a COMMA ++ DW_OP_div = 0x1b COMMA ++ DW_OP_minus = 0x1c COMMA ++ DW_OP_mod = 0x1d COMMA ++ DW_OP_mul = 0x1e COMMA ++ DW_OP_neg = 0x1f COMMA ++ DW_OP_not = 0x20 COMMA ++ DW_OP_or = 0x21 COMMA ++ DW_OP_plus = 0x22 COMMA ++ DW_OP_plus_uconst = 0x23 COMMA ++ DW_OP_shl = 0x24 COMMA ++ DW_OP_shr = 0x25 COMMA ++ DW_OP_shra = 0x26 COMMA ++ DW_OP_xor = 0x27 COMMA ++ DW_OP_bra = 0x28 COMMA ++ DW_OP_eq = 0x29 COMMA ++ DW_OP_ge = 0x2a COMMA ++ DW_OP_gt = 0x2b COMMA ++ DW_OP_le = 0x2c COMMA ++ DW_OP_lt = 0x2d COMMA ++ DW_OP_ne = 0x2e COMMA ++ DW_OP_skip = 0x2f COMMA ++ DW_OP_lit0 = 0x30 COMMA ++ DW_OP_lit1 = 0x31 COMMA ++ DW_OP_lit2 = 0x32 COMMA ++ DW_OP_lit3 = 0x33 COMMA ++ DW_OP_lit4 = 0x34 COMMA ++ DW_OP_lit5 = 0x35 COMMA ++ DW_OP_lit6 = 0x36 COMMA ++ DW_OP_lit7 = 0x37 COMMA ++ DW_OP_lit8 = 0x38 COMMA ++ DW_OP_lit9 = 0x39 COMMA ++ DW_OP_lit10 = 0x3a COMMA ++ DW_OP_lit11 = 0x3b COMMA ++ DW_OP_lit12 = 0x3c COMMA ++ DW_OP_lit13 = 0x3d COMMA ++ DW_OP_lit14 = 0x3e COMMA ++ DW_OP_lit15 = 0x3f COMMA ++ DW_OP_lit16 = 0x40 COMMA ++ DW_OP_lit17 = 0x41 COMMA ++ DW_OP_lit18 = 0x42 COMMA ++ DW_OP_lit19 = 0x43 COMMA ++ DW_OP_lit20 = 0x44 COMMA ++ DW_OP_lit21 = 0x45 COMMA ++ DW_OP_lit22 = 0x46 COMMA ++ DW_OP_lit23 = 0x47 COMMA ++ DW_OP_lit24 = 0x48 COMMA ++ DW_OP_lit25 = 0x49 COMMA ++ DW_OP_lit26 = 0x4a COMMA ++ DW_OP_lit27 = 0x4b COMMA ++ DW_OP_lit28 = 0x4c COMMA ++ DW_OP_lit29 = 0x4d COMMA ++ DW_OP_lit30 = 0x4e COMMA ++ DW_OP_lit31 = 0x4f COMMA ++ 
DW_OP_reg0 = 0x50 COMMA ++ DW_OP_reg1 = 0x51 COMMA ++ DW_OP_reg2 = 0x52 COMMA ++ DW_OP_reg3 = 0x53 COMMA ++ DW_OP_reg4 = 0x54 COMMA ++ DW_OP_reg5 = 0x55 COMMA ++ DW_OP_reg6 = 0x56 COMMA ++ DW_OP_reg7 = 0x57 COMMA ++ DW_OP_reg8 = 0x58 COMMA ++ DW_OP_reg9 = 0x59 COMMA ++ DW_OP_reg10 = 0x5a COMMA ++ DW_OP_reg11 = 0x5b COMMA ++ DW_OP_reg12 = 0x5c COMMA ++ DW_OP_reg13 = 0x5d COMMA ++ DW_OP_reg14 = 0x5e COMMA ++ DW_OP_reg15 = 0x5f COMMA ++ DW_OP_reg16 = 0x60 COMMA ++ DW_OP_reg17 = 0x61 COMMA ++ DW_OP_reg18 = 0x62 COMMA ++ DW_OP_reg19 = 0x63 COMMA ++ DW_OP_reg20 = 0x64 COMMA ++ DW_OP_reg21 = 0x65 COMMA ++ DW_OP_reg22 = 0x66 COMMA ++ DW_OP_reg23 = 0x67 COMMA ++ DW_OP_reg24 = 0x68 COMMA ++ DW_OP_reg25 = 0x69 COMMA ++ DW_OP_reg26 = 0x6a COMMA ++ DW_OP_reg27 = 0x6b COMMA ++ DW_OP_reg28 = 0x6c COMMA ++ DW_OP_reg29 = 0x6d COMMA ++ DW_OP_reg30 = 0x6e COMMA ++ DW_OP_reg31 = 0x6f COMMA ++ DW_OP_breg0 = 0x70 COMMA ++ DW_OP_breg1 = 0x71 COMMA ++ DW_OP_breg2 = 0x72 COMMA ++ DW_OP_breg3 = 0x73 COMMA ++ DW_OP_breg4 = 0x74 COMMA ++ DW_OP_breg5 = 0x75 COMMA ++ DW_OP_breg6 = 0x76 COMMA ++ DW_OP_breg7 = 0x77 COMMA ++ DW_OP_breg8 = 0x78 COMMA ++ DW_OP_breg9 = 0x79 COMMA ++ DW_OP_breg10 = 0x7a COMMA ++ DW_OP_breg11 = 0x7b COMMA ++ DW_OP_breg12 = 0x7c COMMA ++ DW_OP_breg13 = 0x7d COMMA ++ DW_OP_breg14 = 0x7e COMMA ++ DW_OP_breg15 = 0x7f COMMA ++ DW_OP_breg16 = 0x80 COMMA ++ DW_OP_breg17 = 0x81 COMMA ++ DW_OP_breg18 = 0x82 COMMA ++ DW_OP_breg19 = 0x83 COMMA ++ DW_OP_breg20 = 0x84 COMMA ++ DW_OP_breg21 = 0x85 COMMA ++ DW_OP_breg22 = 0x86 COMMA ++ DW_OP_breg23 = 0x87 COMMA ++ DW_OP_breg24 = 0x88 COMMA ++ DW_OP_breg25 = 0x89 COMMA ++ DW_OP_breg26 = 0x8a COMMA ++ DW_OP_breg27 = 0x8b COMMA ++ DW_OP_breg28 = 0x8c COMMA ++ DW_OP_breg29 = 0x8d COMMA ++ DW_OP_breg30 = 0x8e COMMA ++ DW_OP_breg31 = 0x8f COMMA ++ DW_OP_regx = 0x90 COMMA ++ DW_OP_fbreg = 0x91 COMMA ++ DW_OP_bregx = 0x92 COMMA ++ DW_OP_piece = 0x93 COMMA ++ DW_OP_deref_size = 0x94 COMMA ++ DW_OP_xderef_size = 0x95 COMMA ++ DW_OP_nop = 0x96 COMMA ++ /* DWARF 3 extensions. */ ++ DW_OP_push_object_address = 0x97 COMMA ++ DW_OP_call2 = 0x98 COMMA ++ DW_OP_call4 = 0x99 COMMA ++ DW_OP_call_ref = 0x9a COMMA ++ /* GNU extensions. */ ++ DW_OP_GNU_push_tls_address = 0xe0 ++IF_NOT_ASM(};) ++ ++#define DW_OP_lo_user 0xe0 /* Implementation-defined range start. */ ++#define DW_OP_hi_user 0xff /* Implementation-defined range end. */ ++ ++/* Type encodings. */ ++ENUM(dwarf_type) ++ ++ DW_ATE_void = 0x0 COMMA ++ DW_ATE_address = 0x1 COMMA ++ DW_ATE_boolean = 0x2 COMMA ++ DW_ATE_complex_float = 0x3 COMMA ++ DW_ATE_float = 0x4 COMMA ++ DW_ATE_signed = 0x5 COMMA ++ DW_ATE_signed_char = 0x6 COMMA ++ DW_ATE_unsigned = 0x7 COMMA ++ DW_ATE_unsigned_char = 0x8 COMMA ++ /* DWARF 3. */ ++ DW_ATE_imaginary_float = 0x9 ++IF_NOT_ASM(};) ++ ++#define DW_ATE_lo_user 0x80 ++#define DW_ATE_hi_user 0xff ++ ++/* Array ordering names and codes. */ ++ENUM(dwarf_array_dim_ordering) ++ ++ DW_ORD_row_major = 0 COMMA ++ DW_ORD_col_major = 1 ++IF_NOT_ASM(};) ++ ++/* Access attribute. */ ++ENUM(dwarf_access_attribute) ++ ++ DW_ACCESS_public = 1 COMMA ++ DW_ACCESS_protected = 2 COMMA ++ DW_ACCESS_private = 3 ++IF_NOT_ASM(};) ++ ++/* Visibility. */ ++ENUM(dwarf_visibility_attribute) ++ ++ DW_VIS_local = 1 COMMA ++ DW_VIS_exported = 2 COMMA ++ DW_VIS_qualified = 3 ++IF_NOT_ASM(};) ++ ++/* Virtuality. */ ++ENUM(dwarf_virtuality_attribute) ++ ++ DW_VIRTUALITY_none = 0 COMMA ++ DW_VIRTUALITY_virtual = 1 COMMA ++ DW_VIRTUALITY_pure_virtual = 2 ++IF_NOT_ASM(};) ++ ++/* Case sensitivity. 
*/ ++ENUM(dwarf_id_case) ++ ++ DW_ID_case_sensitive = 0 COMMA ++ DW_ID_up_case = 1 COMMA ++ DW_ID_down_case = 2 COMMA ++ DW_ID_case_insensitive = 3 ++IF_NOT_ASM(};) ++ ++/* Calling convention. */ ++ENUM(dwarf_calling_convention) ++ ++ DW_CC_normal = 0x1 COMMA ++ DW_CC_program = 0x2 COMMA ++ DW_CC_nocall = 0x3 ++IF_NOT_ASM(};) ++ ++#define DW_CC_lo_user 0x40 ++#define DW_CC_hi_user 0xff ++ ++/* Inline attribute. */ ++ENUM(dwarf_inline_attribute) ++ ++ DW_INL_not_inlined = 0 COMMA ++ DW_INL_inlined = 1 COMMA ++ DW_INL_declared_not_inlined = 2 COMMA ++ DW_INL_declared_inlined = 3 ++IF_NOT_ASM(};) ++ ++/* Discriminant lists. */ ++ENUM(dwarf_discrim_list) ++ ++ DW_DSC_label = 0 COMMA ++ DW_DSC_range = 1 ++IF_NOT_ASM(};) ++ ++/* Line number opcodes. */ ++ENUM(dwarf_line_number_ops) ++ ++ DW_LNS_extended_op = 0 COMMA ++ DW_LNS_copy = 1 COMMA ++ DW_LNS_advance_pc = 2 COMMA ++ DW_LNS_advance_line = 3 COMMA ++ DW_LNS_set_file = 4 COMMA ++ DW_LNS_set_column = 5 COMMA ++ DW_LNS_negate_stmt = 6 COMMA ++ DW_LNS_set_basic_block = 7 COMMA ++ DW_LNS_const_add_pc = 8 COMMA ++ DW_LNS_fixed_advance_pc = 9 COMMA ++ /* DWARF 3. */ ++ DW_LNS_set_prologue_end = 10 COMMA ++ DW_LNS_set_epilogue_begin = 11 COMMA ++ DW_LNS_set_isa = 12 ++IF_NOT_ASM(};) ++ ++/* Line number extended opcodes. */ ++ENUM(dwarf_line_number_x_ops) ++ ++ DW_LNE_end_sequence = 1 COMMA ++ DW_LNE_set_address = 2 COMMA ++ DW_LNE_define_file = 3 ++IF_NOT_ASM(};) ++ ++/* Call frame information. */ ++ENUM(dwarf_call_frame_info) ++ ++ DW_CFA_advance_loc = 0x40 COMMA ++ DW_CFA_offset = 0x80 COMMA ++ DW_CFA_restore = 0xc0 COMMA ++ DW_CFA_nop = 0x00 COMMA ++ DW_CFA_set_loc = 0x01 COMMA ++ DW_CFA_advance_loc1 = 0x02 COMMA ++ DW_CFA_advance_loc2 = 0x03 COMMA ++ DW_CFA_advance_loc4 = 0x04 COMMA ++ DW_CFA_offset_extended = 0x05 COMMA ++ DW_CFA_restore_extended = 0x06 COMMA ++ DW_CFA_undefined = 0x07 COMMA ++ DW_CFA_same_value = 0x08 COMMA ++ DW_CFA_register = 0x09 COMMA ++ DW_CFA_remember_state = 0x0a COMMA ++ DW_CFA_restore_state = 0x0b COMMA ++ DW_CFA_def_cfa = 0x0c COMMA ++ DW_CFA_def_cfa_register = 0x0d COMMA ++ DW_CFA_def_cfa_offset = 0x0e COMMA ++ ++ /* DWARF 3. */ ++ DW_CFA_def_cfa_expression = 0x0f COMMA ++ DW_CFA_expression = 0x10 COMMA ++ DW_CFA_offset_extended_sf = 0x11 COMMA ++ DW_CFA_def_cfa_sf = 0x12 COMMA ++ DW_CFA_def_cfa_offset_sf = 0x13 COMMA ++ ++ /* SGI/MIPS specific. */ ++ DW_CFA_MIPS_advance_loc8 = 0x1d COMMA ++ ++ /* GNU extensions. */ ++ DW_CFA_GNU_window_save = 0x2d COMMA ++ DW_CFA_GNU_args_size = 0x2e COMMA ++ DW_CFA_GNU_negative_offset_extended = 0x2f ++IF_NOT_ASM(};) ++ ++#define DW_CIE_ID 0xffffffff ++#define DW_CIE_VERSION 1 ++ ++#define DW_CFA_extended 0 ++#define DW_CFA_lo_user 0x1c ++#define DW_CFA_hi_user 0x3f ++ ++#define DW_CHILDREN_no 0x00 ++#define DW_CHILDREN_yes 0x01 ++ ++#define DW_ADDR_none 0 ++ ++/* Source language names and codes. */ ++ENUM(dwarf_source_language) ++ ++ DW_LANG_C89 = 0x0001 COMMA ++ DW_LANG_C = 0x0002 COMMA ++ DW_LANG_Ada83 = 0x0003 COMMA ++ DW_LANG_C_plus_plus = 0x0004 COMMA ++ DW_LANG_Cobol74 = 0x0005 COMMA ++ DW_LANG_Cobol85 = 0x0006 COMMA ++ DW_LANG_Fortran77 = 0x0007 COMMA ++ DW_LANG_Fortran90 = 0x0008 COMMA ++ DW_LANG_Pascal83 = 0x0009 COMMA ++ DW_LANG_Modula2 = 0x000a COMMA ++ DW_LANG_Java = 0x000b COMMA ++ /* DWARF 3. */ ++ DW_LANG_C99 = 0x000c COMMA ++ DW_LANG_Ada95 = 0x000d COMMA ++ DW_LANG_Fortran95 = 0x000e COMMA ++ /* MIPS. */ ++ DW_LANG_Mips_Assembler = 0x8001 COMMA ++ /* UPC. 
*/ ++ DW_LANG_Upc = 0x8765 ++IF_NOT_ASM(};) ++ ++#define DW_LANG_lo_user 0x8000 /* Implementation-defined range start. */ ++#define DW_LANG_hi_user 0xffff /* Implementation-defined range start. */ ++ ++/* Names and codes for macro information. */ ++ENUM(dwarf_macinfo_record_type) ++ ++ DW_MACINFO_define = 1 COMMA ++ DW_MACINFO_undef = 2 COMMA ++ DW_MACINFO_start_file = 3 COMMA ++ DW_MACINFO_end_file = 4 COMMA ++ DW_MACINFO_vendor_ext = 255 ++IF_NOT_ASM(};) ++ ++/* @@@ For use with GNU frame unwind information. */ ++ ++#define DW_EH_PE_absptr 0x00 ++#define DW_EH_PE_omit 0xff ++ ++#define DW_EH_PE_uleb128 0x01 ++#define DW_EH_PE_udata2 0x02 ++#define DW_EH_PE_udata4 0x03 ++#define DW_EH_PE_udata8 0x04 ++#define DW_EH_PE_sleb128 0x09 ++#define DW_EH_PE_sdata2 0x0A ++#define DW_EH_PE_sdata4 0x0B ++#define DW_EH_PE_sdata8 0x0C ++#define DW_EH_PE_signed 0x08 ++ ++#define DW_EH_PE_pcrel 0x10 ++#define DW_EH_PE_textrel 0x20 ++#define DW_EH_PE_datarel 0x30 ++#define DW_EH_PE_funcrel 0x40 ++#define DW_EH_PE_aligned 0x50 ++ ++#define DW_EH_PE_indirect 0x80 ++ ++#endif /* _ELF_DWARF2_H */ +diff -puN /dev/null include/linux/dwarf2-lang.h +--- /dev/null 2003-09-15 06:40:47.000000000 -0700 ++++ 25-akpm/include/linux/dwarf2-lang.h 2004-10-21 14:54:15.337591824 -0700 +@@ -0,0 +1,132 @@ ++#ifndef DWARF2_LANG ++#define DWARF2_LANG ++#include ++ ++/* ++ * This is free software; you can redistribute it and/or modify it under ++ * the terms of the GNU General Public License as published by the Free ++ * Software Foundation; either version 2, or (at your option) any later ++ * version. ++ */ ++/* ++ * This file defines macros that allow generation of DWARF debug records ++ * for asm files. This file is platform independent. Register numbers ++ * (which are about the only thing that is platform dependent) are to be ++ * supplied by a platform defined file. ++ */ ++#define DWARF_preamble() .section .debug_frame,"",@progbits ++/* ++ * This macro starts a debug frame section. The debug_frame describes ++ * where to find the registers that the enclosing function saved on ++ * entry. ++ * ++ * ORD is use by the label generator and should be the same as what is ++ * passed to CFI_postamble. ++ * ++ * pc, pc register gdb ordinal. ++ * ++ * code_align this is the factor used to define locations or regions ++ * where the given definitions apply. If you use labels to define these ++ * this should be 1. ++ * ++ * data_align this is the factor used to define register offsets. If ++ * you use struct offset, this should be the size of the register in ++ * bytes or the negative of that. This is how it is used: you will ++ * define a register as the reference register, say the stack pointer, ++ * then you will say where a register is located relative to this ++ * reference registers value, say 40 for register 3 (the gdb register ++ * number). The <40> will be multiplied by to define the ++ * byte offset of the given register (3, in this example). So if your ++ * <40> is the byte offset and the reference register points at the ++ * begining, you would want 1 for the data_offset. If <40> was the 40th ++ * 4-byte element in that structure you would want 4. And if your ++ * reference register points at the end of the structure you would want ++ * a negative data_align value(and you would have to do other math as ++ * well). 
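A concrete instance of the arithmetic just described (added for clarity): with data_align of -4, a factored offset of 2 for some register means the saved value sits at CFA + 2 * (-4), i.e. 8 bytes below the canonical frame address; with data_align of 1 the offset operand is simply the byte offset itself.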
++ */ ++ ++#define CFI_preamble(ORD, pc, code_align, data_align) \ ++.section .debug_frame,"",@progbits ; \ ++frame/**/_/**/ORD: \ ++ .long end/**/_/**/ORD-start/**/_/**/ORD; \ ++start/**/_/**/ORD: \ ++ .long DW_CIE_ID; \ ++ .byte DW_CIE_VERSION; \ ++ .byte 0 ; \ ++ .uleb128 code_align; \ ++ .sleb128 data_align; \ ++ .byte pc; ++ ++/* ++ * After the above macro and prior to the CFI_postamble, you need to ++ * define the initial state. This starts with defining the reference ++ * register and, usually the pc. Here are some helper macros: ++ */ ++ ++#define CFA_define_reference(reg, offset) \ ++ .byte DW_CFA_def_cfa; \ ++ .uleb128 reg; \ ++ .uleb128 (offset); ++ ++#define CFA_define_offset(reg, offset) \ ++ .byte (DW_CFA_offset + reg); \ ++ .uleb128 (offset); ++ ++#define CFI_postamble(ORD) \ ++ .align 4; \ ++end/**/_/**/ORD: ++/* ++ * So now your code pushs stuff on the stack, you need a new location ++ * and the rules for what to do. This starts a running description of ++ * the call frame. You need to describe what changes with respect to ++ * the call registers as the location of the pc moves through the code. ++ * The following builds an FDE (fram descriptor entry?). Like the ++ * above, it has a preamble and a postamble. It also is tied to the CFI ++ * above. ++ * The first entry after the preamble must be the location in the code ++ * that the call frame is being described for. ++ */ ++#define FDE_preamble(ORD, fde_no, initial_address, length) \ ++ .long FDE_end/**/_/**/fde_no-FDE_start/**/_/**/fde_no; \ ++FDE_start/**/_/**/fde_no: \ ++ .long frame/**/_/**/ORD; \ ++ .long initial_address; \ ++ .long length; ++ ++#define FDE_postamble(fde_no) \ ++ .align 4; \ ++FDE_end/**/_/**/fde_no: ++/* ++ * That done, you can now add registers, subtract registers, move the ++ * reference and even change the reference. You can also define a new ++ * area of code the info applies to. For discontinuous bits you should ++ * start a new FDE. You may have as many as you like. ++ */ ++ ++/* ++ * To advance the address by ++ */ ++ ++#define FDE_advance(bytes) \ ++ .byte DW_CFA_advance_loc4 \ ++ .long bytes ++ ++ ++ ++/* ++ * With the above you can define all the register locations. But ++ * suppose the reference register moves... Takes the new offset NOT an ++ * increment. This is how esp is tracked if it is not saved. ++ */ ++ ++#define CFA_define_cfa_offset(offset) \ ++ .byte $DW_CFA_def_cfa_offset; \ ++ .uleb128 (offset); ++/* ++ * Or suppose you want to use a different reference register... 
++ */ ++#define CFA_define_cfa_register(reg) \ ++ .byte DW_CFA_def_cfa_register; \ ++ .uleb128 reg; ++ ++#endif +diff -puN include/linux/serial_core.h~kgdb-ga include/linux/serial_core.h +--- 25/include/linux/serial_core.h~kgdb-ga 2004-10-21 14:54:15.282600184 -0700 ++++ 25-akpm/include/linux/serial_core.h 2004-10-21 14:54:15.338591672 -0700 +@@ -172,7 +172,9 @@ struct uart_port { + unsigned char x_char; /* xon/xoff char */ + unsigned char regshift; /* reg offset shift */ + unsigned char iotype; /* io access style */ +- ++#ifdef CONFIG_KGDB ++ int kgdb; /* in use by kgdb */ ++#endif + #define UPIO_PORT (0) + #define UPIO_HUB6 (1) + #define UPIO_MEM (2) +diff -puN include/linux/spinlock.h~kgdb-ga include/linux/spinlock.h +--- 25/include/linux/spinlock.h~kgdb-ga 2004-10-21 14:54:15.284599880 -0700 ++++ 25-akpm/include/linux/spinlock.h 2004-10-21 14:54:15.338591672 -0700 +@@ -15,6 +15,12 @@ + + #include /* for cpu relax */ + #include ++#ifdef CONFIG_KGDB ++#include ++#define SET_WHO(x, him) (x)->who = him; ++#else ++#define SET_WHO(x, him) ++#endif + + /* + * Must define these before including other files, inline functions need them +@@ -88,6 +94,9 @@ typedef struct { + const char *module; + char *owner; + int oline; ++#ifdef CONFIG_KGDB ++ struct task_struct *who; ++#endif + } spinlock_t; + #define SPIN_LOCK_UNLOCKED (spinlock_t) { SPINLOCK_MAGIC, 0, 10, __FILE__ , NULL, 0} + +@@ -99,6 +108,7 @@ typedef struct { + (x)->module = __FILE__; \ + (x)->owner = NULL; \ + (x)->oline = 0; \ ++ SET_WHO(x, NULL) \ + } while (0) + + #define CHECK_LOCK(x) \ +@@ -121,6 +131,7 @@ typedef struct { + (x)->lock = 1; \ + (x)->owner = __FILE__; \ + (x)->oline = __LINE__; \ ++ SET_WHO(x, current) \ + } while (0) + + /* without debugging, spin_is_locked on UP always says +@@ -151,6 +162,7 @@ typedef struct { + (x)->lock = 1; \ + (x)->owner = __FILE__; \ + (x)->oline = __LINE__; \ ++ SET_WHO(x, current) \ + 1; \ + }) + +diff -puN kernel/pid.c~kgdb-ga kernel/pid.c +--- 25/kernel/pid.c~kgdb-ga 2004-10-21 14:54:15.285599728 -0700 ++++ 25-akpm/kernel/pid.c 2004-10-21 14:54:15.339591520 -0700 +@@ -252,6 +252,9 @@ void switch_exec_pids(task_t *leader, ta + * machine. From a minimum of 16 slots up to 4096 slots at one gigabyte or + * more. + */ ++#ifdef CONFIG_KGDB ++int kgdb_pid_init_done; /* so we don't call prior to... 
*/ ++#endif + void __init pidhash_init(void) + { + int i, j, pidhash_size; +@@ -273,6 +276,9 @@ void __init pidhash_init(void) + for (j = 0; j < pidhash_size; j++) + INIT_HLIST_HEAD(&pid_hash[i][j]); + } ++#ifdef CONFIG_KGDB ++ kgdb_pid_init_done++; ++#endif + } + + void __init pidmap_init(void) +diff -puN kernel/sched.c~kgdb-ga kernel/sched.c +--- 25/kernel/sched.c~kgdb-ga 2004-10-21 14:54:15.287599424 -0700 ++++ 25-akpm/kernel/sched.c 2004-10-21 14:54:15.342591064 -0700 +@@ -2931,6 +2931,13 @@ out_unlock: + + EXPORT_SYMBOL(set_user_nice); + ++#ifdef CONFIG_KGDB ++struct task_struct *kgdb_get_idle(int this_cpu) ++{ ++ return cpu_rq(this_cpu)->idle; ++} ++#endif ++ + #ifdef __ARCH_WANT_SYS_NICE + + /* +diff -puN MAINTAINERS~kgdb-ga MAINTAINERS +--- 25/MAINTAINERS~kgdb-ga 2004-10-21 14:54:15.288599272 -0700 ++++ 25-akpm/MAINTAINERS 2004-10-21 14:54:15.344590760 -0700 +@@ -1242,6 +1242,12 @@ W: http://sf.net/projects/kernel-janitor + W: http://developer.osdl.org/rddunlap/kj-patches/ + S: Maintained + ++KGDB FOR I386 PLATFORM ++P: George Anzinger ++M: george@mvista.com ++L: linux-net@vger.kernel.org ++S: Supported ++ + KERNEL NFSD + P: Neil Brown + M: neilb@cse.unsw.edu.au +diff -puN arch/i386/Kconfig.debug~kgdb-ga arch/i386/Kconfig.debug +--- 25/arch/i386/Kconfig.debug~kgdb-ga 2004-10-21 14:54:15.290598968 -0700 ++++ 25-akpm/arch/i386/Kconfig.debug 2004-10-21 14:54:15.344590760 -0700 +@@ -65,4 +65,6 @@ config X86_MPPARSE + depends on X86_LOCAL_APIC && !X86_VISWS + default y + ++source "arch/i386/Kconfig.kgdb" ++ + endmenu +diff -puN /dev/null arch/i386/Kconfig.kgdb +--- /dev/null 2003-09-15 06:40:47.000000000 -0700 ++++ 25-akpm/arch/i386/Kconfig.kgdb 2004-10-21 14:54:15.345590608 -0700 +@@ -0,0 +1,175 @@ ++config KGDB ++ bool "Include kgdb kernel debugger" ++ depends on DEBUG_KERNEL ++ help ++ If you say Y here, the system will be compiled with the debug ++ option (-g) and a debugging stub will be included in the ++ kernel. This stub communicates with gdb on another (host) ++ computer via a serial port. The host computer should have ++ access to the kernel binary file (vmlinux) and a serial port ++ that is connected to the target machine. Gdb can be made to ++ configure the serial port or you can use stty and setserial to ++ do this. See the 'target' command in gdb. This option also ++ configures in the ability to request a breakpoint early in the ++ boot process. To request the breakpoint just include 'kgdb' ++ as a boot option when booting the target machine. The system ++ will then break as soon as it looks at the boot options. This ++ option also installs a breakpoint in panic and sends any ++ kernel faults to the debugger. For more information see the ++ Documentation/i386/kgdb/kgdb.txt file. ++ ++choice ++ depends on KGDB ++ prompt "Debug serial port BAUD" ++ default KGDB_115200BAUD ++ help ++ Gdb and the kernel stub need to agree on the baud rate to be ++ used. Some systems (x86 family at this writing) allow this to ++ be configured. ++ ++config KGDB_9600BAUD ++ bool "9600" ++ ++config KGDB_19200BAUD ++ bool "19200" ++ ++config KGDB_38400BAUD ++ bool "38400" ++ ++config KGDB_57600BAUD ++ bool "57600" ++ ++config KGDB_115200BAUD ++ bool "115200" ++endchoice ++ ++config KGDB_PORT ++ hex "hex I/O port address of the debug serial port" ++ depends on KGDB ++ default 3f8 ++ help ++ Some systems (x86 family at this writing) allow the port ++ address to be configured. The number entered is assumed to be ++ hex, don't put 0x in front of it. 
The standard address are: ++ COM1 3f8 , irq 4 and COM2 2f8 irq 3. Setserial /dev/ttySx ++ will tell you what you have. It is good to test the serial ++ connection with a live system before trying to debug. ++ ++config KGDB_IRQ ++ int "IRQ of the debug serial port" ++ depends on KGDB ++ default 4 ++ help ++ This is the irq for the debug port. If everything is working ++ correctly and the kernel has interrupts on a control C to the ++ port should cause a break into the kernel debug stub. ++ ++config DEBUG_INFO ++ bool ++ depends on KGDB ++ default y ++ ++config KGDB_MORE ++ bool "Add any additional compile options" ++ depends on KGDB ++ default n ++ help ++ Saying yes here turns on the ability to enter additional ++ compile options. ++ ++ ++config KGDB_OPTIONS ++ depends on KGDB_MORE ++ string "Additional compile arguments" ++ default "-O1" ++ help ++ This option allows you enter additional compile options for ++ the whole kernel compile. Each platform will have a default ++ that seems right for it. For example on PPC "-ggdb -O1", and ++ for i386 "-O1". Note that by configuring KGDB "-g" is already ++ turned on. In addition, on i386 platforms ++ "-fomit-frame-pointer" is deleted from the standard compile ++ options. ++ ++config NO_KGDB_CPUS ++ int "Number of CPUs" ++ depends on KGDB && SMP ++ default NR_CPUS ++ help ++ ++ This option sets the number of cpus for kgdb ONLY. It is used ++ to prune some internal structures so they look "nice" when ++ displayed with gdb. This is to overcome possibly larger ++ numbers that may have been entered above. Enter the real ++ number to get nice clean kgdb_info displays. ++ ++config KGDB_TS ++ bool "Enable kgdb time stamp macros?" ++ depends on KGDB ++ default n ++ help ++ Kgdb event macros allow you to instrument your code with calls ++ to the kgdb event recording function. The event log may be ++ examined with gdb at a break point. Turning on this ++ capability also allows you to choose how many events to ++ keep. Kgdb always keeps the lastest events. ++ ++choice ++ depends on KGDB_TS ++ prompt "Max number of time stamps to save?" ++ default KGDB_TS_128 ++ ++config KGDB_TS_64 ++ bool "64" ++ ++config KGDB_TS_128 ++ bool "128" ++ ++config KGDB_TS_256 ++ bool "256" ++ ++config KGDB_TS_512 ++ bool "512" ++ ++config KGDB_TS_1024 ++ bool "1024" ++ ++endchoice ++ ++config STACK_OVERFLOW_TEST ++ bool "Turn on kernel stack overflow testing?" ++ depends on KGDB ++ default n ++ help ++ This option enables code in the front line interrupt handlers ++ to check for kernel stack overflow on interrupts and system ++ calls. This is part of the kgdb code on x86 systems. ++ ++config KGDB_CONSOLE ++ bool "Enable serial console thru kgdb port" ++ depends on KGDB ++ default n ++ help ++ This option enables the command line "console=kgdb" option. ++ When the system is booted with this option in the command line ++ all kernel printk output is sent to gdb (as well as to other ++ consoles). For this to work gdb must be connected. For this ++ reason, this command line option will generate a breakpoint if ++ gdb has not yet connected. After the gdb continue command is ++ given all pent up console output will be printed by gdb on the ++ host machine. Neither this option, nor KGDB require the ++ serial driver to be configured. ++ ++config KGDB_SYSRQ ++ bool "Turn on SysRq 'G' command to do a break?" ++ depends on KGDB ++ default y ++ help ++ This option includes an option in the SysRq code that allows ++ you to enter SysRq G which generates a breakpoint to the KGDB ++ stub. 
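As a practical aside on the options above: on the host they boil down to running gdb against the target's vmlinux and attaching over the cabled serial port -- typically something like "set remotebaud 115200" followed by "target remote /dev/ttyS0" for the default baud chosen here, with stty or setserial run first if the host port needs manual setup; the device name is whichever host serial port is wired to the target.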
This will work if the keyboard is alive and can ++ interrupt the system. Because of constraints on when the ++ serial port interrupt can be enabled, this code may allow you ++ to interrupt the system before the serial port control C is ++ available. Just say yes here. ++ +_ diff --git a/lustre/kernel_patches/series/2.6-rhel4.series b/lustre/kernel_patches/series/2.6-rhel4.series index 9b5c368..5b03878 100644 --- a/lustre/kernel_patches/series/2.6-rhel4.series +++ b/lustre/kernel_patches/series/2.6-rhel4.series @@ -19,3 +19,4 @@ qsnet-rhel4-2.6.patch linux-2.6-binutils-2.16.patch compile-fixes-2.6.9-rhel4-22.patch vm-tunables-rhel4.patch +2.6-rhel4-kgdb-ga.patch diff --git a/lustre/mdt/Makefile.in b/lustre/mdt/Makefile.in new file mode 100644 index 0000000..468e35f --- /dev/null +++ b/lustre/mdt/Makefile.in @@ -0,0 +1,4 @@ +MODULES := mdt +mdt-objs := mdt_handler.o + +@INCLUDE_RULES@ diff --git a/lustre/mdt/autoMakefile.am b/lustre/mdt/autoMakefile.am new file mode 100644 index 0000000..22c79cf --- /dev/null +++ b/lustre/mdt/autoMakefile.am @@ -0,0 +1,11 @@ +# Copyright (C) 2001 Cluster File Systems, Inc. +# +# This code is issued under the GNU General Public License. +# See the file COPYING in this distribution + +if MODULES +modulefs_DATA = mdt$(KMODEXT) +endif + +MOSTLYCLEANFILES := @MOSTLYCLEANFILES@ +DIST_SOURCES := $(mdt-objs:%.o=%.c) #mdt.h diff --git a/lustre/mdt/mdt.h b/lustre/mdt/mdt.h new file mode 100644 index 0000000..5d28baa --- /dev/null +++ b/lustre/mdt/mdt.h @@ -0,0 +1,88 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + */ + +#ifndef _MDT_H +#define _MDT_H + +#if defined(__KERNEL__) + +#include + +#define LUSTRE_MDT0_NAME "mdt0" + +struct md_device_operations; + +struct ptlrpc_service_conf { + int psc_nbufs; + int psc_bufsize; + int psc_max_req_size; + int psc_max_reply_size; + int psc_req_portal; + int psc_rep_portal; + int psc_watchdog_timeout; /* in ms */ + int psc_num_threads; +}; + +struct md_device { + struct lu_device md_lu_dev; + struct md_device_operations *md_ops; +}; + +struct md_device_operations { + int (*mdo_root_get)(struct md_device *m, struct lfid *f); +}; + +struct mdt_device { + /* super-class */ + struct md_device mdt_md_dev; + struct ptlrpc_service *mdt_service; + struct ptlrpc_service_conf mdt_service_conf; + /* DLM name-space for meta-data locks maintained by this server */ + struct ldlm_namespace *mdt_namespace; + /* DLM handle for MDS->client connections (for lock ASTs). */ + struct ldlm_client mdt_ldlm_client; + /* underlying device */ + struct md_device *mdt_mdd; +}; + +/* + * Meta-data stacking. 
+ */ + +struct md_object; +struct md_device; + +struct md_object { + struct lu_object mo_lu; +}; + +static inline struct md_object *lu2md(struct lu_object *o) +{ + return container_of(o, struct md_object, mo_lu); +} + +static inline struct md_device *md_device_get(struct md_object *o) +{ + return container_of(o->mo_lu.lo_dev, struct md_device, md_lu); +} + +struct mdt_object { + struct lu_object_header mot_header; + struct md_object mot_obj; +}; + +struct mdd_object { + struct md_object mod_obj; +}; + +struct osd_object { + struct lu_object oo_lu; + struct dentry *oo_dentry; +}; + +int md_device_init(struct md_device *md); +void md_device_fini(struct md_device *md); + +#endif /* __KERNEL__ */ +#endif /* _MDT_H */ diff --git a/lustre/mdt/mdt_handler.c b/lustre/mdt/mdt_handler.c new file mode 100644 index 0000000..c387e27 --- /dev/null +++ b/lustre/mdt/mdt_handler.c @@ -0,0 +1,1273 @@ +#if 0 +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * lustre/mds/handler.c + * Lustre Metadata Target (mdt) request handler + * + * Copyright (c) 2006 Cluster File Systems, Inc. + * Author: Peter Braam + * Author: Andreas Dilger + * Author: Phil Schwan + * Author: Mike Shaver + * Author: Nikita Danilov + * + * This file is part of the Lustre file system, http://www.lustre.org + * Lustre is a trademark of Cluster File Systems, Inc. + * + * You may have signed or agreed to another license before downloading + * this software. If so, you are bound by the terms and conditions + * of that agreement, and the following does not apply to you. See the + * LICENSE file included with this distribution for more information. + * + * If you did not agree to a different license, then this copy of Lustre + * is open source software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * In either case, Lustre is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * license text for more details. + */ + +#ifndef EXPORT_SYMTAB +# define EXPORT_SYMTAB +#endif +#define DEBUG_SUBSYSTEM S_MDS + +#include +#include + +#include "mdt.h" + +int mdt_num_threads; + +static int mdt_connect_internal(struct obd_export *exp, + struct obd_connect_data *data) +{ + struct obd_device *obd = exp->exp_obd; + if (data != NULL) { + data->ocd_connect_flags &= MDS_CONNECT_SUPPORTED; + data->ocd_ibits_known &= MDS_INODELOCK_FULL; + + /* If no known bits (which should not happen, probably, + as everybody should support LOOKUP and UPDATE bits at least) + revert to compat mode with plain locks. 
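As an illustrative aside on the mdt.h fragment above: struct mdt_object embeds struct md_object, which embeds the generic struct lu_object, so each upper layer is recovered from the one below with container_of(), in the same style as the lu2md() helper. A minimal sketch of the inverse helper one would expect alongside it (the name mdt_obj is hypothetical):

static inline struct mdt_object *mdt_obj(struct md_object *o)
{
        /* recover the enclosing mdt_object from its embedded md_object */
        return container_of(o, struct mdt_object, mot_obj);
}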
*/ + if (!data->ocd_ibits_known && + data->ocd_connect_flags & OBD_CONNECT_IBITS) + data->ocd_connect_flags &= ~OBD_CONNECT_IBITS; + + if (!obd->u.mds.mdt_fl_acl) + data->ocd_connect_flags &= ~OBD_CONNECT_ACL; + + if (!obd->u.mds.mdt_fl_user_xattr) + data->ocd_connect_flags &= ~OBD_CONNECT_XATTR; + + exp->exp_connect_flags = data->ocd_connect_flags; + data->ocd_version = LUSTRE_VERSION_CODE; + exp->exp_mdt_data.med_ibits_known = data->ocd_ibits_known; + } + + if (obd->u.mds.mdt_fl_acl && + ((exp->exp_connect_flags & OBD_CONNECT_ACL) == 0)) { + CWARN("%s: MDS requires ACL support but client does not\n", + obd->obd_name); + return -EBADE; + } + return 0; +} + +static int mdt_reconnect(struct obd_export *exp, struct obd_device *obd, + struct obd_uuid *cluuid, + struct obd_connect_data *data) +{ + int rc; + ENTRY; + + if (exp == NULL || obd == NULL || cluuid == NULL) + RETURN(-EINVAL); + + rc = mdt_connect_internal(exp, data); + + RETURN(rc); +} + +/* Establish a connection to the MDS. + * + * This will set up an export structure for the client to hold state data + * about that client, like open files, the last operation number it did + * on the server, etc. + */ +static int mdt_connect(struct lustre_handle *conn, struct obd_device *obd, + struct obd_uuid *cluuid, struct obd_connect_data *data) +{ + struct obd_export *exp; + struct mdt_export_data *med; + struct mdt_client_data *mcd = NULL; + int rc, abort_recovery; + ENTRY; + + if (!conn || !obd || !cluuid) + RETURN(-EINVAL); + + /* Check for aborted recovery. */ + spin_lock_bh(&obd->obd_processing_task_lock); + abort_recovery = obd->obd_abort_recovery; + spin_unlock_bh(&obd->obd_processing_task_lock); + if (abort_recovery) + target_abort_recovery(obd); + + /* XXX There is a small race between checking the list and adding a + * new connection for the same UUID, but the real threat (list + * corruption when multiple different clients connect) is solved. + * + * There is a second race between adding the export to the list, + * and filling in the client data below. Hence skipping the case + * of NULL mcd above. We should already be controlling multiple + * connects at the client, and we can't hold the spinlock over + * memory allocations without risk of deadlocking. + */ + rc = class_connect(conn, obd, cluuid); + if (rc) + RETURN(rc); + exp = class_conn2export(conn); + LASSERT(exp); + med = &exp->exp_mdt_data; + + rc = mdt_connect_internal(exp, data); + if (rc) + GOTO(out, rc); + + OBD_ALLOC_PTR(mcd); + if (!mcd) + GOTO(out, rc = -ENOMEM); + + memcpy(mcd->mcd_uuid, cluuid, sizeof(mcd->mcd_uuid)); + med->med_mcd = mcd; + + rc = mdt_client_add(obd, &obd->u.mds, med, -1); + GOTO(out, rc); + +out: + if (rc) { + if (mcd) { + OBD_FREE_PTR(mcd); + med->med_mcd = NULL; + } + class_disconnect(exp); + } else { + class_export_put(exp); + } + + RETURN(rc); +} + +int mdt_init_export(struct obd_export *exp) +{ + struct mdt_export_data *med = &exp->exp_mdt_data; + + INIT_LIST_HEAD(&med->med_open_head); + spin_lock_init(&med->med_open_lock); + exp->exp_connecting = 1; + RETURN(0); +} + +static int mdt_destroy_export(struct obd_export *export) +{ + struct mdt_export_data *med; + struct obd_device *obd = export->exp_obd; + struct lvfs_run_ctxt saved; + int rc = 0; + ENTRY; + + med = &export->exp_mdt_data; + target_destroy_export(export); + + if (obd_uuid_equals(&export->exp_client_uuid, &obd->obd_uuid)) + RETURN(0); + + push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); + /* Close any open files (which may also cause orphan unlinking). 
*/ + spin_lock(&med->med_open_lock); + while (!list_empty(&med->med_open_head)) { + struct list_head *tmp = med->med_open_head.next; + struct mdt_file_data *mfd = + list_entry(tmp, struct mdt_file_data, mfd_list); + struct dentry *dentry = mfd->mfd_dentry; + + /* Remove mfd handle so it can't be found again. + * We are consuming the mfd_list reference here. */ + mdt_mfd_unlink(mfd, 0); + spin_unlock(&med->med_open_lock); + + /* If you change this message, be sure to update + * replay_single:test_46 */ + CDEBUG(D_INODE|D_IOCTL, "%s: force closing file handle for " + "%.*s (ino %lu)\n", obd->obd_name, dentry->d_name.len, + dentry->d_name.name, dentry->d_inode->i_ino); + /* child orphan sem protects orphan_dec_test and + * is_orphan race, mdt_mfd_close drops it */ + MDT_DOWN_WRITE_ORPHAN_SEM(dentry->d_inode); + rc = mdt_mfd_close(NULL, MDS_REQ_REC_OFF, obd, mfd, + !(export->exp_flags & OBD_OPT_FAILOVER)); + + if (rc) + CDEBUG(D_INODE|D_IOCTL, "Error closing file: %d\n", rc); + spin_lock(&med->med_open_lock); + } + spin_unlock(&med->med_open_lock); + pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); + mdt_client_free(export); + + RETURN(rc); +} + +static int mdt_disconnect(struct obd_export *exp) +{ + unsigned long irqflags; + int rc; + ENTRY; + + LASSERT(exp); + class_export_get(exp); + + /* Disconnect early so that clients can't keep using export */ + rc = class_disconnect(exp); + ldlm_cancel_locks_for_export(exp); + + /* complete all outstanding replies */ + spin_lock_irqsave(&exp->exp_lock, irqflags); + while (!list_empty(&exp->exp_outstanding_replies)) { + struct ptlrpc_reply_state *rs = + list_entry(exp->exp_outstanding_replies.next, + struct ptlrpc_reply_state, rs_exp_list); + struct ptlrpc_service *svc = rs->rs_service; + + spin_lock(&svc->srv_lock); + list_del_init(&rs->rs_exp_list); + ptlrpc_schedule_difficult_reply(rs); + spin_unlock(&svc->srv_lock); + } + spin_unlock_irqrestore(&exp->exp_lock, irqflags); + + class_export_put(exp); + RETURN(rc); +} + +static int mdt_getstatus(struct mdt_thread_info *info, + struct ptlrpc_request *req) +{ + struct md_device *mdd = info->mti_mdt->mdt_mdd; + int size = sizeof *body; + struct mds_body *body; + int result; + + ENTRY; + + result = lustre_pack_reply(req, 1, &size, NULL); + if (result) + CERROR(LUSTRE_MDT0_NAME" out of memory for message: size=%d\n", + size); + else if (OBD_FAIL_CHECK(OBD_FAIL_MDS_GETSTATUS_PACK)) + result = -ENOMEM; + else { + body = lustre_msg_buf(req->rq_repmsg, 0, sizeof *body); + result = mdd->md_ops->mdo_root_get(mdd, &body->fid1); + } + + /* the last_committed and last_xid fields are filled in for all + * replies already - no need to do so here also. 
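As an illustrative aside: mdt_getstatus() above delegates the root fid lookup to ->mdo_root_get(), the single method declared in struct md_device_operations in mdt.h, so a lower md_device layer would plug in roughly as sketched below. All names here are hypothetical -- mdd_root_fid() in particular stands in for however that layer actually records its root fid:

static int mdd_root_get(struct md_device *m, struct lfid *f)
{
        /* return the fid of the filesystem root known to this layer;
         * mdd_root_fid() is a made-up placeholder, shown only for shape */
        *f = mdd_root_fid(m);
        return 0;
}

static struct md_device_operations mdd_md_ops = {
        .mdo_root_get = mdd_root_get,
};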
+ */ + RETURN(result); +} + +static int mdt_getattr_internal(struct obd_device *obd, struct dentry *dentry, + struct ptlrpc_request *req, + struct mds_body *reqbody, int reply_off) +{ + struct mds_body *body; + struct inode *inode = dentry->d_inode; + int rc = 0; + ENTRY; + + if (inode == NULL) + RETURN(-ENOENT); + + body = lustre_msg_buf(req->rq_repmsg, reply_off, sizeof(*body)); + LASSERT(body != NULL); /* caller prepped reply */ + + mdt_pack_inode2fid(&body->fid1, inode); + mdt_pack_inode2body(body, inode); + reply_off++; + + if ((S_ISREG(inode->i_mode) && (reqbody->valid & OBD_MD_FLEASIZE)) || + (S_ISDIR(inode->i_mode) && (reqbody->valid & OBD_MD_FLDIREA))) { + rc = mdt_pack_md(obd, req->rq_repmsg, reply_off, body, + inode, 1); + + /* If we have LOV EA data, the OST holds size, atime, mtime */ + if (!(body->valid & OBD_MD_FLEASIZE) && + !(body->valid & OBD_MD_FLDIREA)) + body->valid |= (OBD_MD_FLSIZE | OBD_MD_FLBLOCKS | + OBD_MD_FLATIME | OBD_MD_FLMTIME); + + lustre_shrink_reply(req, reply_off, body->eadatasize, 0); + if (body->eadatasize) + reply_off++; + } else if (S_ISLNK(inode->i_mode) && + (reqbody->valid & OBD_MD_LINKNAME) != 0) { + char *symname = lustre_msg_buf(req->rq_repmsg, reply_off, 0); + int len; + + LASSERT (symname != NULL); /* caller prepped reply */ + len = req->rq_repmsg->buflens[reply_off]; + + rc = inode->i_op->readlink(dentry, symname, len); + if (rc < 0) { + CERROR("readlink failed: %d\n", rc); + } else if (rc != len - 1) { + CERROR ("Unexpected readlink rc %d: expecting %d\n", + rc, len - 1); + rc = -EINVAL; + } else { + CDEBUG(D_INODE, "read symlink dest %s\n", symname); + body->valid |= OBD_MD_LINKNAME; + body->eadatasize = rc + 1; + symname[rc] = 0; /* NULL terminate */ + rc = 0; + } + reply_off++; + } + + if (reqbody->valid & OBD_MD_FLMODEASIZE) { + struct mdt_obd *mds = mdt_req2mds(req); + body->max_cookiesize = mds->mdt_max_cookiesize; + body->max_mdsize = mds->mdt_max_mdsize; + body->valid |= OBD_MD_FLMODEASIZE; + } + + if (rc) + RETURN(rc); + + RETURN(rc); +} + +static int mdt_getattr_pack_msg(struct ptlrpc_request *req, struct inode *inode, + int offset) +{ + struct mdt_obd *mds = mdt_req2mds(req); + struct mds_body *body; + int rc, size[2] = {sizeof(*body)}, bufcount = 1; + ENTRY; + + body = lustre_msg_buf(req->rq_reqmsg, offset, sizeof (*body)); + LASSERT(body != NULL); /* checked by caller */ + LASSERT_REQSWABBED(req, offset); /* swabbed by caller */ + + if ((S_ISREG(inode->i_mode) && (body->valid & OBD_MD_FLEASIZE)) || + (S_ISDIR(inode->i_mode) && (body->valid & OBD_MD_FLDIREA))) { + LOCK_INODE_MUTEX(inode); + rc = fsfilt_get_md(req->rq_export->exp_obd, inode, NULL, 0, + "lov"); + UNLOCK_INODE_MUTEX(inode); + CDEBUG(D_INODE, "got %d bytes MD data for inode %lu\n", + rc, inode->i_ino); + if (rc < 0) { + if (rc != -ENODATA) { + CERROR("error getting inode %lu MD: rc = %d\n", + inode->i_ino, rc); + RETURN(rc); + } + size[bufcount] = 0; + } else if (rc > mds->mdt_max_mdsize) { + size[bufcount] = 0; + CERROR("MD size %d larger than maximum possible %u\n", + rc, mds->mdt_max_mdsize); + } else { + size[bufcount] = rc; + } + bufcount++; + } else if (S_ISLNK(inode->i_mode) && (body->valid & OBD_MD_LINKNAME)) { + if (inode->i_size + 1 != body->eadatasize) + CERROR("symlink size: %Lu, reply space: %d\n", + inode->i_size + 1, body->eadatasize); + size[bufcount] = min_t(int, inode->i_size+1, body->eadatasize); + bufcount++; + CDEBUG(D_INODE, "symlink size: %Lu, reply space: %d\n", + inode->i_size + 1, body->eadatasize); + } + + if 
(OBD_FAIL_CHECK(OBD_FAIL_MDS_GETATTR_PACK)) { + CERROR("failed MDT_GETATTR_PACK test\n"); + req->rq_status = -ENOMEM; + RETURN(-ENOMEM); + } + + rc = lustre_pack_reply(req, bufcount, size, NULL); + if (rc) { + CERROR("lustre_pack_reply failed: rc %d\n", rc); + req->rq_status = rc; + RETURN(rc); + } + + RETURN(0); +} + +static int mdt_getattr_name(int offset, struct ptlrpc_request *req, + int child_part, struct lustre_handle *child_lockh) +{ + struct obd_device *obd = req->rq_export->exp_obd; + struct mdt_obd *mds = &obd->u.mds; + struct ldlm_reply *rep = NULL; + struct lvfs_run_ctxt saved; + struct mds_body *body; + struct dentry *dparent = NULL, *dchild = NULL; + struct lvfs_ucred uc = {NULL,}; + struct lustre_handle parent_lockh; + int namesize; + int rc = 0, cleanup_phase = 0, resent_req = 0; + char *name; + ENTRY; + + LASSERT(!strcmp(obd->obd_type->typ_name, LUSTRE_MDT_NAME)); + + /* Swab now, before anyone looks inside the request */ + + body = lustre_swab_reqbuf(req, offset, sizeof(*body), + lustre_swab_mdt_body); + if (body == NULL) { + CERROR("Can't swab mdt_body\n"); + RETURN(-EFAULT); + } + + LASSERT_REQSWAB(req, offset + 1); + name = lustre_msg_string(req->rq_reqmsg, offset + 1, 0); + if (name == NULL) { + CERROR("Can't unpack name\n"); + RETURN(-EFAULT); + } + namesize = lustre_msg_buflen(req->rq_reqmsg, offset + 1); + + rc = mdt_init_ucred(&uc, req, offset); + if (rc) + GOTO(cleanup, rc); + + LASSERT (offset == MDS_REQ_REC_OFF || offset == MDS_REQ_INTENT_REC_OFF); + /* if requests were at offset 2, the getattr reply goes back at 1 */ + if (offset == MDS_REQ_INTENT_REC_OFF) { + rep = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*rep)); + offset = 1; + } + + push_ctxt(&saved, &obd->obd_lvfs_ctxt, &uc); + cleanup_phase = 1; /* kernel context */ + intent_set_disposition(rep, DISP_LOOKUP_EXECD); + + if (lustre_handle_is_used(child_lockh)) { + LASSERT(lustre_msg_get_flags(req->rq_reqmsg) & MSG_RESENT); + resent_req = 1; + } + + if (resent_req == 0) { + if (name) { + rc = mdt_get_parent_child_locked(obd, &obd->u.mds, &body->fid1, + &parent_lockh, &dparent, + LCK_CR, + MDS_INODELOCK_UPDATE, + name, namesize, + child_lockh, &dchild, LCK_CR, + child_part); + } else { + /* For revalidate by fid we always take UPDATE lock */ + dchild = mdt_fid2locked_dentry(obd, &body->fid2, NULL, + LCK_CR, child_lockh, + NULL, 0, + MDT_INODELOCK_UPDATE); + LASSERT(dchild); + if (IS_ERR(dchild)) + rc = PTR_ERR(dchild); + } + if (rc) + GOTO(cleanup, rc); + } else { + struct ldlm_lock *granted_lock; + struct ll_fid child_fid; + struct ldlm_resource *res; + DEBUG_REQ(D_DLMTRACE, req, "resent, not enqueuing new locks"); + granted_lock = ldlm_handle2lock(child_lockh); + LASSERTF(granted_lock != NULL, LPU64"/%u lockh "LPX64"\n", + body->fid1.id, body->fid1.generation, + child_lockh->cookie); + + + res = granted_lock->l_resource; + child_fid.id = res->lr_name.name[0]; + child_fid.generation = res->lr_name.name[1]; + dchild = mdt_fid2dentry(&obd->u.mds, &child_fid, NULL); + LASSERT(!IS_ERR(dchild)); + LDLM_LOCK_PUT(granted_lock); + } + + cleanup_phase = 2; /* dchild, dparent, locks */ + + if (dchild->d_inode == NULL) { + intent_set_disposition(rep, DISP_LOOKUP_NEG); + /* in the intent case, the policy clears this error: + the disposition is enough */ + GOTO(cleanup, rc = -ENOENT); + } else { + intent_set_disposition(rep, DISP_LOOKUP_POS); + } + + if (req->rq_repmsg == NULL) { + rc = mdt_getattr_pack_msg(req, dchild->d_inode, offset); + if (rc != 0) { + CERROR ("mdt_getattr_pack_msg: %d\n", rc); + GOTO (cleanup, rc); 
+ } + } + + rc = mdt_getattr_internal(obd, dchild, req, body, offset); + GOTO(cleanup, rc); /* returns the lock to the client */ + + cleanup: + switch (cleanup_phase) { + case 2: + if (resent_req == 0) { + if (rc && dchild->d_inode) + ldlm_lock_decref(child_lockh, LCK_CR); + ldlm_lock_decref(&parent_lockh, LCK_CR); + l_dput(dparent); + } + l_dput(dchild); + case 1: + pop_ctxt(&saved, &obd->obd_lvfs_ctxt, &uc); + default: + mds_exit_ucred(&uc, mds); + if (req->rq_reply_state == NULL) { + req->rq_status = rc; + lustre_pack_reply(req, 0, NULL, NULL); + } + } + return rc; +} + +static int mds_getattr(struct ptlrpc_request *req, int offset) +{ + struct mds_obd *mds = mds_req2mds(req); + struct obd_device *obd = req->rq_export->exp_obd; + struct lvfs_run_ctxt saved; + struct dentry *de; + struct mds_body *body; + struct lvfs_ucred uc = {NULL,}; + int rc = 0; + ENTRY; + + body = lustre_swab_reqbuf(req, offset, sizeof(*body), + lustre_swab_mds_body); + if (body == NULL) + RETURN(-EFAULT); + + rc = mds_init_ucred(&uc, req, offset); + if (rc) + GOTO(out_ucred, rc); + + push_ctxt(&saved, &obd->obd_lvfs_ctxt, &uc); + de = mds_fid2dentry(mds, &body->fid1, NULL); + if (IS_ERR(de)) { + rc = req->rq_status = PTR_ERR(de); + GOTO(out_pop, rc); + } + + rc = mds_getattr_pack_msg(req, de->d_inode, offset); + if (rc != 0) { + CERROR("mds_getattr_pack_msg: %d\n", rc); + GOTO(out_pop, rc); + } + + req->rq_status = mds_getattr_internal(obd, de, req, body, 0); + + l_dput(de); + GOTO(out_pop, rc); +out_pop: + pop_ctxt(&saved, &obd->obd_lvfs_ctxt, &uc); +out_ucred: + if (req->rq_reply_state == NULL) { + req->rq_status = rc; + lustre_pack_reply(req, 0, NULL, NULL); + } + mds_exit_ucred(&uc, mds); + return rc; +} + + +static int mds_obd_statfs(struct obd_device *obd, struct obd_statfs *osfs, + unsigned long max_age) +{ + int rc; + + spin_lock(&obd->obd_osfs_lock); + rc = fsfilt_statfs(obd, obd->u.obt.obt_sb, max_age); + if (rc == 0) + memcpy(osfs, &obd->obd_osfs, sizeof(*osfs)); + spin_unlock(&obd->obd_osfs_lock); + + return rc; +} + +static int mds_statfs(struct ptlrpc_request *req) +{ + struct obd_device *obd = req->rq_export->exp_obd; + int rc, size = sizeof(struct obd_statfs); + ENTRY; + + /* This will trigger a watchdog timeout */ + OBD_FAIL_TIMEOUT(OBD_FAIL_MDS_STATFS_LCW_SLEEP, + (MDS_SERVICE_WATCHDOG_TIMEOUT / 1000) + 1); + + rc = lustre_pack_reply(req, 1, &size, NULL); + if (rc || OBD_FAIL_CHECK(OBD_FAIL_MDS_STATFS_PACK)) { + CERROR("mds: statfs lustre_pack_reply failed: rc = %d\n", rc); + GOTO(out, rc); + } + + /* We call this so that we can cache a bit - 1 jiffie worth */ + rc = mds_obd_statfs(obd, lustre_msg_buf(req->rq_repmsg, 0, size), + jiffies - HZ); + if (rc) { + CERROR("mds_obd_statfs failed: rc %d\n", rc); + GOTO(out, rc); + } + + EXIT; +out: + req->rq_status = rc; + return 0; +} + +static int mds_set_info(struct obd_export *exp, struct ptlrpc_request *req) +{ + char *key; + __u32 *val; + int keylen, rc = 0; + ENTRY; + + key = lustre_msg_buf(req->rq_reqmsg, 0, 1); + if (key == NULL) { + DEBUG_REQ(D_HA, req, "no set_info key"); + RETURN(-EFAULT); + } + keylen = req->rq_reqmsg->buflens[0]; + + val = lustre_msg_buf(req->rq_reqmsg, 1, sizeof(*val)); + if (val == NULL) { + DEBUG_REQ(D_HA, req, "no set_info val"); + RETURN(-EFAULT); + } + + rc = lustre_pack_reply(req, 0, NULL, NULL); + if (rc) + RETURN(rc); + req->rq_repmsg->status = 0; + + if (keylen < strlen("read-only") || + memcmp(key, "read-only", keylen) != 0) + RETURN(-EINVAL); + + if (*val) + exp->exp_connect_flags |= OBD_CONNECT_RDONLY; + else + 
exp->exp_connect_flags &= ~OBD_CONNECT_RDONLY;
+
+        RETURN(0);
+}
+
+enum mdt_handler_flags {
+        /*
+         * struct mds_body is passed in the 0-th incoming buffer.
+         */
+        HABEO_CORPUS = (1 << 0)
+};
+
+struct mdt_handler {
+        const char *mh_name;
+        int mh_fail_id;
+        __u32 mh_opc;
+        __u32 mh_flags;
+        int (*mh_act)(struct mdt_thread_info *info,
+                      struct ptlrpc_request *req);
+};
+
+/*
+ * Handlers are indexed by their opcode relative to the first opcode of the
+ * slice they belong to (MDS_GETATTR for mdt_mds_ops, see mdt_handlers[]
+ * below); the fail id and opcode are pasted together from the MDS_* names.
+ */
+#define DEF_HNDL(prefix, base, flags, name, fn)                         \
+[prefix ## _ ## name - prefix ## _ ## base] = {                         \
+        .mh_name    = #name,                                            \
+        .mh_fail_id = OBD_FAIL_ ## prefix ## _ ## name ## _NET,         \
+        .mh_opc     = prefix ## _ ## name,                              \
+        .mh_flags   = flags,                                            \
+        .mh_act     = fn                                                \
+}
+
+#define DEF_MDT_HNDL(flags, name, fn) DEF_HNDL(MDS, GETATTR, flags, name, fn)
+
+static struct mdt_handler mdt_mds_ops[] = {
+        DEF_MDT_HNDL(0, CONNECT, mdt_connect),
+        DEF_MDT_HNDL(0, DISCONNECT, mdt_disconnect),
+        DEF_MDT_HNDL(0, GETSTATUS, mdt_getstatus),
+        DEF_MDT_HNDL(HABEO_CORPUS, GETATTR, mdt_getattr),
+        DEF_MDT_HNDL(HABEO_CORPUS, GETATTR_NAME, mdt_getattr_name),
+        DEF_MDT_HNDL(HABEO_CORPUS, SETXATTR, mdt_setxattr),
+        DEF_MDT_HNDL(HABEO_CORPUS, GETXATTR, mdt_getxattr),
+        DEF_MDT_HNDL(0, STATFS, mdt_statfs),
+        DEF_MDT_HNDL(HABEO_CORPUS, READPAGE, mdt_readpage),
+        DEF_MDT_HNDL(0, REINT, mdt_reint),
+        DEF_MDT_HNDL(HABEO_CORPUS, CLOSE, mdt_close),
+        DEF_MDT_HNDL(HABEO_CORPUS, DONE_WRITING, mdt_done_writing),
+        DEF_MDT_HNDL(0, PIN, mdt_pin),
+        DEF_MDT_HNDL(HABEO_CORPUS, SYNC, mdt_sync),
+        DEF_MDT_HNDL(0, 0 /*SET_INFO*/, mdt_set_info),
+        DEF_MDT_HNDL(0, QUOTACHECK, mdt_handle_quotacheck),
+        DEF_MDT_HNDL(0, QUOTACTL, mdt_handle_quotactl)
+};
+
+static struct mdt_handler mdt_obd_ops[] = {
+};
+
+static struct mdt_handler mdt_dlm_ops[] = {
+};
+
+static struct mdt_handler mdt_llog_ops[] = {
+};
+
+static struct mdt_opc_slice {
+        __u32 mos_opc_start;
+        int mos_opc_end;
+        struct mdt_handler *mos_hs;
+} mdt_handlers[] = {
+        {
+                .mos_opc_start = MDS_GETATTR,
+                .mos_opc_end   = MDS_LAST_OPC,
+                .mos_hs        = mdt_mds_ops
+        },
+        {
+                .mos_opc_start = OBD_PING,
+                .mos_opc_end   = OBD_LAST_OPC,
+                .mos_hs        = mdt_obd_ops
+        },
+        {
+                .mos_opc_start = LDLM_ENQUEUE,
+                .mos_opc_end   = LDLM_LAST_OPC,
+                .mos_hs        = mdt_dlm_ops
+        },
+        {
+                .mos_opc_start = LLOG_ORIGIN_HANDLE_CREATE,
+                .mos_opc_end   = LLOG_LAST_OPC,
+                .mos_hs        = mdt_llog_ops
+        }
+};
+
+enum {
+        MDT_REP_BUF_NR_MAX = 8
+};
+
+/*
+ * Common data shared by mdt-level handlers. This is allocated per-thread to
+ * reduce stack consumption.
+ */
+struct mdt_thread_info {
+        struct mdt_device *mti_mdt;
+        /*
+         * number of buffers in reply message.
+         */
+        int mti_rep_buf_nr;
+        /*
+         * sizes of reply buffers.
+         */
+        int mti_rep_buf_size[MDT_REP_BUF_NR_MAX];
+        /*
+         * Body for "habeo corpus" operations.
+         */
+        struct mds_body *mti_body;
+        /*
+         * Host object. This is released at the end of mdt_handle().
+         */
+        struct mdt_object *mti_object;
+        /*
+         * Additional fail id that can be set by handler. Passed to
+         * target_send_reply().
+         */
+        int mti_fail_id;
+        /*
+         * Offset of incoming buffers. MDS_REQ_REC_OFF for top-level request
+         * processing, greater for intent handling.
+         */
+        int mti_offset;
+};
+
+struct mdt_handler *mdt_handler_find(__u32 opc)
+{
+        int i;
+        struct mdt_opc_slice *s;
+        struct mdt_handler *h;
+
+        h = NULL;
+        for (i = 0, s = mdt_handlers; i < ARRAY_SIZE(mdt_handlers); i++, s++) {
+                if (s->mos_opc_start <= opc && opc < s->mos_opc_end) {
+                        h = s->mos_hs + (opc - s->mos_opc_start);
+                        if (h->mh_opc != 0)
+                                LASSERT(h->mh_opc == opc);
+                        else
+                                h = NULL; /* unsupported opc */
+                        break;
+                }
+        }
+        return h;
+}
+
+struct mdt_object *mdt_object_find(struct mdt_device *d, struct lfid *f)
+{
+        struct lu_object *o;
+
+        o = lu_object_find(d->mdt_lu_dev.ld_site, f);
+        if (IS_ERR(o))
+                return (struct mdt_object *)o;
+        else
+                return container_of(o, struct mdt_object, mot_obj.mo_lu);
+}
+
+void mdt_object_put(struct mdt_object *o)
+{
+        lu_object_put(&o->mot_obj.mo_lu);
+}
+
+static int mdt_req_handle(struct mdt_thread_info *info,
+                          struct mdt_handler *h, struct ptlrpc_request *req,
+                          int shift)
+{
+        int result = 0;
+
+        ENTRY;
+
+        LASSERT(h->mh_act != NULL);
+        LASSERT(h->mh_opc == req->rq_reqmsg->opc);
+
+        DEBUG_REQ(D_INODE, req, "%s", h->mh_name);
+
+        if (h->mh_fail_id != 0)
+                OBD_FAIL_RETURN(h->mh_fail_id, 0);
+
+        info->mti_offset = MDS_REQ_REC_OFF + shift;
+        if (h->mh_flags & HABEO_CORPUS) {
+                info->mti_body = lustre_swab_reqbuf(req, info->mti_offset,
+                                                    sizeof *info->mti_body,
+                                                    lustre_swab_mds_body);
+                if (info->mti_body == NULL) {
+                        CERROR("Can't unpack body\n");
+                        result = req->rq_status = -EFAULT;
+                } else {
+                        info->mti_object = mdt_object_find(info->mti_mdt,
+                                                           &info->mti_body->fid1);
+                        if (IS_ERR(info->mti_object)) {
+                                result = PTR_ERR(info->mti_object);
+                                info->mti_object = NULL;
+                        }
+                }
+        }
+        if (result == 0)
+                result = h->mh_act(info, req);
+        /*
+         * XXX result value is unconditionally shoved into ->rq_status
+         * (original code sometimes placed error code into ->rq_status, and
+         * sometimes returned it to the caller).
+         * ptlrpc_server_handle_request() doesn't check return value anyway.
+         */
+        req->rq_status = result;
+        RETURN(result);
+}
+
+static void mdt_thread_info_init(struct mdt_thread_info *info)
+{
+        memset(info, 0, sizeof *info);
+        info->mti_fail_id = OBD_FAIL_MDS_ALL_REPLY_NET;
+        /*
+         * Poison size array.
+         */
+        for (info->mti_rep_buf_nr = 0;
+             info->mti_rep_buf_nr < MDT_REP_BUF_NR_MAX; info->mti_rep_buf_nr++)
+                info->mti_rep_buf_size[info->mti_rep_buf_nr] = ~0;
+}
+
+static void mdt_thread_info_fini(struct mdt_thread_info *info)
+{
+        if (info->mti_object != NULL) {
+                mdt_object_put(info->mti_object);
+                info->mti_object = NULL;
+        }
+}
+
+int mdt_handle(struct ptlrpc_request *req)
+{
+        int should_process;
+        int rc = 0;
+        struct mds_obd *mds = NULL; /* quell gcc overwarning */
+        struct obd_device *obd = NULL;
+        struct mdt_thread_info info; /* XXX on stack for now */
+        struct mdt_handler *h;
+
+        ENTRY;
+
+        OBD_FAIL_RETURN(OBD_FAIL_MDS_ALL_REQUEST_NET | OBD_FAIL_ONCE, 0);
+
+        LASSERT(current->journal_info == NULL);
+
+        /* initialize the per-request context; info.mti_fail_id is used by
+         * the reply path at the end of this function */
+        mdt_thread_info_init(&info);
+
+        rc = mds_msg_check_version(req->rq_reqmsg);
+        if (rc) {
+                CERROR(LUSTRE_MDT0_NAME" drops malformed request\n");
+                RETURN(rc);
+        }
+
+        /* XXX identical to OST */
+        if (req->rq_reqmsg->opc != MDS_CONNECT) {
+                struct mds_export_data *med;
+                int recovering, abort_recovery;
+
+                if (req->rq_export == NULL) {
+                        CERROR("operation %d on unconnected MDS from %s\n",
+                               req->rq_reqmsg->opc,
+                               libcfs_id2str(req->rq_peer));
+                        req->rq_status = -ENOTCONN;
+                        GOTO(out, rc = -ENOTCONN);
+                }
+
+                med = &req->rq_export->exp_mds_data;
+                obd = req->rq_export->exp_obd;
+                mds = &obd->u.mds;
+
+                /* sanity check: if the xid matches, the request must
+                 * be marked as a resent or replayed */
+                if (req->rq_xid == med->med_mcd->mcd_last_xid)
+                        LASSERTF(lustre_msg_get_flags(req->rq_reqmsg) &
+                                 (MSG_RESENT | MSG_REPLAY),
+                                 "rq_xid "LPU64" matches last_xid, "
+                                 "expected RESENT flag\n",
+                                 req->rq_xid);
+                /* else: note the opposite is not always true; a
+                 * RESENT req after a failover will usually not match
+                 * the last_xid, since it was likely never
+                 * committed. A REPLAYed request will almost never
+                 * match the last xid, however it could for a
+                 * committed, but still retained, open. */
+
+                /* Check for aborted recovery. */
+                spin_lock_bh(&obd->obd_processing_task_lock);
+                abort_recovery = obd->obd_abort_recovery;
+                recovering = obd->obd_recovering;
+                spin_unlock_bh(&obd->obd_processing_task_lock);
+                if (abort_recovery) {
+                        target_abort_recovery(obd);
+                } else if (recovering) {
+                        rc = mds_filter_recovery_request(req, obd,
+                                                         &should_process);
+                        if (rc || !should_process)
+                                RETURN(rc);
+                }
+        }
+
+        h = mdt_handler_find(req->rq_reqmsg->opc);
+        if (h != NULL) {
+                rc = mdt_req_handle(&info, h, req, 0);
+        } else {
+                req->rq_status = -ENOTSUPP;
+                rc = ptlrpc_error(req);
+                RETURN(rc);
+        }
+
+        LASSERT(current->journal_info == NULL);
+
+        /* If we're DISCONNECTing, the mds_export_data is already freed */
+        if (!rc && req->rq_reqmsg->opc != MDS_DISCONNECT) {
+                struct mds_export_data *med = &req->rq_export->exp_mds_data;
+                req->rq_repmsg->last_xid =
+                        le64_to_cpu(med->med_mcd->mcd_last_xid);
+
+                target_committed_to_req(req);
+        }
+
+        EXIT;
+ out:
+
+        if (lustre_msg_get_flags(req->rq_reqmsg) & MSG_LAST_REPLAY) {
+                if (obd && obd->obd_recovering) {
+                        DEBUG_REQ(D_HA, req, "LAST_REPLAY, queuing reply");
+                        mdt_thread_info_fini(&info);
+                        return target_queue_final_reply(req, rc);
+                }
+                /* Lost a race with recovery; let the error path DTRT. */
+                rc = req->rq_status = -ENOTCONN;
+        }
+
+        /* release the object (if any) pinned by mdt_req_handle() */
+        mdt_thread_info_fini(&info);
+        target_send_reply(req, rc, info.mti_fail_id);
+        return 0;
+}
+
+static int mdt_intent_policy(struct ldlm_namespace *ns,
+                             struct ldlm_lock **lockp, void *req_cookie,
+                             ldlm_mode_t mode, int flags, void *data)
+{
+        RETURN(ELDLM_LOCK_ABORTED);
+}
+
+struct ptlrpc_service *ptlrpc_init_svc_conf(struct ptlrpc_service_conf *c,
+                                            svc_handler_t h, char *name,
+                                            struct proc_dir_entry *proc_entry,
+                                            svcreq_printfn_t prntfn)
+{
+        return ptlrpc_init_svc(c->psc_nbufs, c->psc_bufsize,
+                               c->psc_max_req_size, c->psc_max_reply_size,
+                               c->psc_req_portal, c->psc_rep_portal,
+                               c->psc_watchdog_timeout,
+                               h, name, proc_entry,
+                               prntfn, c->psc_num_threads);
+}
+
+int md_device_init(struct md_device *md)
+{
+        return lu_device_init(&md->md_lu_dev);
+}
+
+void md_device_fini(struct md_device *md)
+{
+        lu_device_fini(&md->md_lu_dev);
+}
+
+static struct lu_device_operations mdt_lu_ops;
+
+static int mdt_device_init(struct mdt_device *m)
+{
+        md_device_init(&m->mdt_md_dev);
+
+        m->mdt_md_dev.md_lu_dev.ld_ops = &mdt_lu_ops;
+
+        m->mdt_service_conf.psc_nbufs = MDS_NBUFS;
+        m->mdt_service_conf.psc_bufsize = MDS_BUFSIZE;
+        m->mdt_service_conf.psc_max_req_size = MDS_MAXREQSIZE;
+        m->mdt_service_conf.psc_max_reply_size = MDS_MAXREPSIZE;
+        m->mdt_service_conf.psc_req_portal = MDS_REQUEST_PORTAL;
+        m->mdt_service_conf.psc_rep_portal = MDC_REPLY_PORTAL;
+        m->mdt_service_conf.psc_watchdog_timeout = MDS_SERVICE_WATCHDOG_TIMEOUT;
+        /*
+         * We'd like to have a mechanism to set this on a per-device basis,
+         * but alas...
+         */
+        if (mdt_num_threads < 2)
+                mdt_num_threads = MDS_DEF_THREADS;
+        m->mdt_service_conf.psc_num_threads = min(mdt_num_threads,
+                                                  MDS_MAX_THREADS);
+        return 0;
+}
+
+static void mdt_device_fini(struct mdt_device *m)
+{
+        md_device_fini(&m->mdt_md_dev);
+}
+
+static int lu_device_is_mdt(struct lu_device *d)
+{
+        /*
+         * XXX for now. Tags in lu_device_type->ldt_something are needed.
+         */
+        return ergo(d->ld_ops != NULL, d->ld_ops == &mdt_lu_ops);
+}
+
+static struct mdt_device *mdt_dev(struct lu_device *d)
+{
+        LASSERT(lu_device_is_mdt(d));
+        return container_of(d, struct mdt_device, mdt_lu_dev);
+}
+
+static struct mdt_object *mdt_obj(struct lu_object *o)
+{
+        LASSERT(lu_device_is_mdt(o->lo_dev));
+        return container_of(o, struct mdt_object, mot_obj.mo_lu);
+}
+
+static void mdt_fini(struct lu_device *d)
+{
+        struct mdt_device *m = mdt_dev(d);
+
+        if (d->ld_site != NULL) {
+                lu_site_fini(d->ld_site);
+                d->ld_site = NULL;
+        }
+        if (m->mdt_service != NULL) {
+                ptlrpc_unregister_service(m->mdt_service);
+                m->mdt_service = NULL;
+        }
+        if (m->mdt_namespace != NULL) {
+                ldlm_namespace_free(m->mdt_namespace, 0);
+                m->mdt_namespace = NULL;
+        }
+
+        LASSERT(atomic_read(&d->ld_ref) == 0);
+}
+
+static int mdt_init0(struct lu_device *d)
+{
+        struct mdt_device *m = mdt_dev(d);
+        struct lu_site *s;
+        char ns_name[48];
+
+        ENTRY;
+
+        OBD_ALLOC_PTR(s);
+        if (s == NULL)
+                return -ENOMEM;
+
+        mdt_device_init(m);
+        /* the mdt device is the top of the stack for this site */
+        lu_site_init(s, d);
+
+        snprintf(ns_name, sizeof ns_name, LUSTRE_MDT0_NAME"-%p", m);
+        m->mdt_namespace = ldlm_namespace_new(ns_name, LDLM_NAMESPACE_SERVER);
+        if (m->mdt_namespace == NULL)
+                return -ENOMEM;
+        ldlm_register_intent(m->mdt_namespace, mdt_intent_policy);
+
+        ptlrpc_init_client(LDLM_CB_REQUEST_PORTAL, LDLM_CB_REPLY_PORTAL,
+                           "mdt_ldlm_client", &m->mdt_ldlm_client);
+
+        m->mdt_service = ptlrpc_init_svc_conf(&m->mdt_service_conf,
+                                              mdt_handle, LUSTRE_MDT0_NAME,
+                                              m->mdt_lu_dev.ld_proc_entry,
+                                              NULL);
+        if (m->mdt_service == NULL)
+                return -ENOMEM;
+
+        return ptlrpc_start_threads(NULL, m->mdt_service, LUSTRE_MDT0_NAME);
+}
+
+static int mdt_init(struct lu_device *d)
+{
+        int result;
+
+        result = mdt_init0(d);
+        if (result != 0)
+                mdt_fini(d);
+        return result;
+}
+
+struct lu_object *mdt_object_alloc(struct lu_device *d)
+{
+        struct mdt_object *mo;
+
+        OBD_ALLOC_PTR(mo);
+        if (mo != NULL) {
+                struct lu_object *o;
+                struct lu_object_header *h;
+
+                o = &mo->mot_obj.mo_lu;
+                h = &mo->mot_header;
+                lu_object_header_init(h);
+                lu_object_init(o, h, d);
+                /* ->lo_depth and ->lo_flags are automatically 0 */
+                lu_object_add_top(h, o);
+                return o;
+        } else
+                return NULL;
+}
+
+int mdt_object_init(struct lu_object *o)
+{
+        struct mdt_device *d = mdt_dev(o->lo_dev);
+        struct lu_device *under;
+        struct lu_object *below;
+
+        under = &d->mdt_mdd->md_lu_dev;
+        below = under->ld_ops->ldo_object_alloc(under);
+        if (below != NULL) {
+                lu_object_add(o, below);
+                return 0;
+        } else
+                return -ENOMEM;
+}
+
+void mdt_object_free(struct lu_object *o)
+{
+        struct mdt_object *mo = mdt_obj(o);
+        struct lu_object_header *h;
+
+        h = o->lo_header;
+        lu_object_fini(o);
+        lu_object_header_fini(h);
+        /* memory was allocated in mdt_object_alloc() */
+        OBD_FREE_PTR(mo);
+}
+
+void mdt_object_release(struct lu_object *o)
+{
+}
+
+int mdt_object_print(struct seq_file *f, const struct lu_object *o)
+{
+        return seq_printf(f, LUSTRE_MDT0_NAME"-object@%p", o);
+}
+
+static struct lu_device_operations mdt_lu_ops = {
+        .ldo_init           = mdt_init,
+        .ldo_fini           = mdt_fini,
+        .ldo_object_alloc   = mdt_object_alloc,
+        .ldo_object_init    = mdt_object_init,
+        .ldo_object_free    = mdt_object_free,
+        .ldo_object_release = mdt_object_release,
+        .ldo_object_print   = mdt_object_print
+};
+
+int mdt_mkdir(struct mdt_device *d, struct lfid *pfid, const char *name)
+{
+        struct mdt_object *o;
+        struct lock_handle lh;
+        int result;
+
+        o = mdt_object_find(d, pfid);
+        if (IS_ERR(o))
+                return PTR_ERR(o);
+        result = fid_lock(pfid, LCK_PW, &lh);
+        if (result == 0) {
+                result = d->mdt_md_dev.md_ops->mdo_mkdir(o, name);
+                fid_unlock(&lh);
+        }
+        mdt_object_put(o);
+        return
result; +} + +static struct obd_ops mdt_ops = { + .o_owner = THIS_MODULE, + .o_connect = mds_connect, + .o_reconnect = mds_reconnect, + .o_init_export = mds_init_export, + .o_destroy_export = mds_destroy_export, + .o_disconnect = mds_disconnect, + .o_setup = mds_setup, + .o_precleanup = mds_precleanup, + .o_cleanup = mds_cleanup, + .o_postrecov = mds_postrecov, + .o_statfs = mds_obd_statfs, + .o_iocontrol = mds_iocontrol, + .o_create = mds_obd_create, + .o_destroy = mds_obd_destroy, + .o_llog_init = mds_llog_init, + .o_llog_finish = mds_llog_finish, + .o_notify = mds_notify, + .o_health_check = mds_health_check, +}; + +static int __init mdt_mod_init(void) +{ + return 0; +} + +static void __exit mdt_mod_exit(void) +{ +} + +MODULE_AUTHOR("Cluster File Systems, Inc. "); +MODULE_DESCRIPTION("Lustre Meta-data Target Prototype ("LUSTRE_MDT0_NAME")"); +MODULE_LICENSE("GPL"); + +CFS_MODULE_PARM(mdt_num_threads, "i", int, 0444, + "number of mdt service threads to start"); + +cfs_module(mdt, "0.0.2", mdt_mod_init, mdt_mod_exit); + +#endif /* 0 */ diff --git a/lustre/obdclass/Makefile.in b/lustre/obdclass/Makefile.in index 6b923394..aef52de 100644 --- a/lustre/obdclass/Makefile.in +++ b/lustre/obdclass/Makefile.in @@ -1,7 +1,7 @@ MODULES := obdclass llog_test obdclass-objs := llog.o llog_cat.o llog_lvfs.o llog_obd.o llog_swab.o -obdclass-objs += class_obd.o +obdclass-objs += class_obd.o lu_object.o obdclass-objs += debug.o genops.o sysctl.o uuid.o llog_ioctl.o obdclass-objs += lprocfs_status.o lustre_handles.o lustre_peer.o obdclass-objs += statfs_pack.o obdo.o obd_config.o obd_mount.o prng.o diff --git a/lustre/obdclass/class_obd.c b/lustre/obdclass/class_obd.c index f18f032..4f9819a 100644 --- a/lustre/obdclass/class_obd.c +++ b/lustre/obdclass/class_obd.c @@ -88,7 +88,7 @@ int obd_memmax; int proc_version; -/* The following are visible and mutable through /proc/sys/lustre/. */ +/* The following are visible and mutable through /proc/fs/lustre/. */ unsigned int obd_fail_loc; unsigned int obd_dump_on_timeout; unsigned int obd_timeout = 100; /* seconds */ @@ -794,6 +794,7 @@ static void cleanup_obdclass(void) EXIT; } + /* Check that we're building against the appropriate version of the Lustre * kernel patch */ #include diff --git a/lustre/obdclass/lu_object.c b/lustre/obdclass/lu_object.c new file mode 100644 index 0000000..05fed47 --- /dev/null +++ b/lustre/obdclass/lu_object.c @@ -0,0 +1,359 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Lustre Object. + * + * Copyright (C) 2006 Cluster File Systems, Inc. + * + * This file is part of the Lustre file system, http://www.lustre.org + * Lustre is a trademark of Cluster File Systems, Inc. + * + * You may have signed or agreed to another license before downloading + * this software. If so, you are bound by the terms and conditions + * of that agreement, and the following does not apply to you. See the + * LICENSE file included with this distribution for more information. + * + * If you did not agree to a different license, then this copy of Lustre + * is open source software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * In either case, Lustre is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * license text for more details. 
+ * + * These are the only exported functions, they provide some generic + * infrastructure for managing object devices + */ + +#define DEBUG_SUBSYSTEM S_CLASS +#ifndef EXPORT_SYMTAB +# define EXPORT_SYMTAB +#endif + +#include +#include +#include +#include + +#include + +static void lu_object_free(struct lu_object *o); + +void lu_object_put(struct lu_object *o) +{ + struct lu_object_header *top; + struct lu_site *site; + + top = o->lo_header; + site = o->lo_dev->ld_site; + spin_lock(&site->ls_guard); + if (-- top->loh_ref == 0) { + list_for_each_entry(o, &top->loh_layers, lo_linkage) { + if (lu_object_ops(o)->ldo_object_release != NULL) + lu_object_ops(o)->ldo_object_release(o); + } + -- site->ls_busy; + if (lu_object_is_dying(top)) { + hlist_del_init(&top->loh_hash); + list_del_init(&top->loh_lru); + } + } + spin_unlock(&site->ls_guard); + if (lu_object_is_dying(top)) + /* + * Object was already removed from hash and lru above, can + * kill it. + */ + lu_object_free(o); +} +EXPORT_SYMBOL(lu_object_put); + +struct lu_object *lu_object_alloc(struct lu_site *s, const struct lfid *f) +{ + struct lu_object *scan; + struct lu_object *top; + int clean; + int result; + + top = s->ls_top_dev->ld_ops->ldo_object_alloc(s->ls_top_dev); + if (IS_ERR(top)) + return top; + *lu_object_fid(top) = *f; + do { + clean = 1; + list_for_each_entry(scan, + &top->lo_header->loh_layers, lo_linkage) { + if (scan->lo_flags & LU_OBJECT_ALLOCATED) + continue; + clean = 0; + result = lu_object_ops(scan)->ldo_object_init(scan); + if (result != 0) { + lu_object_free(top); + return ERR_PTR(result); + } + scan->lo_flags |= LU_OBJECT_ALLOCATED; + } + } while (!clean); + s->ls_stats.s_created ++; + return top; +} + +static void lu_object_free(struct lu_object *o) +{ + struct list_head splice; + + -- o->lo_dev->ld_site->ls_total; + INIT_LIST_HEAD(&splice); + list_splice_init(&o->lo_header->loh_layers, &splice); + while (!list_empty(&splice)) { + o = container_of(splice.next, struct lu_object, lo_linkage); + list_del_init(&o->lo_linkage); + LASSERT(lu_object_ops(o)->ldo_object_free != NULL); + lu_object_ops(o)->ldo_object_free(o); + } +} + +void lu_site_purge(struct lu_site *s, int nr) +{ + struct list_head dispose; + struct lu_object_header *h; + struct lu_object_header *temp; + + INIT_LIST_HEAD(&dispose); + spin_lock(&s->ls_guard); + list_for_each_entry_safe(h, temp, &s->ls_lru, loh_lru) { + if (nr-- == 0) + break; + if (h->loh_ref > 0) + continue; + hlist_del_init(&h->loh_hash); + list_move(&h->loh_lru, &dispose); + } + spin_unlock(&s->ls_guard); + while (!list_empty(&dispose)) { + h = container_of(dispose.next, + struct lu_object_header, loh_lru); + list_del_init(&h->loh_lru); + lu_object_free(lu_object_top(h)); + s->ls_stats.s_lru_purged ++; + } +} +EXPORT_SYMBOL(lu_site_purge); + +int lu_object_print(struct seq_file *f, const struct lu_object *o) +{ + static char ruler[] = "........................................"; + const struct lu_object *scan; + int nob; + int depth; + + nob = 0; + scan = o; + list_for_each_entry_continue(scan, &o->lo_linkage, lo_linkage) { + depth = scan->lo_depth; + if (depth <= o->lo_depth && scan != o) + break; + LASSERT(lu_object_ops(scan)->ldo_object_print != NULL); + nob += seq_printf(f, "%*.*s", depth, depth, ruler); + nob += lu_object_ops(scan)->ldo_object_print(f, scan); + nob += seq_printf(f, "\n"); + } + return nob; +} +EXPORT_SYMBOL(lu_object_print); + +static struct lu_object *htable_lookup(struct lu_site *s, + const struct hlist_head *bucket, + const struct lfid *f) +{ + struct 
lu_object_header *h;
+        struct hlist_node *scan;
+
+        hlist_for_each_entry(h, scan, bucket, loh_hash) {
+                s->ls_stats.s_cache_check ++;
+                if (lfid_eq(&h->loh_fid, f) && !lu_object_is_dying(h)) {
+                        /* bump reference count... */
+                        if (h->loh_ref ++ == 0)
+                                ++ s->ls_busy;
+                        /* ...and move to the tail of the LRU, which is
+                         * scanned from the head by lu_site_purge() */
+                        list_move_tail(&h->loh_lru, &s->ls_lru);
+                        s->ls_stats.s_cache_hit ++;
+                        return lu_object_top(h);
+                }
+        }
+        s->ls_stats.s_cache_miss ++;
+        return NULL;
+}
+
+static __u32 fid_hash(const struct lfid *f)
+{
+        return f->f_seq + f->f_id + f->f_version;
+}
+
+struct lu_object *lu_object_find(struct lu_site *s, const struct lfid *f)
+{
+        struct lu_object *o;
+        struct lu_object *shadow;
+        struct hlist_head *bucket;
+
+        bucket = s->ls_hash + (fid_hash(f) & s->ls_hash_mask);
+        spin_lock(&s->ls_guard);
+        o = htable_lookup(s, bucket, f);
+        spin_unlock(&s->ls_guard);
+        if (o != NULL)
+                return o;
+
+        o = lu_object_alloc(s, f);
+        if (IS_ERR(o))
+                return o;
+
+        ++ s->ls_total;
+        LASSERT(lfid_eq(lu_object_fid(o), f));
+
+        spin_lock(&s->ls_guard);
+        shadow = htable_lookup(s, bucket, f);
+        if (shadow == NULL) {
+                hlist_add_head(&o->lo_header->loh_hash, bucket);
+                /* new header goes onto the site LRU list */
+                list_add_tail(&o->lo_header->loh_lru, &s->ls_lru);
+                shadow = o;
+                o = NULL;
+        } else
+                s->ls_stats.s_cache_race ++;
+        spin_unlock(&s->ls_guard);
+        if (o != NULL)
+                lu_object_free(o);
+        return shadow;
+}
+EXPORT_SYMBOL(lu_object_find);
+
+enum {
+        LU_SITE_HTABLE_BITS = 8,
+        LU_SITE_HTABLE_SIZE = (1 << LU_SITE_HTABLE_BITS),
+        LU_SITE_HTABLE_MASK = LU_SITE_HTABLE_SIZE - 1
+};
+
+int lu_site_init(struct lu_site *s, struct lu_device *top)
+{
+        memset(s, 0, sizeof *s);
+
+        spin_lock_init(&s->ls_guard);
+        CFS_INIT_LIST_HEAD(&s->ls_lru);
+        s->ls_top_dev = top;
+        top->ld_site = s;
+        lu_device_get(top);
+        /*
+         * XXX nikita: fixed size hash-table.
+         */
+        s->ls_hash_mask = LU_SITE_HTABLE_MASK;
+        OBD_ALLOC(s->ls_hash, LU_SITE_HTABLE_SIZE * sizeof s->ls_hash[0]);
+        if (s->ls_hash != NULL) {
+                int i;
+                for (i = 0; i < LU_SITE_HTABLE_SIZE; i++)
+                        INIT_HLIST_HEAD(&s->ls_hash[i]);
+                return 0;
+        } else
+                return -ENOMEM;
+}
+EXPORT_SYMBOL(lu_site_init);
+
+void lu_site_fini(struct lu_site *s)
+{
+        LASSERT(list_empty(&s->ls_lru));
+        LASSERT(s->ls_total == 0);
+        LASSERT(s->ls_busy == 0);
+
+        if (s->ls_hash != NULL) {
+                int i;
+                for (i = 0; i < LU_SITE_HTABLE_SIZE; i++)
+                        LASSERT(hlist_empty(&s->ls_hash[i]));
+                OBD_FREE(s->ls_hash,
+                         LU_SITE_HTABLE_SIZE * sizeof s->ls_hash[0]);
+                s->ls_hash = NULL;
+        }
+        if (s->ls_top_dev != NULL) {
+                lu_device_put(s->ls_top_dev);
+                s->ls_top_dev = NULL;
+        }
+}
+EXPORT_SYMBOL(lu_site_fini);
+
+void lu_device_get(struct lu_device *d)
+{
+        atomic_inc(&d->ld_ref);
+}
+EXPORT_SYMBOL(lu_device_get);
+
+void lu_device_put(struct lu_device *d)
+{
+        atomic_dec(&d->ld_ref);
+}
+EXPORT_SYMBOL(lu_device_put);
+
+int lu_device_init(struct lu_device *d)
+{
+        memset(d, 0, sizeof *d);
+        atomic_set(&d->ld_ref, 0);
+        return 0;
+}
+EXPORT_SYMBOL(lu_device_init);
+
+void lu_device_fini(struct lu_device *d)
+{
+        LASSERT(atomic_read(&d->ld_ref) == 0);
+}
+EXPORT_SYMBOL(lu_device_fini);
+
+int lu_object_init(struct lu_object *o,
+                   struct lu_object_header *h, struct lu_device *d)
+{
+        memset(o, 0, sizeof *o);
+        o->lo_header = h;
+        o->lo_dev = d;
+        lu_device_get(d);
+        CFS_INIT_LIST_HEAD(&o->lo_linkage);
+        return 0;
+}
+EXPORT_SYMBOL(lu_object_init);
+
+void lu_object_fini(struct lu_object *o)
+{
+        LASSERT(list_empty(&o->lo_linkage));
+
+        if (o->lo_dev != NULL) {
+                /* drop the device reference taken in lu_object_init() */
+                lu_device_put(o->lo_dev);
+                o->lo_dev = NULL;
+        }
+}
+EXPORT_SYMBOL(lu_object_fini);
+
+void lu_object_add_top(struct lu_object_header *h, struct lu_object *o)
+{
+        list_move(&o->lo_linkage, &h->loh_layers);
+}
+EXPORT_SYMBOL(lu_object_add_top);
+
+void lu_object_add(struct lu_object *before, struct lu_object *o)
+{
+        list_move(&o->lo_linkage, &before->lo_linkage);
+}
+EXPORT_SYMBOL(lu_object_add);
+
+int lu_object_header_init(struct lu_object_header *h)
+{
+        memset(h, 0, sizeof *h);
+        INIT_HLIST_NODE(&h->loh_hash);
+        CFS_INIT_LIST_HEAD(&h->loh_lru);
+        CFS_INIT_LIST_HEAD(&h->loh_layers);
+        return 0;
+}
+EXPORT_SYMBOL(lu_object_header_init);
+
+void lu_object_header_fini(struct lu_object_header *h)
+{
+        LASSERT(list_empty(&h->loh_layers));
+        LASSERT(list_empty(&h->loh_lru));
+        LASSERT(hlist_unhashed(&h->loh_hash));
+}
+EXPORT_SYMBOL(lu_object_header_fini);
diff --git a/lustre/ptlrpc/pack_generic.c b/lustre/ptlrpc/pack_generic.c
index 896e928..8d30479 100644
--- a/lustre/ptlrpc/pack_generic.c
+++ b/lustre/ptlrpc/pack_generic.c
@@ -55,7 +55,8 @@ int lustre_msg_check_version(struct lustre_msg *msg, __u32 version)
 }
 
 static void
-lustre_init_msg (struct lustre_msg *msg, int count, int *lens, char **bufs)
+lustre_init_msg (struct lustre_msg *msg,
+                 int count, const int *lens, char **bufs)
 {
         char *ptr;
         int i;
@@ -77,7 +78,7 @@ lustre_init_msg (struct lustre_msg *msg, int count, int *lens, char **bufs)
 }
 
 int lustre_pack_request (struct ptlrpc_request *req,
-                         int count, int *lens, char **bufs)
+                         int count, const int *lens, char **bufs)
 {
         int reqlen;
         ENTRY;
@@ -168,7 +169,7 @@ out:
 
 int lustre_pack_reply (struct ptlrpc_request *req,
-                       int count, int *lens, char **bufs)
+                       int count, const int *lens, char **bufs)
 {
         struct ptlrpc_reply_state *rs;
         int msg_len;
@@ -285,7 +286,7 @@ void lustre_free_reply_state (struct ptlrpc_reply_state *rs)
 
 /* This returns the size of the buffer that is
required to hold a lustre_msg * with the given sub-buffer lengths. */ -int lustre_msg_size(int count, int *lengths) +int lustre_msg_size(int count, const int *lengths) { int size; int i;
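
For illustration only (not part of the patch): a minimal sketch of how a caller
is expected to drive the lu_object cache introduced above, assuming the
declarations from the new lustre/include/linux/lu_object.h. The site and fid
arguments are placeholders supplied by the caller; the function name is
hypothetical.

static int lu_object_example(struct lu_site *site, const struct lfid *fid)
{
        struct lu_object *o;

        /* lu_object_find() returns the top object of the compound stack,
         * allocating and initializing every layer on a cache miss. */
        o = lu_object_find(site, fid);
        if (IS_ERR(o))
                return PTR_ERR(o);

        /* ... operate on the compound object through its layers ... */

        /* Dropping the last reference returns the object to the cache; it
         * keeps its fid and can be found again until lu_site_purge()
         * reclaims it from the tail of the LRU. */
        lu_object_put(o);
        return 0;
}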